From bb0f24101565d34ea8b70fdec4dd3f3b35a70e7b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 19 Sep 2021 17:49:25 +0200 Subject: [PATCH] Compile table traversals: next(), pairs(), BC_ISNEXT/BC_ITERN. Sponsored by OpenResty Inc. --- src/jit/dump.lua | 17 +++--- src/lib_base.c | 2 +- src/lj_asm.c | 12 ++++- src/lj_asm_arm.h | 2 + src/lj_asm_arm64.h | 9 +++- src/lj_asm_mips.h | 2 + src/lj_asm_ppc.h | 5 +- src/lj_asm_x86.h | 13 ++++- src/lj_dispatch.c | 8 ++- src/lj_ffrecord.c | 34 ++++++++++++ src/lj_ir.h | 2 + src/lj_ircall.h | 2 + src/lj_jit.h | 6 ++- src/lj_opt_fold.c | 9 ++++ src/lj_opt_mem.c | 5 +- src/lj_record.c | 130 +++++++++++++++++++++++++++++++++++++++++++-- src/lj_record.h | 1 + src/lj_snap.c | 10 +++- src/lj_trace.c | 22 +++++--- src/lj_vm.h | 2 + src/vm_arm.dasc | 79 ++++++++++++++++++++++++++- src/vm_arm64.dasc | 79 ++++++++++++++++++++++++++- src/vm_mips.dasc | 97 +++++++++++++++++++++++++++++++-- src/vm_mips64.dasc | 92 ++++++++++++++++++++++++++++++-- src/vm_ppc.dasc | 9 +++- src/vm_x64.dasc | 80 +++++++++++++++++++++++++++- src/vm_x86.dasc | 99 +++++++++++++++++++++++++++++++++- 27 files changed, 781 insertions(+), 47 deletions(-) diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 5fb1e144db..9eda08c462 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -219,8 +219,10 @@ local function colorize_text(s) return s end -local function colorize_ansi(s, t) - return format(colortype_ansi[t], s) +local function colorize_ansi(s, t, extra) + local out = format(colortype_ansi[t], s) + if extra then out = "\027[3m"..out end + return out end local irtype_ansi = setmetatable({}, @@ -229,9 +231,10 @@ local irtype_ansi = setmetatable({}, local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", } -local function colorize_html(s, t) +local function colorize_html(s, t, extra) s = gsub(s, "[<>&]", html_escape) - return format('<span class="irt_%s">%s</span>', irtype_text[t], s) + return format('<span class="irt_%s%s">%s</span>', + irtype_text[t], extra and " irt_extra" or "", s) end local irtype_html = 
setmetatable({}, @@ -256,6 +259,7 @@ span.irt_tab { color: #c00000; } span.irt_udt, span.irt_lud { color: #00c0c0; } span.irt_num { color: #4040c0; } span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } +span.irt_extra { font-style: italic; } ]] @@ -271,6 +275,7 @@ local litname = { if band(mode, 8) ~= 0 then s = s.."C" end if band(mode, 16) ~= 0 then s = s.."R" end if band(mode, 32) ~= 0 then s = s.."I" end + if band(mode, 64) ~= 0 then s = s.."K" end t[mode] = s return s end}), @@ -350,7 +355,7 @@ local function formatk(tr, idx, sn) else s = tostring(k) -- For primitives. end - s = colorize(format("%-4s", s), t) + s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) if slot then s = format("%s @%d", s, slot) end @@ -370,7 +375,7 @@ local function printsnap(tr, snap) out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else local m, ot, op1, op2 = traceir(tr, ref) - out:write(colorize(format("%04d", ref), band(ot, 31))) + out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) end out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME else diff --git a/src/lib_base.c b/src/lib_base.c index f16c66f506..55e3c6b8ad 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -76,7 +76,7 @@ LJLIB_ASM_(type) LJLIB_REC(.) /* This solves a circular dependency problem -- change FF_next_N as needed. */ LJ_STATIC_ASSERT((int)FF_next == FF_next_N); -LJLIB_ASM(next) +LJLIB_ASM(next) LJLIB_REC(.) { lj_lib_checktab(L, 1); lj_err_msg(L, LJ_ERR_NEXTIDX); diff --git a/src/lj_asm.c b/src/lj_asm.c index d377eb4d48..cc78840793 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2225,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as) as->modset |= RSET_SCRATCH; continue; } - case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { + case IR_CALLL: + /* lj_vm_next needs two TValues on the stack. 
*/ +#if LJ_TARGET_X64 && LJ_ABI_WIN + if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) + as->evenspill = SPS_FIRST + 4; +#else + if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) + as->evenspill = 4; +#endif + /* fallthrough */ + case IR_CALLN: case IR_CALLA: case IR_CALLS: { const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; ir->prev = asm_setup_call_slots(as, ir, ci); if (inloop) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index e53f9b08a7..cc608c0d9c 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -2064,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); } diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 3cedd021e6..5decfff480 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1814,7 +1814,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; - if (irt_isnum(ir->t)) { + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg r = irref_isk(ref) ? 
ra_allock(as, ir->i, allow) : + ra_alloc1(as, ref, allow); + rset_clear(allow, r); + emit_lso(as, A64I_STRw, r, RID_BASE, ofs); + emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); + } else if (irt_isnum(ir->t)) { Reg src = ra_alloc1(as, ref, RSET_FPR); emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); } else { diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 7f7dc6a0ad..ba05f193e5 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -2568,6 +2568,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index f99561b31f..ac5d88ce84 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1103,7 +1103,8 @@ static void asm_sload(ASMState *as, IRIns *ir) lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), "inconsistent SLOAD variant"); lj_assertA(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), "bad SLOAD type"); #if LJ_SOFTFP lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), @@ -2096,6 +2097,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 48c31fe3ce..5eb183659e 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1700,7 +1700,8 @@ static void asm_sload(ASMState *as, IRIns *ir) lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), "inconsistent SLOAD 
variant"); lj_assertA(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), "bad SLOAD type"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); @@ -2727,7 +2728,15 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; - if (irt_isnum(ir->t)) { + if ((sn & SNAP_KEYINDEX)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); + if (irref_isk(ref)) { + emit_movmroi(as, RID_BASE, ofs, ir->i); + } else { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_movtomro(as, src, RID_BASE, ofs); + } + } else if (irt_isnum(ir->t)) { Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index bf8d8812cc..7b73d3dd8a 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) /* The JIT engine is off by default. luaopen_jit() turns it on. */ disp[BC_FORL] = disp[BC_IFORL]; disp[BC_ITERL] = disp[BC_IITERL]; + /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ + disp[BC_ITERN] = &lj_vm_IITERN; disp[BC_LOOP] = disp[BC_ILOOP]; disp[BC_FUNCF] = disp[BC_IFUNCF]; disp[BC_FUNCV] = disp[BC_IFUNCV]; @@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g) mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; if (oldmode != mode) { /* Mode changed? */ ASMFunction *disp = G2GG(g)->dispatch; - ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; + ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; g->dispatchmode = mode; /* Hotcount if JIT is on, but not while recording. 
*/ if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); + f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); } else { /* Otherwise use the non-hotcounting instructions. */ f_forl = disp[GG_LEN_DDISP+BC_IFORL]; f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; + f_itern = &lj_vm_IITERN; f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); @@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g) /* Init static counting instruction dispatch first (may be copied below). */ disp[GG_LEN_DDISP+BC_FORL] = f_forl; disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; + disp[GG_LEN_DDISP+BC_ITERN] = f_itern; disp[GG_LEN_DDISP+BC_LOOP] = f_loop; /* Set dynamic instruction dispatch. */ @@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g) /* Otherwise set dynamic counting ins. */ disp[BC_FORL] = f_forl; disp[BC_ITERL] = f_iterl; + disp[BC_ITERN] = f_itern; disp[BC_LOOP] = f_loop; /* Set dynamic return dispatch. */ if ((mode & DISPMODE_RET)) { diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 24432d84bb..01e53fb670 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -521,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) recff_nyiu(J, rd); } +static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + recff_nyi(J, rd); +#else + TRef tab = J->base[0]; + if (tref_istab(tab)) { + RecordIndex ix; + cTValue *keyv; + ix.tab = tab; + if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. 
*/ + ix.key = lj_ir_kint(J, 0); + keyv = niltvg(J2G(J)); + } else { + TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); + ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); + keyv = &rd->argv[1]; + } + copyTV(J->L, &ix.tabv, &rd->argv[0]); + ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); + /* Omit the value, if not used by the caller. */ + ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && + bc_b(frame_pc(J->L->base-1)[-1]) <= 2); + ix.mobj = 0; /* We don't need the next index. */ + rd->nres = lj_record_next(J, &ix); + J->base[0] = ix.key; + J->base[1] = ix.val; + } /* else: Interpreter will throw. */ +#endif +} + /* -- Math library fast functions ----------------------------------------- */ static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) diff --git a/src/lj_ir.h b/src/lj_ir.h index 6a16193387..2b127f6c04 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -236,6 +236,7 @@ IRFLDEF(FLENUM) #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ +#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ /* XLOAD mode bits, stored in op2. */ #define IRXLOAD_READONLY 0x01 /* Load from read-only data. 
*/ @@ -495,6 +496,7 @@ typedef uint32_t TRef; #define TREF_REFMASK 0x0000ffff #define TREF_FRAME 0x00010000 #define TREF_CONT 0x00020000 +#define TREF_KEYINDEX 0x00100000 #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) diff --git a/src/lj_ircall.h b/src/lj_ircall.h index c837b18d42..9e7013ba3c 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -187,6 +187,8 @@ typedef struct CCallInfo { _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ + _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ + _(ANY, lj_vm_next, 2, FL, PTR, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ diff --git a/src/lj_jit.h b/src/lj_jit.h index 34ddf90726..c9fe83191f 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -150,6 +150,7 @@ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ LJ_TRACE_ACTIVE = 0x10, LJ_TRACE_RECORD, /* Bytecode recording active. */ + LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ LJ_TRACE_START, /* New trace started. */ LJ_TRACE_END, /* End of trace. */ LJ_TRACE_ASM, /* Assemble trace. */ @@ -200,12 +201,15 @@ typedef uint32_t SnapEntry; #define SNAP_CONT 0x020000 /* Continuation slot. */ #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ +#define SNAP_KEYINDEX 0x100000 /* Traversal key index. 
*/ LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); +LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ - (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) + (((SnapEntry)(slot) << 24) + \ + ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) #if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) #endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 41e0d1ca4f..2f903e2782 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2320,6 +2320,15 @@ LJFOLDF(fload_sbuf) return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; } +/* The fast function ID of function objects is immutable. */ +LJFOLD(FLOAD KGC IRFL_FUNC_FFID) +LJFOLDF(fload_func_ffid_kgc) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) + return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); + return NEXTFOLD; +} + /* The C type ID of cdata objects is immutable. */ LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) LJFOLDF(fload_cdata_typeid_kgc) diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 81184f147a..d6a419e41b 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -364,7 +364,10 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) /* Different value: try to eliminate the redundant store. */ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ IRIns *ir; - /* Check for any intervening guards (includes conflicting loads). */ + /* Check for any intervening guards (includes conflicting loads). + ** Note that lj_tab_keyindex and lj_vm_next don't need guards, + ** since they are followed by at least one guarded VLOAD. + */ for (ir = IR(J->cur.nins-1); ir > store; ir--) if (irt_isguard(ir->t) || ir->o == IR_ALEN) goto doemit; /* No elimination possible. 
*/ diff --git a/src/lj_record.c b/src/lj_record.c index a1471aaec4..e51c98ba98 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -156,6 +156,9 @@ static void rec_check_slots(jit_State *J) lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), "cont slot %d not followed by frame", s); depth++; + } else if ((tr & TREF_KEYINDEX)) { + lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", + s, tref_type(tr)); } else { /* Number repr. may differ, but other types must be the same. */ lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : @@ -283,9 +286,9 @@ static void canonicalize_slots(jit_State *J) if (LJ_DUALNUM) return; for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { TRef tr = J->slot[s]; - if (tref_isinteger(tr)) { + if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { IRIns *ir = IR(tref_ref(tr)); - if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) + if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); } } @@ -606,6 +609,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) { if (J->parent == 0 && J->exitno == 0) { if (pc == J->startpc && J->framedepth + J->retdepth == 0) { + if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ /* Same loop? */ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); @@ -646,6 +650,68 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) } /* Side trace continues across a loop that's left or not entered. */ } +/* Record ITERN. */ +static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + UNUSED(ra); UNUSED(rb); + setintV(&J->errinfo, (int32_t)BC_ITERN); + lj_trace_err_info(J, LJ_TRERR_NYIBC); +#else + RecordIndex ix; + /* Since ITERN is recorded at the start, we need our own loop detection. 
*/ + if (J->pc == J->startpc && J->cur.nins > REF_FIRST && + J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ + return LOOPEV_ENTER; + } + J->maxslot = ra; + lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ + ix.tab = getslot(J, ra-2); + ix.key = J->base[ra-1] ? J->base[ra-1] : + sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX); + copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); + copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); + ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ + ix.mobj = 1; /* We need the next index, too. */ + J->maxslot = ra + lj_record_next(J, &ix); + J->needsnap = 1; + if (!tref_isnil(ix.key)) { /* Looping back? */ + J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ + J->base[ra] = ix.key; + J->base[ra+1] = ix.val; + J->pc += bc_j(J->pc[1])+2; + return LOOPEV_ENTER; + } else { + J->maxslot = ra-3; + J->pc += 2; + return LOOPEV_LEAVE; + } +#endif +} + +/* Record ISNEXT. */ +static void rec_isnext(jit_State *J, BCReg ra) +{ + cTValue *b = &J->L->base[ra-3]; + if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && + tvistab(b+1) && tvisnil(b+2)) { + /* These checks are folded away for a compiled pairs(). */ + TRef func = getslot(J, ra-3); + TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); + (void)getslot(J, ra-2); /* Type check for table. */ + (void)getslot(J, ra-1); /* Type check for nil key. */ + J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; + J->maxslot = ra; + } else { /* Abort trace. Interpreter will despecialize bytecode. */ + lj_trace_err(J, LJ_TRERR_RECERR); + } +} + /* -- Record profiler hook checks ----------------------------------------- */ #if LJ_HASPROFILE @@ -716,7 +782,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) /* NYI: io_file_iter doesn't have an ffid, yet. 
*/ { /* Specialize to the ffid. */ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); - emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid)); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); } return tr; default: @@ -1565,6 +1631,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } } +/* Determine result type of table traversal. */ +static IRType rec_next_types(GCtab *t, uint32_t idx) +{ + for (; idx < t->asize; idx++) { + cTValue *a = arrayslot(t, idx); + if (LJ_LIKELY(!tvisnil(a))) + return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); + } + idx -= t->asize; + for (; idx <= t->hmask; idx++) { + Node *n = &noderef(t->node)[idx]; + if (!tvisnil(&n->val)) + return itype2irt(&n->key) + (itype2irt(&n->val) << 8); + } + return IRT_NIL + (IRT_NIL << 8); +} + +/* Record a table traversal step aka next(). */ +int lj_record_next(jit_State *J, RecordIndex *ix) +{ + IRType t, tkey, tval; + TRef trvk; + t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); + tkey = (t & 0xff); tval = (t >> 8); + trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); + if (ix->mobj || tkey == IRT_NIL) { + TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); + /* Always check for invalid key from next() for nil result. */ + if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); + ix->mobj = idx; + } + ix->key = lj_record_vload(J, trvk, 1, tkey); + if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ + ix->val = TREF_NIL; + return 1; + } else { /* Need value. 
*/ + ix->val = lj_record_vload(J, trvk, 0, tval); + return 2; + } +} + static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) { RecordIndex ix; @@ -2440,6 +2547,9 @@ void lj_record_ins(jit_State *J) case BC_ITERL: rec_loop_interp(J, pc, rec_iterl(J, *pc)); break; + case BC_ITERN: + rec_loop_interp(J, pc, rec_itern(J, ra, rb)); + break; case BC_LOOP: rec_loop_interp(J, pc, rec_loop(J, ra, 1)); break; @@ -2468,6 +2578,10 @@ void lj_record_ins(jit_State *J) J->maxslot = ra; /* Shrink used slots. */ break; + case BC_ISNEXT: + rec_isnext(J, ra); + break; + /* -- Function headers -------------------------------------------------- */ case BC_FUNCF: @@ -2497,8 +2611,6 @@ void lj_record_ins(jit_State *J) break; } /* fallthrough */ - case BC_ITERN: - case BC_ISNEXT: case BC_UCLO: case BC_FNEW: setintV(&J->errinfo, (int32_t)op); @@ -2550,6 +2662,13 @@ static const BCIns *rec_setup_root(jit_State *J) lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); J->bc_min = pc; break; + case BC_ITERN: + lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); + J->maxslot = ra; + J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); + J->bc_min = pc+2 + bc_j(pc[1]); + J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ + break; case BC_LOOP: /* Only check BC range for real loops, but not for "repeat until true". */ pcj = pc + bc_j(ins); @@ -2657,6 +2776,7 @@ void lj_record_setup(jit_State *J) J->pc = rec_setup_root(J); /* Note: the loop instruction itself is recorded at the end and not ** at the start! So snapshot #0 needs to point to the *next* instruction. + ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. 
*/ lj_snap_add(J); if (bc_op(J->cur.startins) == BC_FORL) diff --git a/src/lj_record.h b/src/lj_record.h index 3bf461c8d8..01cc6041ac 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -38,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); +LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); LJ_FUNC void lj_record_ins(jit_State *J); LJ_FUNC void lj_record_setup(jit_State *J); diff --git a/src/lj_snap.c b/src/lj_snap.c index 40bfad925b..97097a5b18 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -463,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) MSize j; for (j = 0; j < nmax; j++) if (snap_ref(map[j]) == ref) - return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); + return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); return 0; } @@ -538,10 +538,12 @@ void lj_snap_replay(jit_State *J, GCtrace *T) uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); + if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: - J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ + /* Same as TREF_* flags. */ + J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); if ((sn & SNAP_FRAME)) J->baseslot = s+1; @@ -961,6 +963,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; #endif + } else if ((sn & SNAP_KEYINDEX)) { + /* An IRT_INT key index slot is restored as a number. Undo this. */ + o->u32.lo = (uint32_t)(LJ_DUALNUM ? 
intV(o) : lj_num2int(numV(o))); + o->u32.hi = LJ_KEYINDEX; } } } diff --git a/src/lj_trace.c b/src/lj_trace.c index a0ff8864c7..be886f35e1 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -215,8 +215,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) break; case BC_JITERL: case BC_JLOOP: - lj_assertJ(op == BC_ITERL || op == BC_LOOP || bc_isret(op), - "bad original bytecode %d", op); + lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || + bc_isret(op), "bad original bytecode %d", op); *pc = T->startins; break; case BC_JMP: @@ -411,7 +411,7 @@ static void trace_start(jit_State *J) TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ - if (J->parent == 0 && J->exitno == 0) { + if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { /* Lazy bytecode patching to disable hotcount events. */ lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, @@ -496,6 +496,7 @@ static void trace_stop(jit_State *J) J->cur.nextroot = pt->trace; pt->trace = (TraceNo1)traceno; break; + case BC_ITERN: case BC_RET: case BC_RET0: case BC_RET1: @@ -575,7 +576,8 @@ static int trace_abort(jit_State *J) return 1; /* Retry ASM with new MCode area. */ } /* Penalize or blacklist starting bytecode instruction. */ - if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { + if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins)) && + bc_op(J->cur.startins) != BC_ITERN) { if (J->exitno == 0) { BCIns *startpc = mref(J->cur.startpc, BCIns); if (e == LJ_TRERR_RETRY) @@ -651,8 +653,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) J->state = LJ_TRACE_RECORD; /* trace_start() may change state. 
*/ trace_start(J); lj_dispatch_update(J2G(J)); - break; + if (J->state != LJ_TRACE_RECORD_1ST) + break; + /* fallthrough */ + case LJ_TRACE_RECORD_1ST: + J->state = LJ_TRACE_RECORD; + /* fallthrough */ case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); @@ -899,13 +906,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } if (bc_op(*pc) == BC_JLOOP) { BCIns *retpc = &traceref(J, bc_d(*pc))->startins; - if (bc_isret(bc_op(*retpc))) { + int isret = bc_isret(bc_op(*retpc)); + if (isret || bc_op(*retpc) == BC_ITERN) { if (J->state == LJ_TRACE_RECORD) { J->patchins = *pc; J->patchpc = (BCIns *)pc; *J->patchpc = *retpc; J->bcskip = 1; - } else { + } else if (isret) { pc = retpc; setcframe_pc(cf, pc); } diff --git a/src/lj_vm.h b/src/lj_vm.h index 84348e7a4d..81ee8e28ea 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -51,6 +51,7 @@ LJ_ASMF void lj_vm_inshook(void); LJ_ASMF void lj_vm_rethook(void); LJ_ASMF void lj_vm_callhook(void); LJ_ASMF void lj_vm_profhook(void); +LJ_ASMF void lj_vm_IITERN(void); /* Trace exit handling. */ LJ_ASMF void lj_vm_exit_handler(void); @@ -98,6 +99,7 @@ LJ_ASMF double lj_vm_trunc_sf(double); #if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); #endif +LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); #endif /* Continuations for metamethods. 
*/ diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 0e80bf00d2..3a73e00bc6 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2424,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_TMP0, CARG3 + |.define NEXT_TMP1, CARG4 + |.define NEXT_LIM, r12 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY_I, [sp, #8] + |.define NEXT_RES_KEY_IT, [sp, #12] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldr NEXT_TMP0, NEXT_TAB->array + | ldr NEXT_LIM, NEXT_TAB->asize + | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 + |1: // Traverse array part. + | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM + | bhs >5 + | ldr NEXT_TMP1, [NEXT_TMP0, #4] + | str NEXT_IDX, NEXT_RES_KEY_I + | add NEXT_TMP0, NEXT_TMP0, #8 + | add NEXT_IDX, NEXT_IDX, #1 + | checktp NEXT_TMP1, LJ_TNIL + | beq <1 // Skip holes in array part. + | ldr NEXT_TMP0, [NEXT_TMP0, #-8] + | mov NEXT_RES, NEXT_RES_PTR + | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. + | mvn NEXT_TMP0, #~LJ_TISNUM + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + | + |5: // Traverse hash part. + | ldr NEXT_TMP0, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP1, NODE:NEXT_RES->val.it + | checktp NEXT_TMP1, LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bxne lr + | // Skip holes in hash part. + | add NEXT_RES, NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. Set the key to nil (not the value). 
+ | mvn NEXT_TMP0, #0 + | mov NEXT_RES, NEXT_RES_PTR + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -3914,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | add RA, BASE, RA | ldr TAB:RB, [RA, #-16] | ldr CARG1, [RA, #-8] // Get index from control var. @@ -3992,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov OP, #BC_ITERC | strb CARG1, [PC, #-4] | sub PC, RC, #0x20000 + |.if JIT + | ldrb CARG1, [PC] + | cmp CARG1, #BC_ITERN + | bne >6 + |.endif | strb OP, [PC] // Subsumes ins_next1. | ins_next2 | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | ldrh CARG2, [PC, #2] + | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] + | // Subsumes ins_next1 and ins_next2. 
+ | ldr INS, TRACE:CARG1->startins + | bfi INS, OP, #0, #8 + | str INS, [PC], #4 + | b <1 + |.endif break; case BC_VARG: diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 2a2e3a9aa8..1abc6eccde 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -2064,6 +2064,63 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2w + |.define NEXT_LIM, CARG3w + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP0w, TMP0w + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP1w, TMP1w + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY, [sp, #8] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2w. + |->vm_next: + |.if JIT + | ldr NEXT_LIM, NEXT_TAB->asize + | ldr NEXT_TMP1, NEXT_TAB->array + |1: // Traverse array part. + | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM + | bhs >5 // Index points after array part? + | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] + | cmn NEXT_TMP0, #-LJ_TNIL + | cinc NEXT_IDX, NEXT_IDX, eq + | beq <1 // Skip holes in array part. + | str NEXT_TMP0, NEXT_RES_VAL + | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY + | add NEXT_IDX, NEXT_IDX, #1 + | mov NEXT_RES, NEXT_RES_PTR + |4: + | ret + | + |5: // Traverse hash part. + | ldr NEXT_TMP1w, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP0, NODE:NEXT_RES->val + | cmn NEXT_TMP0, #-LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bne <4 + | // Skip holes in hash part. + | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. 
Set the key to nil (not the value). + | movn NEXT_TMP0, #0 + | str NEXT_TMP0, NEXT_RES_KEY + | mov NEXT_RES, NEXT_RES_PTR + | ret + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -3320,10 +3377,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop |.endif + |->vm_IITERN: + | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] | ldrh TMP3w, [PC, # OFS_RD] @@ -3390,11 +3448,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next | |5: // Despecialize bytecode if any of the checks fail. + |.if JIT + | ldrb TMP2w, [RC, # OFS_OP] + |.endif | mov TMP0, #BC_JMP | mov TMP1, #BC_ITERC | strb TMP0w, [PC, #-4+OFS_OP] + |.if JIT + | cmp TMP2w, #BC_ITERN + | bne >6 + |.endif | strb TMP1w, [RC, # OFS_OP] | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr RA, [GL, #GL_J(trace)] + | ldrh TMP2w, [RC, # OFS_RD] + | ldr TRACE:RA, [RA, TMP2, lsl #3] + | ldr TMP2w, TRACE:RA->startins + | bfxil TMP2w, TMP1w, #0, #8 + | str TMP2w, [RC] + | b <1 + |.endif break; case BC_VARG: diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 3b0ea4a23a..f70c613e40 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -190,7 +190,7 @@ |//----------------------------------------------------------------------- | |// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro +|.macro NYI; .long 0xec1cf0f0; .endmacro | |// Macros to mark delay slots. 
|.macro ., a; a; .endmacro @@ -2798,6 +2798,73 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL_I, 0(sp) + |.define NEXT_RES_VAL_IT, 4(sp) + |.define NEXT_RES_KEY_I, 8(sp) + |.define NEXT_RES_KEY_IT, 12(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | lw NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | lw NEXT_TMP2, 4(NEXT_TMP1) + | sw NEXT_IDX, NEXT_RES_KEY_I + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | lw NEXT_TMP0, 0(NEXT_TMP1) + | li AT, LJ_TISNUM + | sw NEXT_TMP2, NEXT_RES_VAL_IT + | sw AT, NEXT_RES_KEY_IT + | sw NEXT_TMP0, NEXT_RES_VAL_I + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | lw NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. 
Set the key to nil (not the value). + | sw NEXT_NIL, NEXT_RES_KEY_IT + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -4521,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. + |.if JIT and ENDIAN_LE + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | addu RA, BASE, RA | lw TAB:RB, -16+LO(RA) | lw RC, -8+LO(RA) // Get index from control var. @@ -4614,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP3, BC_JMP | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 + | addu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif | b <1 |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | lw TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 2 + | addu TMP0, TMP0, TMP2 + | lw TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif break; case BC_VARG: diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 0d28326ad6..5c5d761c2c 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -193,7 +193,7 @@ |//----------------------------------------------------------------------- | |// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro +|.macro NYI; .long 0xec1cf0f0; .endmacro | |// Macros to mark delay slots. 
|.macro ., a; a; .endmacro @@ -2904,6 +2904,70 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | ld NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | li AT, LJ_TISNUM + | ld NEXT_TMP2, 0(NEXT_TMP1) + | dsll AT, AT, 47 + | or NEXT_TMP1, NEXT_IDX, AT + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | sd NEXT_TMP2, NEXT_RES_VAL + | sd NEXT_TMP1, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | ld NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | ld NEXT_TMP2, NODE:NEXT_RES_VK->val + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. Set the key to nil (not the value). + | sd NEXT_NIL, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. 
addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -4700,10 +4764,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. + |.if JIT and ENDIAN_LE + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | daddu RA, BASE, RA | ld TAB:RB, -16(RA) | lw RC, -8+LO(RA) // Get index from control var. @@ -4789,8 +4854,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) | daddu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif | b <1 |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | ld TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 3 + | daddu TMP0, TMP0, TMP2 + | ld TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif break; case BC_VARG: diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index d4133a653e..d9e19298f9 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -3163,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx) | blr |.endif | + |->vm_next: + |.if JIT + | NYI // On big-endian. 
+ |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -5112,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | // NYI on big-endian |.endif + |->vm_IITERN: | add RA, BASE, RA | lwz TAB:RB, -12(RA) | lwz RC, -4(RA) // Get index from control var. @@ -5244,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | stb TMP0, -1(PC) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + | // NYI on big-endian: unpatch JLOOP. | stb TMP1, 3(PC) | b <1 break; diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index d2119bc449..fdffd4b6c3 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2633,6 +2633,67 @@ static void build_subroutines(BuildCtx *ctx) | .if X64WIN; pop rsi; .endif | ret | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2d + |.define NEXT_IDXa, CARG2 + |.define NEXT_PTR, RC + |.define NEXT_PTRd, RCd + |.define NEXT_TMP, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. 
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] + | cmp NEXT_TMP, LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR], NEXT_TMP + |.if DUALNUM + | setint NEXT_TMP, NEXT_IDXa + | mov qword [NEXT_PTR+qword*1], NEXT_TMP + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+qword*1], xmm0 + |.endif + | NEXT_RES_IDX 1 + | ret + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTR, NEXT_TAB->node + | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | ret + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR+qword*1], LJ_TNIL + | ret + |.endif + | |//----------------------------------------------------------------------- |//-- Assertions --------------------------------------------------------- |//----------------------------------------------------------------------- @@ -4044,10 +4105,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop RBd |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB | mov RCd, [BASE+RA*8-8] // Get index from control var. @@ -4118,8 +4180,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. | mov PC_OP, BC_JMP | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif | mov byte [PC], BC_ITERC | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. 
+ | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RCd, word [PC+2] + | mov TRACE:RA, [RA+RC*8] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif break; case BC_VARG: diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 718cb8f02f..cbf0810cc3 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -3120,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endif | + |.define NEXT_TAB, TAB:FCARG1 + |.define NEXT_IDX, FCARG2 + |.define NEXT_PTR, RCa + |.define NEXT_PTRd, RC + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64 + |.define NEXT_TMP, CARG3d + |.define NEXT_TMPq, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_ENTER; .endmacro + |.macro NEXT_LEAVE; ret; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + |.else + |.define NEXT_ASIZE, esi + |.define NEXT_TMP, edi + |.macro NEXT_ENTER; push esi; push edi; .endmacro + |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro + |.define NEXT_RES_PTR, [esp+dword*3] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | NEXT_ENTER + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. 
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + |.if X64 + | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] + | mov qword [NEXT_PTR], NEXT_TMPq + |.else + | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] + | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] + | mov dword [NEXT_PTR+4], NEXT_ASIZE + | mov dword [NEXT_PTR], NEXT_TMP + |.endif + |.if DUALNUM + | mov dword [NEXT_PTR+dword*3], LJ_TISNUM + | mov dword [NEXT_PTR+dword*2], NEXT_IDX + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+dword*2], xmm0 + |.endif + | NEXT_RES_IDX 1 + | NEXT_LEAVE + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTRd, dword NEXT_TAB->node + | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | NEXT_LEAVE + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov dword [NEXT_PTR+dword*3], LJ_TNIL + | NEXT_LEAVE + |.endif + | |//----------------------------------------------------------------------- |//-- Assertions --------------------------------------------------------- |//----------------------------------------------------------------------- @@ -4771,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop RB |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TMP1, KBASE // Need two more free registers. 
| mov TMP2, DISPATCH | mov TAB:RB, [BASE+RA*8-16] @@ -4868,8 +4949,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. | mov PC_OP, BC_JMP | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif | mov byte [PC], BC_ITERC | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RC, word [PC+2] + | mov TRACE:RA, [RA+RC*4] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif break; case BC_VARG: