diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/BytecodeCompiler.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/BytecodeCompiler.kt index 62565f8..2ebadbb 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/BytecodeCompiler.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/BytecodeCompiler.kt @@ -5038,18 +5038,29 @@ class BytecodeCompiler( private fun compileListFillIntCall(ref: MethodCallRef): CompiledValue? { if (ref.name != "fill" || !isListTypeRef(ref.receiver)) return null - if (ref.args.size != 2 || ref.args.any { it.isSplat || it.name != null }) return null - val lambdaRef = ((ref.args[1].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null + if (ref.args.size != 2 && ref.args.size != 3) return null + if (ref.args.any { it.isSplat || it.name != null }) return null + val lambdaArgIndex = ref.args.lastIndex + val lambdaRef = ((ref.args[lambdaArgIndex].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null if (lambdaRef.inferredReturnClass != ObjInt.type) return null val size = compileArgValue(ref.args[0].value) ?: return null if (size.type != SlotType.INT) return null + val capacity = if (ref.args.size == 3) { + val compiled = compileArgValue(ref.args[1].value) ?: return null + if (compiled.type != SlotType.INT) return null + compiled + } else null lambdaRef.inlineBodyRef?.let { inlineRef -> - return compileInlineListFillInt(size, lambdaRef, inlineRef) + return compileInlineListFillInt(size, capacity, lambdaRef, inlineRef) } run { - val callable = ensureObjSlot(compileArgValue(ref.args[1].value) ?: return null) + val callable = ensureObjSlot(compileArgValue(ref.args[lambdaArgIndex].value) ?: return null) val dst = allocSlot() - builder.emit(Opcode.LIST_FILL_INT, size.slot, callable.slot, dst) + if (capacity != null) { + builder.emit(Opcode.LIST_FILL_INT_CAP, size.slot, capacity.slot, callable.slot, dst) + } else { + builder.emit(Opcode.LIST_FILL_INT, size.slot, 
callable.slot, dst) + } updateSlotType(dst, SlotType.OBJ) slotObjClass[dst] = ObjList.type listElementClassBySlot[dst] = ObjInt.type @@ -5747,8 +5758,13 @@ class BytecodeCompiler( } } - private fun compileInlineListFillInt(size: CompiledValue, lambdaRef: LambdaFnRef, inlineRef: ObjRef): CompiledValue { - if (isImplicitItIdentityRef(inlineRef)) { + private fun compileInlineListFillInt( + size: CompiledValue, + capacity: CompiledValue?, + lambdaRef: LambdaFnRef, + inlineRef: ObjRef + ): CompiledValue { + if (capacity == null && isImplicitItIdentityRef(inlineRef)) { val dst = allocSlot() builder.emit(Opcode.LIST_IOTA_INT, size.slot, dst) updateSlotType(dst, SlotType.OBJ) @@ -5758,7 +5774,11 @@ class BytecodeCompiler( } val dst = allocSlot() - builder.emit(Opcode.LIST_NEW_INT, size.slot, dst) + if (capacity != null) { + builder.emit(Opcode.LIST_NEW_INT_CAP, size.slot, capacity.slot, dst) + } else { + builder.emit(Opcode.LIST_NEW_INT, size.slot, dst) + } updateSlotType(dst, SlotType.OBJ) slotObjClass[dst] = ObjList.type listElementClassBySlot[dst] = ObjInt.type diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdBuilder.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdBuilder.kt index 2ab3faa..344769e 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdBuilder.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdBuilder.kt @@ -239,6 +239,10 @@ class CmdBuilder { listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) Opcode.LIST_IOTA_INT -> listOf(OperandKind.SLOT, OperandKind.SLOT) + Opcode.LIST_NEW_INT_CAP -> + listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) + Opcode.LIST_FILL_INT_CAP -> + listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) Opcode.MAKE_RANGE -> listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) Opcode.LIST_LITERAL -> @@ -844,6 +848,8 @@ class CmdBuilder { 
Opcode.LIST_NEW_INT -> CmdListNewInt(operands[0], operands[1]) Opcode.LIST_FILL_INT -> CmdListFillInt(operands[0], operands[1], operands[2]) Opcode.LIST_IOTA_INT -> CmdListIotaInt(operands[0], operands[1]) + Opcode.LIST_NEW_INT_CAP -> CmdListNewIntCap(operands[0], operands[1], operands[2]) + Opcode.LIST_FILL_INT_CAP -> CmdListFillIntCap(operands[0], operands[1], operands[2], operands[3]) Opcode.LIST_LITERAL -> CmdListLiteral(operands[0], operands[1], operands[2], operands[3]) Opcode.GET_MEMBER_SLOT -> CmdGetMemberSlot(operands[0], operands[1], operands[2], operands[3]) Opcode.SET_MEMBER_SLOT -> CmdSetMemberSlot(operands[0], operands[1], operands[2], operands[3]) diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdDisassembler.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdDisassembler.kt index 14362a9..ceb54be 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdDisassembler.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdDisassembler.kt @@ -498,6 +498,8 @@ object CmdDisassembler { is CmdListNewInt -> Opcode.LIST_NEW_INT to intArrayOf(cmd.sizeSlot, cmd.dst) is CmdListFillInt -> Opcode.LIST_FILL_INT to intArrayOf(cmd.sizeSlot, cmd.callableSlot, cmd.dst) is CmdListIotaInt -> Opcode.LIST_IOTA_INT to intArrayOf(cmd.sizeSlot, cmd.dst) + is CmdListNewIntCap -> Opcode.LIST_NEW_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.dst) + is CmdListFillIntCap -> Opcode.LIST_FILL_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.callableSlot, cmd.dst) is CmdListLiteral -> Opcode.LIST_LITERAL to intArrayOf(cmd.planId, cmd.baseSlot, cmd.count, cmd.dst) is CmdGetMemberSlot -> Opcode.GET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.dst) is CmdSetMemberSlot -> Opcode.SET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.valueSlot) @@ -627,6 +629,10 @@ object CmdDisassembler { listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) 
Opcode.LIST_IOTA_INT -> listOf(OperandKind.SLOT, OperandKind.SLOT) + Opcode.LIST_NEW_INT_CAP -> + listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) + Opcode.LIST_FILL_INT_CAP -> + listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT) Opcode.LIST_LITERAL -> listOf(OperandKind.CONST, OperandKind.SLOT, OperandKind.COUNT, OperandKind.SLOT) Opcode.GET_MEMBER_SLOT -> diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdRuntime.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdRuntime.kt index 3749ab9..c7691fb 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdRuntime.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/CmdRuntime.kt @@ -3455,6 +3455,42 @@ class CmdListFillInt( } } +/** Executes LIST_FILL_INT_CAP: builds an ObjList over a LongArray of length max(size, capacity) with logical size taken from [sizeSlot], sets element i to the Int produced by calling the callable in [callableSlot] with i, and stores the list in [dst]. Raises an illegal-argument error for a negative size or capacity and a class-cast error when the callable returns a non-ObjInt. */ class CmdListFillIntCap( + internal val sizeSlot: Int, + internal val capacitySlot: Int, + internal val callableSlot: Int, + internal val dst: Int, +) : Cmd() { + override suspend fun perform(frame: CmdFrame) { + val size = frame.getInt(sizeSlot).toInt() + if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative") + val capacity = frame.getInt(capacitySlot).toInt() + if (capacity < 0) frame.ensureScope().raiseIllegalArgument("list capacity must be non-negative") /* validate the raw capacity before clamping: once size >= 0 holds, maxOf(size, capacity) can never be negative, so checking the clamped value would be dead code */ + val actualCapacity = maxOf(size, capacity) /* a capacity smaller than size is raised to size so every element fits */ + val callable = frame.storedSlotObj(callableSlot) + val scope = frame.ensureScope() + val result = ObjList(LongArray(actualCapacity), size) + for (i in 0 until size) { + val args = Arguments(ObjInt.of(i.toLong())) + val value = if (callable is BytecodeLambdaCallable && callable.supportsImplicitIntFillFastPath()) { + callable.invokeImplicitIntArgFast(scope, i.toLong()) ?: callable.invokeImplicitIntArg(scope, i.toLong()) + } else if (callable is BytecodeArgCallable) { + callable.callWithArgsFast(scope, args) ?: run { + val child = scope.createChildScope(scope.pos, args = args) + (callable as? 
BytecodeCallable)?.callOnFast(child) ?: callable.callOn(child) + } + } else { + val child = scope.createChildScope(scope.pos, args = args) + (callable as? BytecodeCallable)?.callOnFast(child) ?: callable.callOn(child) + } + val intValue = (value as? ObjInt)?.value ?: scope.raiseClassCastError("expected Int fill result") + result.setIntAtFast(i, intValue) + } + frame.storeObjResult(dst, result) + return + } +} + private fun decodeMemberId(id: Int): Pair { return if (id <= -2) { Pair(-id - 2, true) @@ -3859,6 +3895,22 @@ class CmdListNewInt( } } +/** Executes LIST_NEW_INT_CAP: stores in [dst] an ObjList over a zero-initialised LongArray of length max(size, capacity) with logical size taken from [sizeSlot]. Raises an illegal-argument error for a negative size or capacity. */ class CmdListNewIntCap( + internal val sizeSlot: Int, + internal val capacitySlot: Int, + internal val dst: Int, +) : Cmd() { + override suspend fun perform(frame: CmdFrame) { + val size = frame.getInt(sizeSlot).toInt() + if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative") + val capacity = frame.getInt(capacitySlot).toInt() + if (capacity < 0) frame.ensureScope().raiseIllegalArgument("list capacity must be non-negative") /* validate the raw capacity before clamping: maxOf(size, capacity) is never negative once size >= 0 holds */ + val actualCapacity = maxOf(size, capacity) /* a capacity smaller than size is raised to size */ + frame.storeObjResult(dst, ObjList(LongArray(actualCapacity), size)) + return + } +} + class CmdGetIndex( internal val targetSlot: Int, internal val indexSlot: Int, diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/Opcode.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/Opcode.kt index 9283326..ffa8b0e 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/Opcode.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/bytecode/Opcode.kt @@ -190,6 +190,8 @@ enum class Opcode(val code: Int) { GET_DYNAMIC_MEMBER(0xAC), SET_DYNAMIC_MEMBER(0xAD), CALL_DYNAMIC_MEMBER(0xAE), + LIST_NEW_INT_CAP(0xAF), + LIST_FILL_INT_CAP(0xB0), RESOLVE_SCOPE_SLOT(0xB1), LOAD_OBJ_ADDR(0xB2), diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjList.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjList.kt index 1fc58ce..0588485 100644 --- 
a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjList.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjList.kt @@ -165,9 +165,9 @@ open class ObjList(initialList: MutableList = mutableListOf()) : Obj() { } } - internal constructor(intValues: LongArray) : this(mutableListOf()) { + internal constructor(intValues: LongArray, size: Int = intValues.size) : this(mutableListOf()) { primitiveIntList = intValues - primitiveIntSize = intValues.size + primitiveIntSize = size boxedList = null } @@ -519,8 +519,8 @@ open class ObjList(initialList: MutableList = mutableListOf()) : Obj() { doc = "Append one or more elements to the end of this list.", moduleName = "lyng.stdlib" ) { - val l = thisAs().list - for (a in args) l.add(a) + val l = thisAs() + for (a in args) l.appendFast(a) ObjVoid } addFnDoc( diff --git a/lynglib/src/commonTest/kotlin/BytecodeRecentOpsTest.kt b/lynglib/src/commonTest/kotlin/BytecodeRecentOpsTest.kt index a708bc9..94212da 100644 --- a/lynglib/src/commonTest/kotlin/BytecodeRecentOpsTest.kt +++ b/lynglib/src/commonTest/kotlin/BytecodeRecentOpsTest.kt @@ -213,6 +213,24 @@ class BytecodeRecentOpsTest { assertEquals(4, scope.eval("calc()").toInt()) } + @Test + fun listFillWithCapacityUsesPrimitiveCapacityBytecode() = runTest { + val scope = Script.newScope() + scope.eval( + """ + fun calc() { + val xs = List.fill(5, 12) { it * 2 } + xs.add(99) + xs[0] + xs[4] + xs[5] + } + """.trimIndent() + ) + val disasm = scope.disassembleSymbol("calc") + assertTrue(disasm.contains("LIST_NEW_INT_CAP"), disasm) + assertFalse(disasm.contains("LIST_FILL_INT_CAP"), disasm) + assertEquals(107, scope.eval("calc()").toInt()) + } + @Test fun directLambdaLiteralCallUsesInlineBytecode() = runTest { val scope = Script.newScope() diff --git a/lynglib/src/commonTest/kotlin/net/sergeych/lyng/OptTest.kt b/lynglib/src/commonTest/kotlin/net/sergeych/lyng/OptTest.kt index bc26f27..1cb2d8b 100644 --- a/lynglib/src/commonTest/kotlin/net/sergeych/lyng/OptTest.kt +++ 
b/lynglib/src/commonTest/kotlin/net/sergeych/lyng/OptTest.kt @@ -22,6 +22,7 @@ import kotlinx.coroutines.test.runTest import net.sergeych.lyng.obj.toInt import kotlin.test.Test import kotlin.test.assertEquals +import kotlin.time.Duration.Companion.milliseconds import kotlin.time.TimeSource class OptTest { @@ -59,4 +60,25 @@ class OptTest { } println("add-to-array best=${bestMs}ms avg=${totalMs / passes}ms after warmup") } + + @Test + fun testAddToArray2() = runTest { + eval( + $$""" + import lyng.time + val n = 700_000 + fun tm(block: ()->T): T { + val t = Instant() + block().also { + println("tm: ${Instant() - t}") + } + } + val x = tm { List.fill(n) { it * 10 + 1 } } + val y = tm { List.fill(n, n + 10) { it * 10 + 1 } } + tm { x.add(-1) } + tm { y.add(-2) } + """.trimIndent() + ) + } } + diff --git a/notes/ai_state.md b/notes/ai_state.md index 9e7a38d..d2412a8 100644 --- a/notes/ai_state.md +++ b/notes/ai_state.md @@ -13,6 +13,11 @@ Current focus Key recent changes - Updated AI helper docs to reflect static typing, type expressions, and compile-time-only name resolution. - Added stdlib random API: `Random` and deterministic `SeededRandom` with `nextInt`, `nextFloat`, and generic `next(range)`. +- Generalized primitive list optimization for compiler-generated `List.fill`: + - `List.fill(size) { intExpr }` and `List.fill(size, capacity) { intExpr }` now both have bytecode fast paths. + - Added `LIST_NEW_INT_CAP` / `LIST_FILL_INT_CAP` for the 3-arg capacity-preserving form. +- Fixed `ObjList.add(...)` to preserve primitive-int backing storage instead of forcing boxing through `.list`. +- `OptTest.testAddToArray2` no longer shows the old 10x anomaly for `List.fill(n, n + 10)` or append-to-extended-list. Known failing tests - None in :lynglib:jvmTest after Random/SeededRandom integration. 
diff --git a/notes/fast_ops_optimizations_plan.md b/notes/fast_ops_optimizations_plan.md index 00ebc64..5908e13 100644 --- a/notes/fast_ops_optimizations_plan.md +++ b/notes/fast_ops_optimizations_plan.md @@ -22,18 +22,25 @@ Candidates (not started) 6) Box/unbox audit (done) - Unbox ObjInt/ObjReal in assign-op when target is INT/REAL to avoid boxing + obj ops. - MixedCompareBenchmarkTest: 240 ms -> 234 ms. -7) Mixed compare coverage +7) Primitive list fill with capacity (done) + - Extended the compiler/runtime fast path from `List.fill(size) { intExpr }` to `List.fill(size, capacity) { intExpr }`. + - Added `LIST_NEW_INT_CAP` and `LIST_FILL_INT_CAP` so the 3-arg form keeps primitive-int storage instead of falling back to generic stdlib code. + - `OptTest.testAddToArray2`: `List.fill(n, n + 10) { ... }` dropped from the prior anomaly (~10x slower than 2-arg fill) to the same range as `List.fill(n) { ... }`, roughly `56-67 ms` vs `46-75 ms` after warmup. +8) Primitive list append preservation (done) + - Fixed `ObjList.add(...)` to append through the primitive-aware fast path instead of forcing `.list` and boxing the backing storage. + - `OptTest.testAddToArray2`: appending to the pre-extended list dropped from the prior anomaly (~10x slower) to sub-millisecond / low-millisecond timings (`~0.05-0.16 ms` for the extended list path, `~1.6-4.3 ms` for the baseline path, depending on warmup). +9) Mixed compare coverage - Emit CMP_*_REAL when one operand is known ObjReal in more expression forms (not just assign-op). - Verify with disassembly that fast cmp opcodes are emitted. -8) Range-loop invariant hoist +10) Range-loop invariant hoist - Cache range end/step into temps once per loop; avoid repeated slot reads/boxing in body. - Confirm no extra CONST_OBJ in hot path. -9) Boxing elision pass +11) Boxing elision pass - Remove redundant BOX_OBJ when value feeds only primitive ops afterward (local liveness). - Ensure no impact on closures/escaping values. 
-10) Closed-type fast paths expansion +12) Closed-type fast paths expansion - Apply closed-type trust for ObjBool/ObjInt/ObjReal/ObjString in ternaries and conditional chains. - Guard with exact non-null temp/slot checks only. -11) VM hot op micro-optimizations +13) VM hot op micro-optimizations - Reduce frame reads/writes in ADD_INT, MUL_REAL, CMP_*_INT/REAL when operands are temps. - Compare against baseline; revert if regression after 10-run median.