Optimize primitive list fill capacity and append

This commit is contained in:
Sergey Chernov 2026-04-21 21:37:04 +03:00
parent fbb5688696
commit 953f237ca3
10 changed files with 155 additions and 17 deletions

View File

@ -5038,18 +5038,29 @@ class BytecodeCompiler(
private fun compileListFillIntCall(ref: MethodCallRef): CompiledValue? {
if (ref.name != "fill" || !isListTypeRef(ref.receiver)) return null
if (ref.args.size != 2 || ref.args.any { it.isSplat || it.name != null }) return null
val lambdaRef = ((ref.args[1].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null
if (ref.args.size != 2 && ref.args.size != 3) return null
if (ref.args.any { it.isSplat || it.name != null }) return null
val lambdaArgIndex = ref.args.lastIndex
val lambdaRef = ((ref.args[lambdaArgIndex].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null
if (lambdaRef.inferredReturnClass != ObjInt.type) return null
val size = compileArgValue(ref.args[0].value) ?: return null
if (size.type != SlotType.INT) return null
val capacity = if (ref.args.size == 3) {
val compiled = compileArgValue(ref.args[1].value) ?: return null
if (compiled.type != SlotType.INT) return null
compiled
} else null
lambdaRef.inlineBodyRef?.let { inlineRef ->
return compileInlineListFillInt(size, lambdaRef, inlineRef)
return compileInlineListFillInt(size, capacity, lambdaRef, inlineRef)
}
run {
val callable = ensureObjSlot(compileArgValue(ref.args[1].value) ?: return null)
val callable = ensureObjSlot(compileArgValue(ref.args[lambdaArgIndex].value) ?: return null)
val dst = allocSlot()
builder.emit(Opcode.LIST_FILL_INT, size.slot, callable.slot, dst)
if (capacity != null) {
builder.emit(Opcode.LIST_FILL_INT_CAP, size.slot, capacity.slot, callable.slot, dst)
} else {
builder.emit(Opcode.LIST_FILL_INT, size.slot, callable.slot, dst)
}
updateSlotType(dst, SlotType.OBJ)
slotObjClass[dst] = ObjList.type
listElementClassBySlot[dst] = ObjInt.type
@ -5747,8 +5758,13 @@ class BytecodeCompiler(
}
}
private fun compileInlineListFillInt(size: CompiledValue, lambdaRef: LambdaFnRef, inlineRef: ObjRef): CompiledValue {
if (isImplicitItIdentityRef(inlineRef)) {
private fun compileInlineListFillInt(
size: CompiledValue,
capacity: CompiledValue?,
lambdaRef: LambdaFnRef,
inlineRef: ObjRef
): CompiledValue {
if (capacity == null && isImplicitItIdentityRef(inlineRef)) {
val dst = allocSlot()
builder.emit(Opcode.LIST_IOTA_INT, size.slot, dst)
updateSlotType(dst, SlotType.OBJ)
@ -5758,7 +5774,11 @@ class BytecodeCompiler(
}
val dst = allocSlot()
builder.emit(Opcode.LIST_NEW_INT, size.slot, dst)
if (capacity != null) {
builder.emit(Opcode.LIST_NEW_INT_CAP, size.slot, capacity.slot, dst)
} else {
builder.emit(Opcode.LIST_NEW_INT, size.slot, dst)
}
updateSlotType(dst, SlotType.OBJ)
slotObjClass[dst] = ObjList.type
listElementClassBySlot[dst] = ObjInt.type

View File

@ -239,6 +239,10 @@ class CmdBuilder {
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_IOTA_INT ->
listOf(OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_NEW_INT_CAP ->
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_FILL_INT_CAP ->
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.MAKE_RANGE ->
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_LITERAL ->
@ -844,6 +848,8 @@ class CmdBuilder {
Opcode.LIST_NEW_INT -> CmdListNewInt(operands[0], operands[1])
Opcode.LIST_FILL_INT -> CmdListFillInt(operands[0], operands[1], operands[2])
Opcode.LIST_IOTA_INT -> CmdListIotaInt(operands[0], operands[1])
Opcode.LIST_NEW_INT_CAP -> CmdListNewIntCap(operands[0], operands[1], operands[2])
Opcode.LIST_FILL_INT_CAP -> CmdListFillIntCap(operands[0], operands[1], operands[2], operands[3])
Opcode.LIST_LITERAL -> CmdListLiteral(operands[0], operands[1], operands[2], operands[3])
Opcode.GET_MEMBER_SLOT -> CmdGetMemberSlot(operands[0], operands[1], operands[2], operands[3])
Opcode.SET_MEMBER_SLOT -> CmdSetMemberSlot(operands[0], operands[1], operands[2], operands[3])

View File

@ -498,6 +498,8 @@ object CmdDisassembler {
is CmdListNewInt -> Opcode.LIST_NEW_INT to intArrayOf(cmd.sizeSlot, cmd.dst)
is CmdListFillInt -> Opcode.LIST_FILL_INT to intArrayOf(cmd.sizeSlot, cmd.callableSlot, cmd.dst)
is CmdListIotaInt -> Opcode.LIST_IOTA_INT to intArrayOf(cmd.sizeSlot, cmd.dst)
is CmdListNewIntCap -> Opcode.LIST_NEW_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.dst)
is CmdListFillIntCap -> Opcode.LIST_FILL_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.callableSlot, cmd.dst)
is CmdListLiteral -> Opcode.LIST_LITERAL to intArrayOf(cmd.planId, cmd.baseSlot, cmd.count, cmd.dst)
is CmdGetMemberSlot -> Opcode.GET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.dst)
is CmdSetMemberSlot -> Opcode.SET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.valueSlot)
@ -627,6 +629,10 @@ object CmdDisassembler {
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_IOTA_INT ->
listOf(OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_NEW_INT_CAP ->
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_FILL_INT_CAP ->
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
Opcode.LIST_LITERAL ->
listOf(OperandKind.CONST, OperandKind.SLOT, OperandKind.COUNT, OperandKind.SLOT)
Opcode.GET_MEMBER_SLOT ->

View File

@ -3455,6 +3455,42 @@ class CmdListFillInt(
}
}
/**
 * Bytecode command behind `LIST_FILL_INT_CAP`: builds a primitive-int backed [ObjList] of
 * `size` elements, with room reserved for at least `capacity` elements, and fills it by
 * calling the callable stored in [callableSlot] once per index.
 *
 * Operands:
 * - [sizeSlot]: slot holding the number of elements to produce; must be non-negative.
 * - [capacitySlot]: slot holding the requested backing-array capacity; must be non-negative.
 *   Values smaller than the size are raised to the size.
 * - [callableSlot]: slot holding the fill callable, invoked with the element index.
 * - [dst]: slot receiving the resulting list.
 *
 * Raises an illegal-argument error for a negative size or capacity, and a class-cast error
 * when the callable returns something other than an `ObjInt`.
 */
class CmdListFillIntCap(
    internal val sizeSlot: Int,
    internal val capacitySlot: Int,
    internal val callableSlot: Int,
    internal val dst: Int,
) : Cmd() {
    override suspend fun perform(frame: CmdFrame) {
        val size = frame.getInt(sizeSlot).toInt()
        if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative")
        val capacity = frame.getInt(capacitySlot).toInt()
        // Validate the requested capacity itself: once it is clamped up to `size` (already known
        // to be >= 0) the result can never be negative, so a check after clamping never fires.
        if (capacity < 0) frame.ensureScope().raiseIllegalArgument("list capacity must be non-negative")
        val actualCapacity = maxOf(size, capacity)
        val callable = frame.storedSlotObj(callableSlot)
        val scope = frame.ensureScope()
        val result = ObjList(LongArray(actualCapacity), size)
        for (i in 0 until size) {
            val index = i.toLong()
            val value = when {
                // Implicit-`it` fast path: the index is passed as a raw Long, so no Arguments
                // object is built for this branch.
                callable is BytecodeLambdaCallable && callable.supportsImplicitIntFillFastPath() ->
                    callable.invokeImplicitIntArgFast(scope, index) ?: callable.invokeImplicitIntArg(scope, index)

                callable is BytecodeArgCallable -> {
                    val args = Arguments(ObjInt.of(index))
                    callable.callWithArgsFast(scope, args) ?: run {
                        val child = scope.createChildScope(scope.pos, args = args)
                        (callable as? BytecodeCallable)?.callOnFast(child) ?: callable.callOn(child)
                    }
                }

                else -> {
                    val child = scope.createChildScope(scope.pos, args = Arguments(ObjInt.of(index)))
                    (callable as? BytecodeCallable)?.callOnFast(child) ?: callable.callOn(child)
                }
            }
            val intValue = (value as? ObjInt)?.value ?: scope.raiseClassCastError("expected Int fill result")
            result.setIntAtFast(i, intValue)
        }
        frame.storeObjResult(dst, result)
    }
}
private fun decodeMemberId(id: Int): Pair<Int, Boolean> {
return if (id <= -2) {
Pair(-id - 2, true)
@ -3859,6 +3895,22 @@ class CmdListNewInt(
}
}
/**
 * Bytecode command behind `LIST_NEW_INT_CAP`: creates a primitive-int backed [ObjList] whose
 * logical size is `size` and whose backing `LongArray` has `max(size, capacity)` elements,
 * then stores it in [dst]. Elements start at the `LongArray` default of zero.
 *
 * Operands:
 * - [sizeSlot]: slot holding the logical list size; must be non-negative.
 * - [capacitySlot]: slot holding the requested backing-array capacity; must be non-negative.
 *   Values smaller than the size are raised to the size.
 * - [dst]: slot receiving the new list.
 *
 * Raises an illegal-argument error for a negative size or capacity.
 */
class CmdListNewIntCap(
    internal val sizeSlot: Int,
    internal val capacitySlot: Int,
    internal val dst: Int,
) : Cmd() {
    override suspend fun perform(frame: CmdFrame) {
        val size = frame.getInt(sizeSlot).toInt()
        if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative")
        val capacity = frame.getInt(capacitySlot).toInt()
        // Check the raw capacity: after clamping against a non-negative size the value can
        // never be negative, so validating the clamped value would never reject anything.
        if (capacity < 0) frame.ensureScope().raiseIllegalArgument("list capacity must be non-negative")
        frame.storeObjResult(dst, ObjList(LongArray(maxOf(size, capacity)), size))
    }
}
class CmdGetIndex(
internal val targetSlot: Int,
internal val indexSlot: Int,

View File

@ -190,6 +190,8 @@ enum class Opcode(val code: Int) {
GET_DYNAMIC_MEMBER(0xAC),
SET_DYNAMIC_MEMBER(0xAD),
CALL_DYNAMIC_MEMBER(0xAE),
LIST_NEW_INT_CAP(0xAF),
LIST_FILL_INT_CAP(0xB0),
RESOLVE_SCOPE_SLOT(0xB1),
LOAD_OBJ_ADDR(0xB2),

View File

@ -165,9 +165,9 @@ open class ObjList(initialList: MutableList<Obj> = mutableListOf()) : Obj() {
}
}
internal constructor(intValues: LongArray) : this(mutableListOf()) {
internal constructor(intValues: LongArray, size: Int = intValues.size) : this(mutableListOf()) {
primitiveIntList = intValues
primitiveIntSize = intValues.size
primitiveIntSize = size
boxedList = null
}
@ -519,8 +519,8 @@ open class ObjList(initialList: MutableList<Obj> = mutableListOf()) : Obj() {
doc = "Append one or more elements to the end of this list.",
moduleName = "lyng.stdlib"
) {
val l = thisAs<ObjList>().list
for (a in args) l.add(a)
val l = thisAs<ObjList>()
for (a in args) l.appendFast(a)
ObjVoid
}
addFnDoc(

View File

@ -213,6 +213,24 @@ class BytecodeRecentOpsTest {
assertEquals(4, scope.eval("calc()").toInt())
}
/**
 * Checks that `List.fill(size, capacity) { intExpr }` with a lambda literal compiles to the
 * capacity-aware primitive list opcode, and that the resulting list still behaves correctly
 * when an element is appended past the filled size.
 */
@Test
fun listFillWithCapacityUsesPrimitiveCapacityBytecode() = runTest {
val scope = Script.newScope()
// Script under test: fill 5 elements with capacity 12, append one more, then read
// the first filled, last filled, and appended elements.
scope.eval(
"""
fun calc() {
val xs = List.fill(5, 12) { it * 2 }
xs.add(99)
xs[0] + xs[4] + xs[5]
}
""".trimIndent()
)
val disasm = scope.disassembleSymbol("calc")
// The disassembly is passed as the assertion message so a failure shows the emitted bytecode.
assertTrue(disasm.contains("LIST_NEW_INT_CAP"), disasm)
// The callable-based fill opcode must not appear for this lambda-literal call.
assertFalse(disasm.contains("LIST_FILL_INT_CAP"), disasm)
// xs[0] = 0, xs[4] = 8, xs[5] = 99 (appended) -> 107
assertEquals(107, scope.eval("calc()").toInt())
}
@Test
fun directLambdaLiteralCallUsesInlineBytecode() = runTest {
val scope = Script.newScope()

View File

@ -22,6 +22,7 @@ import kotlinx.coroutines.test.runTest
import net.sergeych.lyng.obj.toInt
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.time.Duration.Companion.milliseconds
import kotlin.time.TimeSource
class OptTest {
@ -59,4 +60,25 @@ class OptTest {
}
println("add-to-array best=${bestMs}ms avg=${totalMs / passes}ms after warmup")
}
/**
 * Timing probe rather than a correctness test: it makes no assertions and only prints
 * the durations measured by the script's `tm` helper.
 *
 * The script times `List.fill(n) { ... }` against `List.fill(n, n + 10) { ... }`, then
 * times a single `add` on each resulting list.
 */
@Test
fun testAddToArray2() = runTest {
eval(
// The `$$` prefix switches the raw string to multi-dollar interpolation, so the
// single-dollar `${Instant() - t}` below should reach the script as literal text
// instead of being interpolated by Kotlin.
$$"""
import lyng.time
val n = 700_000
fun tm<T>(block: ()->T): T {
val t = Instant()
block().also {
println("tm: ${Instant() - t}")
}
}
val x = tm { List.fill(n) { it * 10 + 1 } }
val y = tm { List.fill(n, n + 10) { it * 10 + 1 } }
tm { x.add(-1) }
tm { y.add(-2) }
""".trimIndent()
)
}
}

View File

@ -13,6 +13,11 @@ Current focus
Key recent changes
- Updated AI helper docs to reflect static typing, type expressions, and compile-time-only name resolution.
- Added stdlib random API: `Random` and deterministic `SeededRandom` with `nextInt`, `nextFloat`, and generic `next(range)`.
- Generalized primitive list optimization for compiler-generated `List.fill`:
- `List.fill(size) { intExpr }` and `List.fill(size, capacity) { intExpr }` now both have bytecode fast paths.
- Added `LIST_NEW_INT_CAP` / `LIST_FILL_INT_CAP` for the 3-arg (size, capacity, lambda) capacity-preserving form.
- Fixed `ObjList.add(...)` to preserve primitive-int backing storage instead of forcing boxing through `.list`.
- `OptTest.testAddToArray2` no longer shows the old 10x anomaly for `List.fill(n, n + 10)` or append-to-extended-list.
Known failing tests
- None in :lynglib:jvmTest after Random/SeededRandom integration.

View File

@ -22,18 +22,25 @@ Candidates (not started)
6) Box/unbox audit (done)
- Unbox ObjInt/ObjReal in assign-op when target is INT/REAL to avoid boxing + obj ops.
- MixedCompareBenchmarkTest: 240 ms -> 234 ms.
7) Mixed compare coverage
7) Primitive list fill with capacity (done)
- Extended the compiler/runtime fast path from `List.fill(size) { intExpr }` to `List.fill(size, capacity) { intExpr }`.
- Added `LIST_NEW_INT_CAP` and `LIST_FILL_INT_CAP` so the 3-arg form keeps primitive-int storage instead of falling back to generic stdlib code.
- `OptTest.testAddToArray2`: `List.fill(n, n + 10) { ... }` dropped from the prior anomaly (~10x slower than 2-arg fill) to the same range as `List.fill(n) { ... }`, roughly `56-67 ms` vs `46-75 ms` after warmup.
8) Primitive list append preservation (done)
- Fixed `ObjList.add(...)` to append through the primitive-aware fast path instead of forcing `.list` and boxing the backing storage.
- `OptTest.testAddToArray2`: appending to the pre-extended list dropped from the prior anomaly (~10x slower) to sub-millisecond / low-millisecond timings (`~0.05-0.16 ms` for the extended list path, `~1.6-4.3 ms` for the baseline path, depending on warmup).
9) Mixed compare coverage
- Emit CMP_*_REAL when one operand is known ObjReal in more expression forms (not just assign-op).
- Verify with disassembly that fast cmp opcodes are emitted.
8) Range-loop invariant hoist
10) Range-loop invariant hoist
- Cache range end/step into temps once per loop; avoid repeated slot reads/boxing in body.
- Confirm no extra CONST_OBJ in hot path.
9) Boxing elision pass
11) Boxing elision pass
- Remove redundant BOX_OBJ when value feeds only primitive ops afterward (local liveness).
- Ensure no impact on closures/escaping values.
10) Closed-type fast paths expansion
12) Closed-type fast paths expansion
- Apply closed-type trust for ObjBool/ObjInt/ObjReal/ObjString in ternaries and conditional chains.
- Guard with exact non-null temp/slot checks only.
11) VM hot op micro-optimizations
13) VM hot op micro-optimizations
- Reduce frame reads/writes in ADD_INT, MUL_REAL, CMP_*_INT/REAL when operands are temps.
- Compare against baseline; revert if regression after 10-run median.