Optimize primitive list fill capacity and append
This commit is contained in:
parent
fbb5688696
commit
953f237ca3
@ -5038,18 +5038,29 @@ class BytecodeCompiler(
|
||||
|
||||
private fun compileListFillIntCall(ref: MethodCallRef): CompiledValue? {
|
||||
if (ref.name != "fill" || !isListTypeRef(ref.receiver)) return null
|
||||
if (ref.args.size != 2 || ref.args.any { it.isSplat || it.name != null }) return null
|
||||
val lambdaRef = ((ref.args[1].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null
|
||||
if (ref.args.size != 2 && ref.args.size != 3) return null
|
||||
if (ref.args.any { it.isSplat || it.name != null }) return null
|
||||
val lambdaArgIndex = ref.args.lastIndex
|
||||
val lambdaRef = ((ref.args[lambdaArgIndex].value as? ExpressionStatement)?.ref as? LambdaFnRef) ?: return null
|
||||
if (lambdaRef.inferredReturnClass != ObjInt.type) return null
|
||||
val size = compileArgValue(ref.args[0].value) ?: return null
|
||||
if (size.type != SlotType.INT) return null
|
||||
val capacity = if (ref.args.size == 3) {
|
||||
val compiled = compileArgValue(ref.args[1].value) ?: return null
|
||||
if (compiled.type != SlotType.INT) return null
|
||||
compiled
|
||||
} else null
|
||||
lambdaRef.inlineBodyRef?.let { inlineRef ->
|
||||
return compileInlineListFillInt(size, lambdaRef, inlineRef)
|
||||
return compileInlineListFillInt(size, capacity, lambdaRef, inlineRef)
|
||||
}
|
||||
run {
|
||||
val callable = ensureObjSlot(compileArgValue(ref.args[1].value) ?: return null)
|
||||
val callable = ensureObjSlot(compileArgValue(ref.args[lambdaArgIndex].value) ?: return null)
|
||||
val dst = allocSlot()
|
||||
if (capacity != null) {
|
||||
builder.emit(Opcode.LIST_FILL_INT_CAP, size.slot, capacity.slot, callable.slot, dst)
|
||||
} else {
|
||||
builder.emit(Opcode.LIST_FILL_INT, size.slot, callable.slot, dst)
|
||||
}
|
||||
updateSlotType(dst, SlotType.OBJ)
|
||||
slotObjClass[dst] = ObjList.type
|
||||
listElementClassBySlot[dst] = ObjInt.type
|
||||
@ -5747,8 +5758,13 @@ class BytecodeCompiler(
|
||||
}
|
||||
}
|
||||
|
||||
private fun compileInlineListFillInt(size: CompiledValue, lambdaRef: LambdaFnRef, inlineRef: ObjRef): CompiledValue {
|
||||
if (isImplicitItIdentityRef(inlineRef)) {
|
||||
private fun compileInlineListFillInt(
|
||||
size: CompiledValue,
|
||||
capacity: CompiledValue?,
|
||||
lambdaRef: LambdaFnRef,
|
||||
inlineRef: ObjRef
|
||||
): CompiledValue {
|
||||
if (capacity == null && isImplicitItIdentityRef(inlineRef)) {
|
||||
val dst = allocSlot()
|
||||
builder.emit(Opcode.LIST_IOTA_INT, size.slot, dst)
|
||||
updateSlotType(dst, SlotType.OBJ)
|
||||
@ -5758,7 +5774,11 @@ class BytecodeCompiler(
|
||||
}
|
||||
|
||||
val dst = allocSlot()
|
||||
if (capacity != null) {
|
||||
builder.emit(Opcode.LIST_NEW_INT_CAP, size.slot, capacity.slot, dst)
|
||||
} else {
|
||||
builder.emit(Opcode.LIST_NEW_INT, size.slot, dst)
|
||||
}
|
||||
updateSlotType(dst, SlotType.OBJ)
|
||||
slotObjClass[dst] = ObjList.type
|
||||
listElementClassBySlot[dst] = ObjInt.type
|
||||
|
||||
@ -239,6 +239,10 @@ class CmdBuilder {
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_IOTA_INT ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_NEW_INT_CAP ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_FILL_INT_CAP ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.MAKE_RANGE ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_LITERAL ->
|
||||
@ -844,6 +848,8 @@ class CmdBuilder {
|
||||
Opcode.LIST_NEW_INT -> CmdListNewInt(operands[0], operands[1])
|
||||
Opcode.LIST_FILL_INT -> CmdListFillInt(operands[0], operands[1], operands[2])
|
||||
Opcode.LIST_IOTA_INT -> CmdListIotaInt(operands[0], operands[1])
|
||||
Opcode.LIST_NEW_INT_CAP -> CmdListNewIntCap(operands[0], operands[1], operands[2])
|
||||
Opcode.LIST_FILL_INT_CAP -> CmdListFillIntCap(operands[0], operands[1], operands[2], operands[3])
|
||||
Opcode.LIST_LITERAL -> CmdListLiteral(operands[0], operands[1], operands[2], operands[3])
|
||||
Opcode.GET_MEMBER_SLOT -> CmdGetMemberSlot(operands[0], operands[1], operands[2], operands[3])
|
||||
Opcode.SET_MEMBER_SLOT -> CmdSetMemberSlot(operands[0], operands[1], operands[2], operands[3])
|
||||
|
||||
@ -498,6 +498,8 @@ object CmdDisassembler {
|
||||
is CmdListNewInt -> Opcode.LIST_NEW_INT to intArrayOf(cmd.sizeSlot, cmd.dst)
|
||||
is CmdListFillInt -> Opcode.LIST_FILL_INT to intArrayOf(cmd.sizeSlot, cmd.callableSlot, cmd.dst)
|
||||
is CmdListIotaInt -> Opcode.LIST_IOTA_INT to intArrayOf(cmd.sizeSlot, cmd.dst)
|
||||
is CmdListNewIntCap -> Opcode.LIST_NEW_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.dst)
|
||||
is CmdListFillIntCap -> Opcode.LIST_FILL_INT_CAP to intArrayOf(cmd.sizeSlot, cmd.capacitySlot, cmd.callableSlot, cmd.dst)
|
||||
is CmdListLiteral -> Opcode.LIST_LITERAL to intArrayOf(cmd.planId, cmd.baseSlot, cmd.count, cmd.dst)
|
||||
is CmdGetMemberSlot -> Opcode.GET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.dst)
|
||||
is CmdSetMemberSlot -> Opcode.SET_MEMBER_SLOT to intArrayOf(cmd.recvSlot, cmd.fieldId, cmd.methodId, cmd.valueSlot)
|
||||
@ -627,6 +629,10 @@ object CmdDisassembler {
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_IOTA_INT ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_NEW_INT_CAP ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_FILL_INT_CAP ->
|
||||
listOf(OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT, OperandKind.SLOT)
|
||||
Opcode.LIST_LITERAL ->
|
||||
listOf(OperandKind.CONST, OperandKind.SLOT, OperandKind.COUNT, OperandKind.SLOT)
|
||||
Opcode.GET_MEMBER_SLOT ->
|
||||
|
||||
@ -3455,6 +3455,42 @@ class CmdListFillInt(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Command behind `LIST_FILL_INT_CAP`: creates an [ObjList] backed by a `LongArray`, with `size`
 * live elements and room for at least `capacity` elements, then stores into element `i` the Int
 * produced by invoking the callable with `i`.
 *
 * Raises an illegal-argument error when the requested size is negative, and a class-cast error
 * when the callable yields anything other than an [ObjInt].
 *
 * @property sizeSlot slot holding the requested element count
 * @property capacitySlot slot holding the requested backing capacity
 * @property callableSlot slot holding the fill callable
 * @property dst slot that receives the resulting list
 */
class CmdListFillIntCap(
    internal val sizeSlot: Int,
    internal val capacitySlot: Int,
    internal val callableSlot: Int,
    internal val dst: Int,
) : Cmd() {
    override suspend fun perform(frame: CmdFrame) {
        // NOTE(review): toInt() narrows the slot value (presumably a Long); values outside the
        // Int range wrap instead of being rejected - confirm whether a range check is wanted.
        val size = frame.getInt(sizeSlot).toInt()
        if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative")
        // Capacity only ever widens the backing array: anything below `size` (negative values
        // included) is clamped up to `size`. A separate "capacity must be non-negative" check
        // after this clamp could never fire, so it is intentionally absent.
        // NOTE(review): if negative capacities should be rejected, validate before clamping.
        val actualCapacity = maxOf(size, frame.getInt(capacitySlot).toInt())
        val callable = frame.storedSlotObj(callableSlot)
        val scope = frame.ensureScope()
        val result = ObjList(LongArray(actualCapacity), size)
        for (i in 0 until size) {
            val index = i.toLong()
            val value = when {
                // Implicit-`it` lambdas take the raw index; no Arguments object is needed here.
                callable is BytecodeLambdaCallable && callable.supportsImplicitIntFillFastPath() ->
                    callable.invokeImplicitIntArgFast(scope, index)
                        ?: callable.invokeImplicitIntArg(scope, index)

                else -> {
                    // Arguments are built only on the paths that actually consume them.
                    val args = Arguments(ObjInt.of(index))
                    val fastResult =
                        if (callable is BytecodeArgCallable) callable.callWithArgsFast(scope, args) else null
                    fastResult ?: run {
                        // Generic fallback: invoke the callable in a fresh child scope.
                        val child = scope.createChildScope(scope.pos, args = args)
                        (callable as? BytecodeCallable)?.callOnFast(child) ?: callable.callOn(child)
                    }
                }
            }
            val intValue = (value as? ObjInt)?.value ?: scope.raiseClassCastError("expected Int fill result")
            result.setIntAtFast(i, intValue)
        }
        frame.storeObjResult(dst, result)
    }
}
|
||||
|
||||
private fun decodeMemberId(id: Int): Pair<Int, Boolean> {
|
||||
return if (id <= -2) {
|
||||
Pair(-id - 2, true)
|
||||
@ -3859,6 +3895,22 @@ class CmdListNewInt(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Command behind `LIST_NEW_INT_CAP`: creates an [ObjList] backed by a fresh `LongArray`, with
 * `size` live elements and room for at least `capacity` elements, and stores it into [dst].
 *
 * Raises an illegal-argument error when the requested size is negative.
 *
 * @property sizeSlot slot holding the requested element count
 * @property capacitySlot slot holding the requested backing capacity
 * @property dst slot that receives the resulting list
 */
class CmdListNewIntCap(
    internal val sizeSlot: Int,
    internal val capacitySlot: Int,
    internal val dst: Int,
) : Cmd() {
    override suspend fun perform(frame: CmdFrame) {
        // NOTE(review): toInt() narrows the slot value (presumably a Long); values outside the
        // Int range wrap instead of being rejected - confirm whether a range check is wanted.
        val size = frame.getInt(sizeSlot).toInt()
        if (size < 0) frame.ensureScope().raiseIllegalArgument("list size must be non-negative")
        // Capacity only ever widens the backing array: anything below `size` (negative values
        // included) is clamped up to `size`. A separate "capacity must be non-negative" check
        // after this clamp could never fire, so it is intentionally absent.
        // NOTE(review): if negative capacities should be rejected, validate before clamping.
        val actualCapacity = maxOf(size, frame.getInt(capacitySlot).toInt())
        frame.storeObjResult(dst, ObjList(LongArray(actualCapacity), size))
    }
}
|
||||
|
||||
class CmdGetIndex(
|
||||
internal val targetSlot: Int,
|
||||
internal val indexSlot: Int,
|
||||
|
||||
@ -190,6 +190,8 @@ enum class Opcode(val code: Int) {
|
||||
GET_DYNAMIC_MEMBER(0xAC),
|
||||
SET_DYNAMIC_MEMBER(0xAD),
|
||||
CALL_DYNAMIC_MEMBER(0xAE),
|
||||
LIST_NEW_INT_CAP(0xAF),
|
||||
LIST_FILL_INT_CAP(0xB0),
|
||||
|
||||
RESOLVE_SCOPE_SLOT(0xB1),
|
||||
LOAD_OBJ_ADDR(0xB2),
|
||||
|
||||
@ -165,9 +165,9 @@ open class ObjList(initialList: MutableList<Obj> = mutableListOf()) : Obj() {
|
||||
}
|
||||
}
|
||||
|
||||
internal constructor(intValues: LongArray) : this(mutableListOf()) {
|
||||
internal constructor(intValues: LongArray, size: Int = intValues.size) : this(mutableListOf()) {
|
||||
primitiveIntList = intValues
|
||||
primitiveIntSize = intValues.size
|
||||
primitiveIntSize = size
|
||||
boxedList = null
|
||||
}
|
||||
|
||||
@ -519,8 +519,8 @@ open class ObjList(initialList: MutableList<Obj> = mutableListOf()) : Obj() {
|
||||
doc = "Append one or more elements to the end of this list.",
|
||||
moduleName = "lyng.stdlib"
|
||||
) {
|
||||
val l = thisAs<ObjList>().list
|
||||
for (a in args) l.add(a)
|
||||
val l = thisAs<ObjList>()
|
||||
for (a in args) l.appendFast(a)
|
||||
ObjVoid
|
||||
}
|
||||
addFnDoc(
|
||||
|
||||
@ -213,6 +213,24 @@ class BytecodeRecentOpsTest {
|
||||
assertEquals(4, scope.eval("calc()").toInt())
|
||||
}
|
||||
|
||||
/**
 * Compiles a Lyng function that calls `List.fill(5, 12) { it * 2 }`, appends `99`, and sums
 * elements 0, 4 and 5, then checks both the emitted bytecode and the evaluated result.
 *
 * Expected result: 0 + 8 + 99 = 107.
 */
@Test
|
||||
fun listFillWithCapacityUsesPrimitiveCapacityBytecode() = runTest {
|
||||
val scope = Script.newScope()
|
||||
scope.eval(
|
||||
"""
|
||||
fun calc() {
|
||||
val xs = List.fill(5, 12) { it * 2 }
|
||||
xs.add(99)
|
||||
xs[0] + xs[4] + xs[5]
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
val disasm = scope.disassembleSymbol("calc")
|
||||
// The fill lambda is expected to be inlined: the list is allocated with LIST_NEW_INT_CAP
|
||||
// and the callable-based LIST_FILL_INT_CAP opcode must not appear in the disassembly.
assertTrue(disasm.contains("LIST_NEW_INT_CAP"), disasm)
|
||||
assertFalse(disasm.contains("LIST_FILL_INT_CAP"), disasm)
|
||||
assertEquals(107, scope.eval("calc()").toInt())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun directLambdaLiteralCallUsesInlineBytecode() = runTest {
|
||||
val scope = Script.newScope()
|
||||
|
||||
@ -22,6 +22,7 @@ import kotlinx.coroutines.test.runTest
|
||||
import net.sergeych.lyng.obj.toInt
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.time.Duration.Companion.milliseconds
|
||||
import kotlin.time.TimeSource
|
||||
|
||||
class OptTest {
|
||||
@ -59,4 +60,25 @@ class OptTest {
|
||||
}
|
||||
println("add-to-array best=${bestMs}ms avg=${totalMs / passes}ms after warmup")
|
||||
}
|
||||
|
||||
/**
 * Timing probe with no assertions: evaluates a Lyng script that times `List.fill(n) { ... }`,
 * `List.fill(n, n + 10) { ... }`, and a single `add` on each resulting list, printing each
 * duration through the script's own `tm` helper.
 *
 * The script is a `$$`-prefixed raw string, so the single-dollar `${...}` inside it is not
 * interpolated by Kotlin and reaches the Lyng script unchanged.
 */
@Test
|
||||
fun testAddToArray2() = runTest {
|
||||
eval(
|
||||
$$"""
|
||||
import lyng.time
|
||||
val n = 700_000
|
||||
fun tm<T>(block: ()->T): T {
|
||||
val t = Instant()
|
||||
block().also {
|
||||
println("tm: ${Instant() - t}")
|
||||
}
|
||||
}
|
||||
val x = tm { List.fill(n) { it * 10 + 1 } }
|
||||
val y = tm { List.fill(n, n + 10) { it * 10 + 1 } }
|
||||
tm { x.add(-1) }
|
||||
tm { y.add(-2) }
|
||||
""".trimIndent()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -13,6 +13,11 @@ Current focus
|
||||
Key recent changes
|
||||
- Updated AI helper docs to reflect static typing, type expressions, and compile-time-only name resolution.
|
||||
- Added stdlib random API: `Random` and deterministic `SeededRandom` with `nextInt`, `nextFloat`, and generic `next(range)`.
|
||||
- Generalized primitive list optimization for compiler-generated `List.fill`:
|
||||
- `List.fill(size) { intExpr }` and `List.fill(size, capacity) { intExpr }` now both have bytecode fast paths.
|
||||
- Added `LIST_NEW_INT_CAP` / `LIST_FILL_INT_CAP` for the 3-arg capacity-preserving form.
|
||||
- Fixed `ObjList.add(...)` to preserve primitive-int backing storage instead of forcing boxing through `.list`.
|
||||
- `OptTest.testAddToArray2` no longer shows the old 10x anomaly for `List.fill(n, n + 10)` or append-to-extended-list.
|
||||
|
||||
Known failing tests
|
||||
- None in :lynglib:jvmTest after Random/SeededRandom integration.
|
||||
|
||||
@ -22,18 +22,25 @@ Candidates (not started)
|
||||
6) Box/unbox audit (done)
|
||||
- Unbox ObjInt/ObjReal in assign-op when target is INT/REAL to avoid boxing + obj ops.
|
||||
- MixedCompareBenchmarkTest: 240 ms -> 234 ms.
|
||||
7) Mixed compare coverage
|
||||
7) Primitive list fill with capacity (done)
|
||||
- Extended the compiler/runtime fast path from `List.fill(size) { intExpr }` to `List.fill(size, capacity) { intExpr }`.
|
||||
- Added `LIST_NEW_INT_CAP` and `LIST_FILL_INT_CAP` so the 3-arg form keeps primitive-int storage instead of falling back to generic stdlib code.
|
||||
- `OptTest.testAddToArray2`: `List.fill(n, n + 10) { ... }` dropped from the prior anomaly (~10x slower than 2-arg fill) to the same range as `List.fill(n) { ... }`, roughly `56-67 ms` vs `46-75 ms` after warmup.
|
||||
8) Primitive list append preservation (done)
|
||||
- Fixed `ObjList.add(...)` to append through the primitive-aware fast path instead of forcing `.list` and boxing the backing storage.
|
||||
- `OptTest.testAddToArray2`: appending to the pre-extended list dropped from the prior anomaly (~10x slower) to sub-millisecond / low-millisecond timings (`~0.05-0.16 ms` for the extended list path, `~1.6-4.3 ms` for the baseline path, depending on warmup).
|
||||
9) Mixed compare coverage
|
||||
- Emit CMP_*_REAL when one operand is known ObjReal in more expression forms (not just assign-op).
|
||||
- Verify with disassembly that fast cmp opcodes are emitted.
|
||||
8) Range-loop invariant hoist
|
||||
10) Range-loop invariant hoist
|
||||
- Cache range end/step into temps once per loop; avoid repeated slot reads/boxing in body.
|
||||
- Confirm no extra CONST_OBJ in hot path.
|
||||
9) Boxing elision pass
|
||||
11) Boxing elision pass
|
||||
- Remove redundant BOX_OBJ when value feeds only primitive ops afterward (local liveness).
|
||||
- Ensure no impact on closures/escaping values.
|
||||
10) Closed-type fast paths expansion
|
||||
12) Closed-type fast paths expansion
|
||||
- Apply closed-type trust for ObjBool/ObjInt/ObjReal/ObjString in ternaries and conditional chains.
|
||||
- Guard with exact non-null temp/slot checks only.
|
||||
11) VM hot op micro-optimizations
|
||||
13) VM hot op micro-optimizations
|
||||
- Reduce frame reads/writes in ADD_INT, MUL_REAL, CMP_*_INT/REAL when operands are temps.
|
||||
- Compare against baseline; revert if regression after 10-run median.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user