Stabilize pi benchmark optimizations for release
This commit is contained in:
parent
a7ab0d3905
commit
064b927b1a
72
examples/pi-bench.lyng
Normal file
72
examples/pi-bench.lyng
Normal file
@ -0,0 +1,72 @@
|
||||
import lyng.time
|
||||
|
||||
val WORK_SIZE = 200
|
||||
val TASK_COUNT = 10
|
||||
|
||||
// Benchmark workload: produces digits of pi with a spigot algorithm and prints
// the digits at indices n-8 .. n-1, prefixed with the caller-supplied iThread.
fn piSpigot(iThread: Int, n: Int) {
    // Digits emitted so far; each outer iteration appends exactly one.
    var digits = []
    val boxCount = n * 10 / 3
    // Every box starts with the value 2.
    var remainders = List.fill(boxCount) { 2 }
    // Number of trailing digits that a later overflow (digit == 10) must bump.
    var pending = 0
    for (i in 0..n) {
        var carry = 0
        var total = 0
        // Visit the boxes from index boxCount-1 down to 0.
        for (offset in 1..boxCount) {
            val box = boxCount - offset
            val divisor = box * 2 + 1
            remainders[box] *= 10
            total = remainders[box] + carry
            val quot = total / divisor
            remainders[box] = total % divisor
            carry = quot * box
        }
        remainders[0] = total % 10
        var digit = total / 10
        if (digit == 9) {
            ++pending
        } else if (digit == 10) {
            // Overflow: emit 0 and add one to each of the pending digits (9 wraps to 0).
            digit = 0
            for (distance in 1..pending) {
                var bumped = digits[i - distance]
                if (bumped == 9) {
                    bumped = 0
                } else {
                    ++bumped
                }
                digits[i - distance] = bumped
            }
            pending = 1
        } else {
            pending = 1
        }
        digits.add(digit)
    }

    // Concatenate the eight digits at indices n-8 .. n-1.
    var tail = ""
    for (i in (n - 8)..<n) {
        tail += digits[i]
    }

    println(iThread, " - done: ", tail)
}
|
||||
|
||||
|
||||
// Running count of launched tasks; its current value is passed to each task as its id.
var counter = 0

// 30 rounds: launch TASK_COUNT piSpigot tasks, await all of them, print the
// time the round took, then call delay(1000) before the next round.
for (repeat in 1..30) {
    val startedAt = Instant()
    (1..TASK_COUNT).map {
        // Copy the counter into a val before launching; presumably so the task
        // does not observe later increments (confirm against closure semantics).
        val taskId = counter
        val task = launch {
            piSpigot(taskId, WORK_SIZE)
        }
        ++counter
        task
    }.forEach { (it as Deferred).await() }

    val elapsed = Instant() - startedAt

    println("$repeat: all done, dt = ", elapsed)
    delay(1000)
}
|
||||
49
examples/pi-test.lyng
Normal file
49
examples/pi-test.lyng
Normal file
@ -0,0 +1,49 @@
|
||||
// Produces digits of pi with a spigot algorithm and returns, as a string, the
// eight digits at indices n-8 .. n-1 of the generated sequence.
fn piSpigot(n) {
    // Digits emitted so far; each outer iteration appends exactly one.
    var digits = []
    val boxCount = n * 10 / 3
    // Build the box list element by element; every box starts with the value 2.
    var remainders = []
    for (i in 0..<boxCount) {
        remainders.add(2)
    }
    // Number of trailing digits that a later overflow (digit == 10) must bump.
    var pending = 0
    for (i in 0..n) {
        var carry = 0
        var total = 0
        // Visit the boxes from index boxCount-1 down to 0.
        for (offset in 1..boxCount) {
            val box = boxCount - offset
            val divisor = box * 2 + 1
            remainders[box] *= 10
            total = remainders[box] + carry
            // Keep this integer-only. Real coercion here is much slower in the hot loop.
            val quot = total / divisor
            remainders[box] = total % divisor
            carry = quot * box
        }
        remainders[0] = total % 10
        var digit = total / 10
        if (digit == 9) {
            ++pending
        } else if (digit == 10) {
            // Overflow: emit 0 and add one to each of the pending digits (9 wraps to 0).
            digit = 0
            for (distance in 1..pending) {
                var bumped = digits[i - distance]
                if (bumped == 9) {
                    bumped = 0
                } else {
                    ++bumped
                }
                digits[i - distance] = bumped
            }
            pending = 1
        } else {
            pending = 1
        }
        digits.add(digit)
    }

    // Concatenate the eight digits at indices n-8 .. n-1; the string is the function's value.
    var tail = ""
    for (i in (n - 8)..<n) {
        tail += digits[i]
    }
    tail
}
|
||||
@ -1783,7 +1783,11 @@ class BytecodeCompiler(
|
||||
return when (slotTypes[slot]) {
|
||||
SlotType.INT -> NumericKind.INT
|
||||
SlotType.REAL -> NumericKind.REAL
|
||||
else -> NumericKind.UNKNOWN
|
||||
else -> when (slotObjClass[slot]) {
|
||||
ObjInt.type -> NumericKind.INT
|
||||
ObjReal.type -> NumericKind.REAL
|
||||
else -> NumericKind.UNKNOWN
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1800,7 +1804,21 @@ class BytecodeCompiler(
|
||||
is ConstRef -> numericKindFromConst(ref.constValue)
|
||||
is LocalVarRef -> resolveDirectNameSlot(ref.name)?.let { numericKindFromSlot(it.slot) } ?: NumericKind.UNKNOWN
|
||||
is FastLocalVarRef -> resolveDirectNameSlot(ref.name)?.let { numericKindFromSlot(it.slot) } ?: NumericKind.UNKNOWN
|
||||
is LocalSlotRef -> resolveSlot(ref)?.let { numericKindFromSlot(it) } ?: NumericKind.UNKNOWN
|
||||
is LocalSlotRef -> resolveLocalSlotByRefOrName(ref)?.let { numericKindFromSlot(it) } ?: NumericKind.UNKNOWN
|
||||
is IndexRef -> {
|
||||
val receiver = when (val target = ref.targetRef) {
|
||||
is LocalSlotRef -> resolveLocalSlotByRefOrName(target)
|
||||
is LocalVarRef -> resolveDirectNameSlot(target.name)?.slot
|
||||
is FastLocalVarRef -> resolveDirectNameSlot(target.name)?.slot
|
||||
else -> null
|
||||
}
|
||||
val elementClass = receiver?.let { listElementClassBySlot[it] } ?: listElementClassFromReceiverRef(ref.targetRef)
|
||||
when (elementClass) {
|
||||
ObjInt.type -> NumericKind.INT
|
||||
ObjReal.type -> NumericKind.REAL
|
||||
else -> NumericKind.UNKNOWN
|
||||
}
|
||||
}
|
||||
is UnaryOpRef -> inferNumericKind(ref.a)
|
||||
is BinaryOpRef -> {
|
||||
val op = ref.op
|
||||
@ -2431,7 +2449,7 @@ class BytecodeCompiler(
|
||||
updateSlotType(slot, SlotType.OBJ)
|
||||
return value
|
||||
}
|
||||
val value = compileRef(assignValue(ref)) ?: return null
|
||||
var value = compileRef(assignValue(ref)) ?: return null
|
||||
if (isLoopVarRef(localTarget)) {
|
||||
emitLoopVarReassignError(localTarget.name, localTarget.pos())
|
||||
return value
|
||||
@ -2473,7 +2491,7 @@ class BytecodeCompiler(
|
||||
else -> null
|
||||
}
|
||||
if (nameTarget != null) {
|
||||
val value = compileRef(assignValue(ref)) ?: return null
|
||||
var value = compileRef(assignValue(ref)) ?: return null
|
||||
val resolved = resolveAssignableSlotByName(nameTarget) ?: return null
|
||||
val slot = resolved.first
|
||||
val isMutable = resolved.second
|
||||
@ -2698,6 +2716,7 @@ class BytecodeCompiler(
|
||||
if (!target.optionalRef) {
|
||||
val index = compileRefWithFallback(target.indexRef, null, Pos.builtIn) ?: return null
|
||||
builder.emit(Opcode.SET_INDEX, receiver.slot, index.slot, value.slot)
|
||||
noteListElementClassMutation(receiver.slot, value)
|
||||
} else {
|
||||
val nullSlot = allocSlot()
|
||||
builder.emit(Opcode.CONST_NULL, nullSlot)
|
||||
@ -2710,6 +2729,7 @@ class BytecodeCompiler(
|
||||
)
|
||||
val index = compileRefWithFallback(target.indexRef, null, Pos.builtIn) ?: return null
|
||||
builder.emit(Opcode.SET_INDEX, receiver.slot, index.slot, value.slot)
|
||||
noteListElementClassMutation(receiver.slot, value)
|
||||
builder.mark(endLabel)
|
||||
}
|
||||
return value
|
||||
@ -3026,9 +3046,32 @@ class BytecodeCompiler(
|
||||
val receiver = compileRefWithFallback(indexTarget.targetRef, null, Pos.builtIn) ?: return null
|
||||
val current = allocSlot()
|
||||
val result = allocSlot()
|
||||
val rhs = compileRef(ref.value) ?: return compileEvalRef(ref)
|
||||
var rhs = compileRef(ref.value) ?: return compileEvalRef(ref)
|
||||
val elementClass = listElementClassBySlot[receiver.slot] ?: listElementClassFromReceiverRef(indexTarget.targetRef)
|
||||
if (!indexTarget.optionalRef) {
|
||||
val index = compileRefWithFallback(indexTarget.indexRef, null, Pos.builtIn) ?: return null
|
||||
if (elementClass == ObjInt.type) {
|
||||
builder.emit(Opcode.GET_INDEX, receiver.slot, index.slot, current)
|
||||
val currentInt = allocSlot()
|
||||
builder.emit(Opcode.UNBOX_INT_OBJ, current, currentInt)
|
||||
updateSlotType(currentInt, SlotType.INT)
|
||||
if (rhs.type != SlotType.INT) {
|
||||
coerceToArithmeticInt(ref.value, rhs)?.let { rhs = it }
|
||||
}
|
||||
val typed = when (ref.op) {
|
||||
BinOp.PLUS -> compileAssignOpBinary(SlotType.INT, rhs, currentInt, Opcode.ADD_INT, Opcode.ADD_REAL, Opcode.ADD_OBJ)
|
||||
BinOp.MINUS -> compileAssignOpBinary(SlotType.INT, rhs, currentInt, Opcode.SUB_INT, Opcode.SUB_REAL, Opcode.SUB_OBJ)
|
||||
BinOp.STAR -> compileAssignOpBinary(SlotType.INT, rhs, currentInt, Opcode.MUL_INT, Opcode.MUL_REAL, Opcode.MUL_OBJ)
|
||||
BinOp.SLASH -> compileAssignOpBinary(SlotType.INT, rhs, currentInt, Opcode.DIV_INT, Opcode.DIV_REAL, Opcode.DIV_OBJ)
|
||||
BinOp.PERCENT -> compileAssignOpBinary(SlotType.INT, rhs, currentInt, Opcode.MOD_INT, null, Opcode.MOD_OBJ)
|
||||
else -> null
|
||||
}
|
||||
if (typed != null && typed.type == SlotType.INT) {
|
||||
builder.emit(Opcode.SET_INDEX, receiver.slot, index.slot, currentInt)
|
||||
noteListElementClassMutation(receiver.slot, typed)
|
||||
return CompiledValue(currentInt, SlotType.INT)
|
||||
}
|
||||
}
|
||||
builder.emit(Opcode.GET_INDEX, receiver.slot, index.slot, current)
|
||||
builder.emit(objOp, current, rhs.slot, result)
|
||||
builder.emit(Opcode.SET_INDEX, receiver.slot, index.slot, result)
|
||||
@ -3586,7 +3629,7 @@ class BytecodeCompiler(
|
||||
val elementClass = listElementClassBySlot[receiver.slot] ?: listElementClassFromReceiverRef(ref.targetRef)
|
||||
if (elementClass != null) {
|
||||
slotObjClass[dst] = elementClass
|
||||
if (elementClass == ObjString.type && elementClass.isClosed) {
|
||||
if (elementClass.isClosed) {
|
||||
stableObjSlots.add(dst)
|
||||
} else {
|
||||
stableObjSlots.remove(dst)
|
||||
@ -4649,6 +4692,9 @@ class BytecodeCompiler(
|
||||
val encodedCount = encodeCallArgCount(args) ?: return null
|
||||
setPos(callPos)
|
||||
builder.emit(Opcode.CALL_MEMBER_SLOT, receiver.slot, encodedMethodId, args.base, encodedCount, dst)
|
||||
if (receiverClass == ObjList.type && ref.name == "add" && ref.args.size == 1 && !ref.args.first().isSplat) {
|
||||
noteListElementClassMutation(receiver.slot, CompiledValue(args.base, SlotType.OBJ))
|
||||
}
|
||||
return CompiledValue(dst, SlotType.OBJ)
|
||||
}
|
||||
val nullSlot = allocSlot()
|
||||
@ -4815,7 +4861,7 @@ class BytecodeCompiler(
|
||||
" receiver=$kind(${ref.name}) slot=$slot slotClass=$slotCls nameClass=$nameCls"
|
||||
}
|
||||
is LocalSlotRef -> {
|
||||
val slot = resolveSlot(ref)
|
||||
val slot = resolveLocalSlotByRefOrName(ref)
|
||||
val slotCls = slot?.let { slotObjClass[it]?.className }
|
||||
val nameCls = nameObjClass[ref.name]?.className
|
||||
val scopeId = refScopeId(ref)
|
||||
@ -4971,9 +5017,10 @@ class BytecodeCompiler(
|
||||
val specs = if (needPlan) ArrayList<BytecodeConst.CallArgSpec>(args.size) else null
|
||||
for ((index, arg) in args.withIndex()) {
|
||||
val compiled = compileArgValue(arg.value) ?: return null
|
||||
val objValue = ensureObjSlot(compiled)
|
||||
val dst = argSlots[index]
|
||||
if (compiled.slot != dst || compiled.type != SlotType.OBJ) {
|
||||
builder.emit(Opcode.BOX_OBJ, compiled.slot, dst)
|
||||
if (objValue.slot != dst) {
|
||||
emitMove(objValue, dst)
|
||||
}
|
||||
updateSlotType(dst, SlotType.OBJ)
|
||||
specs?.add(BytecodeConst.CallArgSpec(arg.name, arg.isSplat))
|
||||
@ -5833,7 +5880,8 @@ class BytecodeCompiler(
|
||||
emitMove(value, localSlot)
|
||||
}
|
||||
updateSlotType(localSlot, value.type)
|
||||
updateSlotObjClass(localSlot, stmt.initializer, stmt.initializerObjClass)
|
||||
slotObjClass[value.slot]?.let { slotObjClass[localSlot] = it }
|
||||
?: updateSlotObjClass(localSlot, stmt.initializer, stmt.initializerObjClass)
|
||||
updateListElementClassFromDecl(localSlot, scopeId, stmt.slotIndex)
|
||||
updateListElementClassFromInitializer(localSlot, stmt.initializer)
|
||||
updateNameObjClassFromSlot(stmt.name, localSlot)
|
||||
@ -5865,7 +5913,8 @@ class BytecodeCompiler(
|
||||
}
|
||||
updateSlotType(scopeSlot, value.type)
|
||||
updateNameObjClassFromSlot(stmt.name, scopeSlot)
|
||||
updateSlotObjClass(scopeSlot, stmt.initializer, stmt.initializerObjClass)
|
||||
slotObjClass[value.slot]?.let { slotObjClass[scopeSlot] = it }
|
||||
?: updateSlotObjClass(scopeSlot, stmt.initializer, stmt.initializerObjClass)
|
||||
updateListElementClassFromDecl(scopeSlot, scopeId, stmt.slotIndex)
|
||||
updateListElementClassFromInitializer(scopeSlot, stmt.initializer)
|
||||
val declId = builder.addConst(
|
||||
@ -5901,7 +5950,9 @@ class BytecodeCompiler(
|
||||
updateSlotTypeByName(stmt.name, value.type)
|
||||
}
|
||||
updateNameObjClassFromSlot(stmt.name, value.slot)
|
||||
updateSlotObjClass(value.slot, stmt.initializer, stmt.initializerObjClass)
|
||||
if (slotObjClass[value.slot] == null) {
|
||||
updateSlotObjClass(value.slot, stmt.initializer, stmt.initializerObjClass)
|
||||
}
|
||||
updateListElementClassFromDecl(value.slot, scopeId, stmt.slotIndex)
|
||||
updateListElementClassFromInitializer(value.slot, stmt.initializer)
|
||||
return value
|
||||
@ -5991,6 +6042,16 @@ class BytecodeCompiler(
|
||||
listElementClassBySlot[slot] = elementClass
|
||||
}
|
||||
|
||||
/**
 * Updates the element class tracked for the list in [receiverSlot] after [value]
 * has been stored into it (indexed assignment or a single-argument `add`).
 *
 * - If the class of [value] cannot be determined, the tracked class is dropped:
 *   the list can no longer be assumed homogeneous, and consumers of
 *   `listElementClassBySlot` rely on the entry to choose typed fast paths.
 * - If nothing is tracked yet, or the tracked class equals the new one, the new
 *   class is recorded.
 * - If the classes differ, the tracked class is dropped.
 *
 * NOTE(review): once an entry has been dropped, a later store of a known class
 * records that class again because "dropped" and "never tracked" are both
 * represented by a missing entry. Confirm whether a separate "mixed" marker is needed.
 */
private fun noteListElementClassMutation(receiverSlot: Int, value: CompiledValue) {
    val newClass = elementClassFromValue(value)
    if (newClass == null) {
        // Unknown element stored: keeping the old class would let typed paths
        // treat an arbitrary object as the previously tracked class.
        listElementClassBySlot.remove(receiverSlot)
        return
    }
    val current = listElementClassBySlot[receiverSlot]
    if (current == null || current == newClass) {
        listElementClassBySlot[receiverSlot] = newClass
    } else {
        listElementClassBySlot.remove(receiverSlot)
    }
}
|
||||
|
||||
private fun updateNameObjClassFromSlot(name: String, slot: Int) {
|
||||
val cls = slotObjClass[slot] ?: return
|
||||
nameObjClass[name] = cls
|
||||
@ -6086,9 +6147,6 @@ class BytecodeCompiler(
|
||||
if (range == null && rangeRef == null) {
|
||||
rangeRef = extractRangeFromLocal(stmt.source)
|
||||
}
|
||||
if (rangeRef != null && !isConstIntRange(rangeRef)) {
|
||||
rangeRef = null
|
||||
}
|
||||
val typedRangeLocal = if (range == null && rangeRef == null) extractTypedRangeLocal(stmt.source) else null
|
||||
val loopSlotPlan = stmt.loopSlotPlan
|
||||
val loopSlotIndex = stmt.loopSlotPlan[stmt.loopVarName]
|
||||
@ -6129,120 +6187,16 @@ class BytecodeCompiler(
|
||||
val breakFlagSlot = allocSlot()
|
||||
if (range == null && rangeRef == null && typedRangeLocal == null) {
|
||||
val sourceValue = compileStatementValueOrFallback(stmt.source) ?: return null
|
||||
val sourceObj = ensureObjSlot(sourceValue)
|
||||
val typeId = builder.addConst(BytecodeConst.ObjRef(ObjIterable))
|
||||
val typeSlot = allocSlot()
|
||||
builder.emit(Opcode.CONST_OBJ, typeId, typeSlot)
|
||||
builder.emit(Opcode.ASSERT_IS, sourceObj.slot, typeSlot)
|
||||
|
||||
val iterableMethods = ObjIterable.instanceMethodIdMap(includeAbstract = true)
|
||||
val iteratorMethodId = iterableMethods["iterator"]
|
||||
if (iteratorMethodId == null) {
|
||||
throw BytecodeCompileException("Missing member id for Iterable.iterator", stmt.pos)
|
||||
}
|
||||
val iteratorMethods = ObjIterator.instanceMethodIdMap(includeAbstract = true)
|
||||
val hasNextMethodId = iteratorMethods["hasNext"]
|
||||
if (hasNextMethodId == null) {
|
||||
throw BytecodeCompileException("Missing member id for Iterator.hasNext", stmt.pos)
|
||||
}
|
||||
val nextMethodId = iteratorMethods["next"]
|
||||
if (nextMethodId == null) {
|
||||
throw BytecodeCompileException("Missing member id for Iterator.next", stmt.pos)
|
||||
}
|
||||
|
||||
val iterSlot = allocSlot()
|
||||
builder.emit(Opcode.CALL_MEMBER_SLOT, sourceObj.slot, iteratorMethodId, 0, 0, iterSlot)
|
||||
builder.emit(Opcode.ITER_PUSH, iterSlot)
|
||||
|
||||
if (needsBreakFlag) {
|
||||
val falseId = builder.addConst(BytecodeConst.Bool(false))
|
||||
builder.emit(Opcode.CONST_BOOL, falseId, breakFlagSlot)
|
||||
}
|
||||
val resultSlot = if (wantResult) {
|
||||
val slot = allocSlot()
|
||||
val voidId = builder.addConst(BytecodeConst.ObjRef(ObjVoid))
|
||||
builder.emit(Opcode.CONST_OBJ, voidId, slot)
|
||||
slot
|
||||
} else {
|
||||
null
|
||||
}
|
||||
|
||||
val loopLabel = builder.label()
|
||||
val continueLabel = builder.label()
|
||||
val endLabel = builder.label()
|
||||
builder.mark(loopLabel)
|
||||
|
||||
val hasNextSlot = allocSlot()
|
||||
builder.emit(Opcode.CALL_MEMBER_SLOT, iterSlot, hasNextMethodId, 0, 0, hasNextSlot)
|
||||
val condSlot = allocSlot()
|
||||
builder.emit(Opcode.OBJ_TO_BOOL, hasNextSlot, condSlot)
|
||||
builder.emit(
|
||||
Opcode.JMP_IF_FALSE,
|
||||
listOf(CmdBuilder.Operand.IntVal(condSlot), CmdBuilder.Operand.LabelRef(endLabel))
|
||||
return emitIterableForIn(
|
||||
stmt = stmt,
|
||||
sourceValue = sourceValue,
|
||||
wantResult = wantResult,
|
||||
loopSlotId = loopSlotId,
|
||||
breakFlagSlot = breakFlagSlot,
|
||||
needsBreakFlag = needsBreakFlag,
|
||||
hasRealWiden = hasRealWiden,
|
||||
realWidenSlots = realWidenSlots,
|
||||
)
|
||||
|
||||
val nextSlot = allocSlot()
|
||||
builder.emit(Opcode.CALL_MEMBER_SLOT, iterSlot, nextMethodId, 0, 0, nextSlot)
|
||||
val nextObj = ensureObjSlot(CompiledValue(nextSlot, SlotType.UNKNOWN))
|
||||
emitMove(CompiledValue(nextObj.slot, SlotType.OBJ), loopSlotId)
|
||||
updateSlotType(loopSlotId, SlotType.OBJ)
|
||||
updateSlotTypeByName(stmt.loopVarName, SlotType.OBJ)
|
||||
|
||||
loopStack.addLast(
|
||||
LoopContext(
|
||||
stmt.label,
|
||||
endLabel,
|
||||
continueLabel,
|
||||
breakFlagSlot,
|
||||
resultSlot,
|
||||
hasIterator = true
|
||||
)
|
||||
)
|
||||
val bodyValue = compileLoopBody(stmt.body, wantResult) ?: return null
|
||||
if (hasRealWiden) {
|
||||
applySlotTypes(realWidenSlots, SlotType.UNKNOWN)
|
||||
}
|
||||
loopStack.removeLast()
|
||||
if (wantResult) {
|
||||
val bodyObj = ensureObjSlot(bodyValue)
|
||||
builder.emit(Opcode.MOVE_OBJ, bodyObj.slot, resultSlot!!)
|
||||
}
|
||||
builder.mark(continueLabel)
|
||||
if (hasRealWiden) {
|
||||
emitLoopRealCoercions(realWidenSlots)
|
||||
}
|
||||
builder.emit(Opcode.JMP, listOf(CmdBuilder.Operand.LabelRef(loopLabel)))
|
||||
|
||||
builder.mark(endLabel)
|
||||
if (needsBreakFlag) {
|
||||
val afterPop = builder.label()
|
||||
builder.emit(
|
||||
Opcode.JMP_IF_TRUE,
|
||||
listOf(CmdBuilder.Operand.IntVal(breakFlagSlot), CmdBuilder.Operand.LabelRef(afterPop))
|
||||
)
|
||||
builder.emit(Opcode.ITER_POP)
|
||||
builder.mark(afterPop)
|
||||
} else {
|
||||
builder.emit(Opcode.ITER_POP)
|
||||
}
|
||||
if (stmt.elseStatement != null) {
|
||||
val afterElse = if (needsBreakFlag) builder.label() else null
|
||||
if (needsBreakFlag) {
|
||||
builder.emit(
|
||||
Opcode.JMP_IF_TRUE,
|
||||
listOf(CmdBuilder.Operand.IntVal(breakFlagSlot), CmdBuilder.Operand.LabelRef(afterElse!!))
|
||||
)
|
||||
}
|
||||
val elseValue = compileStatementValueOrFallback(stmt.elseStatement, wantResult) ?: return null
|
||||
if (wantResult) {
|
||||
val elseObj = ensureObjSlot(elseValue)
|
||||
builder.emit(Opcode.MOVE_OBJ, elseObj.slot, resultSlot!!)
|
||||
}
|
||||
if (needsBreakFlag) {
|
||||
builder.mark(afterElse!!)
|
||||
}
|
||||
}
|
||||
return resultSlot ?: breakFlagSlot
|
||||
}
|
||||
|
||||
val iSlot = loopSlotId
|
||||
@ -6261,9 +6215,23 @@ class BytecodeCompiler(
|
||||
if (rangeRef != null) {
|
||||
val left = rangeRef.left ?: return null
|
||||
val right = rangeRef.right ?: return null
|
||||
val startValue = compileRef(left) ?: return null
|
||||
val endValue = compileRef(right) ?: return null
|
||||
if (startValue.type != SlotType.INT || endValue.type != SlotType.INT) return null
|
||||
val startCompiled = compileRef(left) ?: return null
|
||||
val endCompiled = compileRef(right) ?: return null
|
||||
val startValue = coerceToLoopInt(startCompiled)
|
||||
val endValue = coerceToLoopInt(endCompiled)
|
||||
if (startValue == null || endValue == null) {
|
||||
val rangeValue = emitRangeObject(startCompiled, endCompiled, rangeRef)
|
||||
return emitIterableForIn(
|
||||
stmt = stmt,
|
||||
sourceValue = rangeValue,
|
||||
wantResult = wantResult,
|
||||
loopSlotId = loopSlotId,
|
||||
breakFlagSlot = breakFlagSlot,
|
||||
needsBreakFlag = needsBreakFlag,
|
||||
hasRealWiden = hasRealWiden,
|
||||
realWidenSlots = realWidenSlots,
|
||||
)
|
||||
}
|
||||
val descendingId = builder.addConst(BytecodeConst.Bool(rangeRef.isDescending))
|
||||
emitMove(startValue, iSlot)
|
||||
emitMove(endValue, endSlot)
|
||||
@ -7308,7 +7276,8 @@ class BytecodeCompiler(
|
||||
is LocalSlotRef -> {
|
||||
val ownerScopeId = ref.captureOwnerScopeId ?: ref.scopeId
|
||||
val ownerSlot = ref.captureOwnerSlot ?: ref.slot
|
||||
slotTypeByScopeId[ownerScopeId]?.get(ownerSlot)
|
||||
resolveLocalSlotByRefOrName(ref)?.let { slotObjClass[it] }
|
||||
?: slotTypeByScopeId[ownerScopeId]?.get(ownerSlot)
|
||||
?: slotInitClassByKey[ScopeSlotKey(ownerScopeId, ownerSlot)]
|
||||
?: nameObjClass[ref.name]
|
||||
?: resolveTypeNameClass(ref.name)
|
||||
@ -7721,6 +7690,11 @@ class BytecodeCompiler(
|
||||
return resolved
|
||||
}
|
||||
|
||||
/**
 * Resolves the slot for [ref], first through [resolveSlot] and, when that yields
 * nothing and the reference has a non-empty name, through [resolveDirectNameSlot].
 *
 * @return the resolved slot index, or `null` when neither lookup succeeds.
 */
private fun resolveLocalSlotByRefOrName(ref: LocalSlotRef): Int? {
    val bySlot = resolveSlot(ref)
    if (bySlot != null) return bySlot

    val name = ref.name
    if (name.isEmpty()) return null
    return resolveDirectNameSlot(name)?.slot
}
|
||||
|
||||
private fun resolveCapturedOwnerScopeSlot(ref: LocalSlotRef): Int? {
|
||||
val ownerScopeId = ref.captureOwnerScopeId ?: return null
|
||||
val ownerSlot = ref.captureOwnerSlot ?: return null
|
||||
@ -8698,10 +8672,237 @@ class BytecodeCompiler(
|
||||
return if (ref.step != null) null else ref
|
||||
}
|
||||
|
||||
private fun isConstIntRange(ref: RangeRef): Boolean {
|
||||
val left = ref.left as? ConstRef ?: return false
|
||||
val right = ref.right as? ConstRef ?: return false
|
||||
return left.constValue is ObjInt && right.constValue is ObjInt
|
||||
/**
 * Emits a `for … in` loop that walks [sourceValue] through the iterator protocol:
 * asserts the source is an `ObjIterable`, calls `iterator()`, then repeatedly
 * calls `hasNext()` / `next()` and stores each element in [loopSlotId] as an object.
 *
 * @param stmt the loop statement; supplies the body, label, loop variable name,
 *   optional `else` statement and the position used in error reports.
 * @param sourceValue the already compiled iteration source.
 * @param wantResult when true, a result slot is allocated, initialised to `ObjVoid`
 *   and overwritten with the body value (and with the `else` value if that runs).
 * @param loopSlotId slot that receives each element.
 * @param breakFlagSlot slot holding the "loop was left via break" flag.
 * @param needsBreakFlag when true, the flag is reset to false before the loop and
 *   consulted afterwards to skip `ITER_POP` and the `else` branch.
 * @param hasRealWiden whether [realWidenSlots] must be processed.
 * @param realWidenSlots slots passed to `applySlotTypes(…, UNKNOWN)` after the body
 *   and to `emitLoopRealCoercions` at the continue label.
 * @return the result slot when [wantResult] is true, otherwise [breakFlagSlot];
 *   `null` if the body or the `else` statement could not be compiled.
 * @throws BytecodeCompileException if a member id for `iterator`, `hasNext` or
 *   `next` is missing.
 */
private fun emitIterableForIn(
    stmt: net.sergeych.lyng.ForInStatement,
    sourceValue: CompiledValue,
    wantResult: Boolean,
    loopSlotId: Int,
    breakFlagSlot: Int,
    needsBreakFlag: Boolean,
    hasRealWiden: Boolean,
    realWidenSlots: Set<Int>,
): Int? {
    // Runtime check that the source really is iterable.
    val sourceObj = ensureObjSlot(sourceValue)
    val typeId = builder.addConst(BytecodeConst.ObjRef(ObjIterable))
    val typeSlot = allocSlot()
    builder.emit(Opcode.CONST_OBJ, typeId, typeSlot)
    builder.emit(Opcode.ASSERT_IS, sourceObj.slot, typeSlot)

    val iterableMethods = ObjIterable.instanceMethodIdMap(includeAbstract = true)
    val iteratorMethodId = iterableMethods["iterator"]
        ?: throw BytecodeCompileException("Missing member id for Iterable.iterator", stmt.pos)
    val iteratorMethods = ObjIterator.instanceMethodIdMap(includeAbstract = true)
    val hasNextMethodId = iteratorMethods["hasNext"]
        ?: throw BytecodeCompileException("Missing member id for Iterator.hasNext", stmt.pos)
    val nextMethodId = iteratorMethods["next"]
        ?: throw BytecodeCompileException("Missing member id for Iterator.next", stmt.pos)

    val iterSlot = allocSlot()
    builder.emit(Opcode.CALL_MEMBER_SLOT, sourceObj.slot, iteratorMethodId, 0, 0, iterSlot)
    builder.emit(Opcode.ITER_PUSH, iterSlot)

    if (needsBreakFlag) {
        val falseId = builder.addConst(BytecodeConst.Bool(false))
        builder.emit(Opcode.CONST_BOOL, falseId, breakFlagSlot)
    }
    // Non-null exactly when wantResult is true.
    val resultSlot = if (wantResult) {
        val slot = allocSlot()
        val voidId = builder.addConst(BytecodeConst.ObjRef(ObjVoid))
        builder.emit(Opcode.CONST_OBJ, voidId, slot)
        slot
    } else {
        null
    }

    val loopLabel = builder.label()
    val continueLabel = builder.label()
    val endLabel = builder.label()
    builder.mark(loopLabel)

    // Loop head: leave when hasNext() is false.
    val hasNextSlot = allocSlot()
    builder.emit(Opcode.CALL_MEMBER_SLOT, iterSlot, hasNextMethodId, 0, 0, hasNextSlot)
    val condSlot = allocSlot()
    builder.emit(Opcode.OBJ_TO_BOOL, hasNextSlot, condSlot)
    builder.emit(
        Opcode.JMP_IF_FALSE,
        listOf(CmdBuilder.Operand.IntVal(condSlot), CmdBuilder.Operand.LabelRef(endLabel))
    )

    // Fetch the next element into the loop variable; it is always treated as an object.
    val nextSlot = allocSlot()
    builder.emit(Opcode.CALL_MEMBER_SLOT, iterSlot, nextMethodId, 0, 0, nextSlot)
    val nextObj = ensureObjSlot(CompiledValue(nextSlot, SlotType.UNKNOWN))
    emitMove(CompiledValue(nextObj.slot, SlotType.OBJ), loopSlotId)
    updateSlotType(loopSlotId, SlotType.OBJ)
    updateSlotTypeByName(stmt.loopVarName, SlotType.OBJ)

    loopStack.addLast(
        LoopContext(
            stmt.label,
            endLabel,
            continueLabel,
            breakFlagSlot,
            resultSlot,
            hasIterator = true
        )
    )
    val bodyValue = compileLoopBody(stmt.body, wantResult)
    if (bodyValue == null) {
        // Keep push/pop balanced on failure so a stale context cannot be seen
        // by whatever is compiled next with this compiler instance.
        loopStack.removeLast()
        return null
    }
    if (hasRealWiden) {
        applySlotTypes(realWidenSlots, SlotType.UNKNOWN)
    }
    loopStack.removeLast()
    if (resultSlot != null) {
        val bodyObj = ensureObjSlot(bodyValue)
        builder.emit(Opcode.MOVE_OBJ, bodyObj.slot, resultSlot)
    }
    builder.mark(continueLabel)
    if (hasRealWiden) {
        emitLoopRealCoercions(realWidenSlots)
    }
    builder.emit(Opcode.JMP, listOf(CmdBuilder.Operand.LabelRef(loopLabel)))

    builder.mark(endLabel)
    if (needsBreakFlag) {
        // When the break flag is set, ITER_POP is skipped here.
        val afterPop = builder.label()
        builder.emit(
            Opcode.JMP_IF_TRUE,
            listOf(CmdBuilder.Operand.IntVal(breakFlagSlot), CmdBuilder.Operand.LabelRef(afterPop))
        )
        builder.emit(Opcode.ITER_POP)
        builder.mark(afterPop)
    } else {
        builder.emit(Opcode.ITER_POP)
    }
    if (stmt.elseStatement != null) {
        // The else branch is skipped when the break flag is set.
        val afterElse = if (needsBreakFlag) builder.label() else null
        if (afterElse != null) {
            builder.emit(
                Opcode.JMP_IF_TRUE,
                listOf(CmdBuilder.Operand.IntVal(breakFlagSlot), CmdBuilder.Operand.LabelRef(afterElse))
            )
        }
        val elseValue = compileStatementValueOrFallback(stmt.elseStatement, wantResult) ?: return null
        if (resultSlot != null) {
            val elseObj = ensureObjSlot(elseValue)
            builder.emit(Opcode.MOVE_OBJ, elseObj.slot, resultSlot)
        }
        if (afterElse != null) {
            builder.mark(afterElse)
        }
    }
    return resultSlot ?: breakFlagSlot
}
|
||||
|
||||
/**
 * Emits a `MAKE_RANGE` instruction that builds a range object from the compiled
 * bounds, taking the end-inclusive and descending flags from [ref] and using a
 * null step.
 *
 * @return the slot holding the new range, typed as an object and recorded with
 *   class `ObjRange.type`.
 */
private fun emitRangeObject(startValue: CompiledValue, endValue: CompiledValue, ref: RangeRef): CompiledValue {
    // Allocates a slot and loads the boolean constant into it.
    fun boolConstSlot(flag: Boolean): Int {
        val slot = allocSlot()
        val constId = builder.addConst(BytecodeConst.Bool(flag))
        builder.emit(Opcode.CONST_BOOL, constId, slot)
        return slot
    }

    val lower = ensureObjSlot(startValue)
    val upper = ensureObjSlot(endValue)
    val inclusiveFlag = boolConstSlot(ref.isEndInclusive)
    val descendingFlag = boolConstSlot(ref.isDescending)

    // No explicit step: pass null.
    val nullStep = allocSlot()
    builder.emit(Opcode.CONST_NULL, nullStep)
    updateSlotType(nullStep, SlotType.OBJ)

    val rangeSlot = allocSlot()
    builder.emit(Opcode.MAKE_RANGE, lower.slot, upper.slot, inclusiveFlag, descendingFlag, nullStep, rangeSlot)
    updateSlotType(rangeSlot, SlotType.OBJ)
    slotObjClass[rangeSlot] = ObjRange.type
    return CompiledValue(rangeSlot, SlotType.OBJ)
}
|
||||
|
||||
/**
 * Returns true when [ref] has both bounds present and each bound passes [isIntLikeRef].
 */
private fun isDynamicIntRangeCandidate(ref: RangeRef): Boolean {
    val lower = ref.left
    val upper = ref.right
    if (lower == null || upper == null) return false
    return isIntLikeRef(lower) && isIntLikeRef(upper)
}
|
||||
|
||||
/**
 * Decides whether [ref] can be treated as producing an Int.
 *
 * The answer is true when [inferNumericKind] reports INT, or otherwise by kind of
 * reference:
 * - constants: the constant value is an `ObjInt`;
 * - locals, calls, field accesses, casts and statement refs: the class resolved by
 *   [resolveReceiverClass] is `ObjInt.type`;
 * - `this`-based member refs: the class resolved by
 *   [resolveReceiverClassForScopeCollection] is `ObjInt.type`;
 * - unary negate: the operand is int-like;
 * - binary `+ - * / %` and bitwise/shift operators: both operands are int-like.
 *
 * Any other reference yields false.
 */
private fun isIntLikeRef(ref: ObjRef): Boolean =
    inferNumericKind(ref) == NumericKind.INT || when (ref) {
        is ConstRef -> ref.constValue is ObjInt

        is LocalSlotRef,
        is LocalVarRef,
        is FastLocalVarRef,
        is BoundLocalVarRef,
        is CallRef,
        is MethodCallRef,
        is FieldRef,
        is CastRef,
        is StatementRef -> resolveReceiverClass(ref) == ObjInt.type

        is ThisMethodSlotCallRef,
        is ImplicitThisMethodCallRef,
        is ThisFieldSlotRef,
        is ImplicitThisMemberRef -> resolveReceiverClassForScopeCollection(ref) == ObjInt.type

        is UnaryOpRef -> ref.op == UnaryOp.NEGATE && isIntLikeRef(unaryOperand(ref))

        is BinaryOpRef -> {
            val acceptedOperator = when (binaryOp(ref)) {
                BinOp.PLUS,
                BinOp.MINUS,
                BinOp.STAR,
                BinOp.SLASH,
                BinOp.PERCENT,
                BinOp.BAND,
                BinOp.BXOR,
                BinOp.BOR,
                BinOp.SHL,
                BinOp.SHR -> true
                else -> false
            }
            acceptedOperator && isIntLikeRef(binaryLeft(ref)) && isIntLikeRef(binaryRight(ref))
        }

        else -> false
    }
|
||||
|
||||
/**
 * Tries to obtain an INT-typed slot for [value].
 *
 * - INT values are returned unchanged.
 * - OBJ values are unboxed directly when the slot is known to hold an Int (exact
 *   non-null class / temp, or tracked class `ObjInt.type` on a stable primitive
 *   source slot). Otherwise, if the slot is a stable primitive source, the value
 *   is boxed, asserted to be an Int at run time, and then unboxed.
 * - UNKNOWN values on a stable primitive source slot are boxed, asserted and unboxed.
 *
 * @return the INT-typed value, or `null` when none of the cases applies.
 */
private fun coerceToLoopInt(value: CompiledValue): CompiledValue? {
    val source = value.slot

    // Emits UNBOX_INT_OBJ from objSlot into a fresh INT slot.
    fun unboxFrom(objSlot: Int): CompiledValue {
        val unboxed = allocSlot()
        builder.emit(Opcode.UNBOX_INT_OBJ, objSlot, unboxed)
        updateSlotType(unboxed, SlotType.INT)
        return CompiledValue(unboxed, SlotType.INT)
    }

    // Boxes the source into a fresh OBJ slot and emits the run-time Int assertion on it.
    fun boxAndAssertInt(): Int {
        val boxed = allocSlot()
        builder.emit(Opcode.BOX_OBJ, source, boxed)
        updateSlotType(boxed, SlotType.OBJ)
        return emitAssertObjSlotIsInt(boxed)
    }

    return when (value.type) {
        SlotType.INT -> value

        SlotType.OBJ -> {
            val isExactInt = isExactNonNullSlotClassOrTemp(source, ObjInt.type)
            val isStableIntObj = slotObjClass[source] == ObjInt.type && isStablePrimitiveSourceSlot(source)
            when {
                isExactInt || isStableIntObj -> unboxFrom(source)
                isStablePrimitiveSourceSlot(source) -> unboxFrom(boxAndAssertInt())
                else -> null
            }
        }

        SlotType.UNKNOWN ->
            if (isStablePrimitiveSourceSlot(source)) unboxFrom(boxAndAssertInt()) else null

        else -> null
    }
}
|
||||
|
||||
/**
 * Coerces [value] to an INT slot for arithmetic, but only when there is a reason
 * to expect an Int: either [inferNumericKind] reports INT for [ref], or the value
 * lives in a non-temporary slot that is a stable primitive source.
 *
 * @return [value] itself when already INT, the result of [coerceToLoopInt] when
 *   one of the conditions holds, otherwise `null`.
 */
private fun coerceToArithmeticInt(ref: ObjRef, value: CompiledValue): CompiledValue? {
    if (value.type == SlotType.INT) return value

    val inferredInt = inferNumericKind(ref) == NumericKind.INT
    val fromStableLocal = !isTempSlot(value.slot) && isStablePrimitiveSourceSlot(value.slot)
    return if (inferredInt || fromStableLocal) coerceToLoopInt(value) else null
}
|
||||
|
||||
/**
 * Emits an `ASSERT_IS` check of [slot] against `ObjInt.type`, loading the type
 * into a freshly allocated slot first.
 *
 * @return [slot] unchanged, so the call can be used inline.
 */
private fun emitAssertObjSlotIsInt(slot: Int): Int {
    val intTypeConst = builder.addConst(BytecodeConst.ObjRef(ObjInt.type))
    val intTypeSlot = allocSlot().also { builder.emit(Opcode.CONST_OBJ, intTypeConst, it) }
    builder.emit(Opcode.ASSERT_IS, slot, intTypeSlot)
    return slot
}
|
||||
|
||||
private fun extractDeclaredRange(stmt: Statement?): RangeRef? {
|
||||
|
||||
@ -314,8 +314,13 @@ class CmdUnboxIntObj(internal val src: Int, internal val dst: Int) : Cmd() {
|
||||
/**
 * Fast command that writes the integer found in slot [src] into local int slot [dst].
 *
 * The source slot's type code is checked first: a slot typed INT is read directly
 * with `getInt`; any other slot is read as a raw object and cast to [ObjInt]
 * (the cast fails if the object is not an `ObjInt`).
 */
class CmdUnboxIntObjLocal(internal val src: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true
    override fun performFast(frame: CmdFrame) {
        // Dispatch on the slot type before touching the raw object, so a slot
        // that already holds a primitive int is never cast.
        when (frame.frame.getSlotTypeCode(src)) {
            SlotType.INT.code -> frame.setLocalInt(dst, frame.frame.getInt(src))
            else -> {
                val value = frame.frame.getRawObj(src) as ObjInt
                frame.setLocalInt(dst, value.value)
            }
        }
        return
    }
}
|
||||
@ -331,8 +336,13 @@ class CmdUnboxRealObj(internal val src: Int, internal val dst: Int) : Cmd() {
|
||||
/**
 * Fast command that writes the real number found in slot [src] into local real slot [dst].
 *
 * The source slot's type code is checked first: a slot typed REAL is read directly
 * with `getReal`; any other slot is read as a raw object and cast to [ObjReal]
 * (the cast fails if the object is not an `ObjReal`).
 */
class CmdUnboxRealObjLocal(internal val src: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true
    override fun performFast(frame: CmdFrame) {
        // Dispatch on the slot type before touching the raw object, so a slot
        // that already holds a primitive real is never cast.
        when (frame.frame.getSlotTypeCode(src)) {
            SlotType.REAL.code -> frame.setLocalReal(dst, frame.frame.getReal(src))
            else -> {
                val value = frame.frame.getRawObj(src) as ObjReal
                frame.setLocalReal(dst, value.value)
            }
        }
        return
    }
}
|
||||
@ -1540,9 +1550,15 @@ class CmdCmpEqIntObj(internal val a: Int, internal val b: Int, internal val dst:
|
||||
/**
 * Fast command that stores `a == b` (integer comparison) into local bool slot [dst].
 *
 * Each operand slot is read according to its type code: a slot typed INT is read
 * with `getInt`; any other slot is read as a raw object and cast to [ObjInt]
 * (the cast fails if the object is not an `ObjInt`).
 */
class CmdCmpEqIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true
    override fun performFast(frame: CmdFrame) {
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left == right)
        return
    }
}
|
||||
@ -1563,9 +1579,15 @@ class CmdCmpNeqIntObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `!=` comparison of two int operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.INT];
 * otherwise the raw slot object is cast to [ObjInt] and unwrapped.
 */
class CmdCmpNeqIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-INT slot does not hold an [ObjInt] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left != right)
    }
}
|
||||
@ -1586,9 +1608,15 @@ class CmdCmpLtIntObj(internal val a: Int, internal val b: Int, internal val dst:
|
||||
/**
 * Fast-path `<` comparison of two int operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.INT];
 * otherwise the raw slot object is cast to [ObjInt] and unwrapped.
 */
class CmdCmpLtIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-INT slot does not hold an [ObjInt] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left < right)
    }
}
|
||||
@ -1609,9 +1637,15 @@ class CmdCmpLteIntObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `<=` comparison of two int operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.INT];
 * otherwise the raw slot object is cast to [ObjInt] and unwrapped.
 */
class CmdCmpLteIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-INT slot does not hold an [ObjInt] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left <= right)
    }
}
|
||||
@ -1632,9 +1666,15 @@ class CmdCmpGtIntObj(internal val a: Int, internal val b: Int, internal val dst:
|
||||
/**
 * Fast-path `>` comparison of two int operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.INT];
 * otherwise the raw slot object is cast to [ObjInt] and unwrapped.
 */
class CmdCmpGtIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-INT slot does not hold an [ObjInt] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left > right)
    }
}
|
||||
@ -1655,9 +1695,15 @@ class CmdCmpGteIntObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `>=` comparison of two int operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.INT];
 * otherwise the raw slot object is cast to [ObjInt] and unwrapped.
 */
class CmdCmpGteIntObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-INT slot does not hold an [ObjInt] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.INT.code -> frame.frame.getInt(a)
            else -> (frame.frame.getRawObj(a) as ObjInt).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.INT.code -> frame.frame.getInt(b)
            else -> (frame.frame.getRawObj(b) as ObjInt).value
        }
        frame.setLocalBool(dst, left >= right)
    }
}
|
||||
@ -1678,9 +1724,15 @@ class CmdCmpEqRealObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `==` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpEqRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left == right)
    }
}
|
||||
@ -1701,9 +1753,15 @@ class CmdCmpNeqRealObj(internal val a: Int, internal val b: Int, internal val ds
|
||||
/**
 * Fast-path `!=` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpNeqRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left != right)
    }
}
|
||||
@ -1724,9 +1782,15 @@ class CmdCmpLtRealObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `<` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpLtRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left < right)
    }
}
|
||||
@ -1747,9 +1811,15 @@ class CmdCmpLteRealObj(internal val a: Int, internal val b: Int, internal val ds
|
||||
/**
 * Fast-path `<=` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpLteRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left <= right)
    }
}
|
||||
@ -1770,9 +1840,15 @@ class CmdCmpGtRealObj(internal val a: Int, internal val b: Int, internal val dst
|
||||
/**
 * Fast-path `>` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpGtRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left > right)
    }
}
|
||||
@ -1793,9 +1869,15 @@ class CmdCmpGteRealObj(internal val a: Int, internal val b: Int, internal val ds
|
||||
/**
 * Fast-path `>=` comparison of two real operands held in slots [a] and [b];
 * the boolean result is written to local [dst].
 *
 * Each operand is read as a primitive when its slot is tagged [SlotType.REAL];
 * otherwise the raw slot object is cast to [ObjReal] and unwrapped.
 */
class CmdCmpGteRealObjLocal(internal val a: Int, internal val b: Int, internal val dst: Int) : Cmd() {
    override val isFast: Boolean = true

    /** @throws ClassCastException if a non-REAL slot does not hold an [ObjReal] */
    override fun performFast(frame: CmdFrame) {
        // Each operand is read exactly once, by slot type, and the result is stored exactly once.
        val left = when (frame.frame.getSlotTypeCode(a)) {
            SlotType.REAL.code -> frame.frame.getReal(a)
            else -> (frame.frame.getRawObj(a) as ObjReal).value
        }
        val right = when (frame.frame.getSlotTypeCode(b)) {
            SlotType.REAL.code -> frame.frame.getReal(b)
            else -> (frame.frame.getRawObj(b) as ObjReal).value
        }
        frame.setLocalBool(dst, left >= right)
    }
}
|
||||
@ -3624,7 +3706,13 @@ class CmdGetIndex(
|
||||
internal val dst: Int,
|
||||
) : Cmd() {
|
||||
/**
 * Evaluates `target[index]` and stores the element in [dst].
 *
 * When the receiver is exactly an [ObjList] (not a subclass) and the index is an
 * [ObjInt], the backing list is indexed directly. Every other receiver/index
 * combination goes through the receiver's own `getAt`.
 *
 * @param frame the executing frame providing the operand slots and the scope
 */
override suspend fun perform(frame: CmdFrame) {
    val target = frame.storedSlotObj(targetSlot)
    val index = frame.storedSlotObj(indexSlot)
    // The exact-class test keeps ObjList subclasses on their overridden getAt.
    if (target is ObjList && target::class == ObjList::class && index is ObjInt) {
        frame.storeObjResult(dst, target.list[index.toInt()])
        return
    }
    // Generic path: the lookup is performed exactly once.
    frame.storeObjResult(dst, target.getAt(frame.ensureScope(), index))
}
|
||||
@ -3636,7 +3724,14 @@ class CmdSetIndex(
|
||||
internal val valueSlot: Int,
|
||||
) : Cmd() {
|
||||
/**
 * Performs `target[index] = value` using the objects held in the command's slots.
 *
 * When the receiver is exactly an [ObjList] (not a subclass) and the index is an
 * [ObjInt], the backing list is written directly. Every other receiver/index
 * combination goes through the receiver's own `putAt`.
 *
 * @param frame the executing frame providing the operand slots and the scope
 */
override suspend fun perform(frame: CmdFrame) {
    val target = frame.storedSlotObj(targetSlot)
    val index = frame.storedSlotObj(indexSlot)
    // The value is read through slotToObj, unlike target and index which use storedSlotObj.
    val value = frame.slotToObj(valueSlot)
    // The exact-class test keeps ObjList subclasses on their overridden putAt.
    if (target is ObjList && target::class == ObjList::class && index is ObjInt) {
        target.list[index.toInt()] = value
        return
    }
    // Generic path: the element is written exactly once.
    target.putAt(frame.ensureScope(), index, value)
}
|
||||
}
|
||||
|
||||
145
lynglib/src/jvmTest/kotlin/PiSpigotBenchmarkTest.kt
Normal file
145
lynglib/src/jvmTest/kotlin/PiSpigotBenchmarkTest.kt
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright 2026 Sergey S. Chernov real.sergeych@gmail.com
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import net.sergeych.lyng.Benchmarks
|
||||
import net.sergeych.lyng.BytecodeBodyProvider
|
||||
import net.sergeych.lyng.PerfFlags
|
||||
import net.sergeych.lyng.PerfProfiles
|
||||
import net.sergeych.lyng.Script
|
||||
import net.sergeych.lyng.Statement
|
||||
import net.sergeych.lyng.bytecode.BytecodeStatement
|
||||
import net.sergeych.lyng.bytecode.CmdCallMemberSlot
|
||||
import net.sergeych.lyng.bytecode.CmdFunction
|
||||
import net.sergeych.lyng.bytecode.CmdGetIndex
|
||||
import net.sergeych.lyng.bytecode.CmdIterPush
|
||||
import net.sergeych.lyng.bytecode.CmdMakeRange
|
||||
import net.sergeych.lyng.bytecode.CmdSetIndex
|
||||
import net.sergeych.lyng.obj.ObjString
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.test.assertTrue
|
||||
import kotlin.time.TimeSource
|
||||
|
||||
/**
 * Opt-in benchmark for the `piSpigot` function defined in `examples/pi-test.lyng`.
 *
 * Three variants are timed and compared:
 *  - a "legacy" source in which the integer quotient is replaced by a real-number division plus `floor`,
 *  - the unmodified source with [PerfFlags.RVAL_FASTPATH] switched off,
 *  - the unmodified source with the perf flags restored to their snapshot.
 *
 * The test body is skipped unless [Benchmarks.enabled] is set.
 */
class PiSpigotBenchmarkTest {
    @Test
    fun benchmarkPiSpigot() = runTest {
        if (!Benchmarks.enabled) return@runTest

        val source = Files.readString(resolveExample("pi-test.lyng"))
        val legacySource = source.replace(
            "val quotient = sum / denom",
            "var quotient = floor((sum / (denom * 1.0))).toInt()"
        )
        // Guards against the example drifting so that the replacement silently stops matching.
        assertTrue(legacySource != source, "failed to build legacy piSpigot benchmark case")

        val digits = 200
        val expectedSuffix = "49303819"

        val legacyElapsed = runCase("legacy-real-division", legacySource, digits, expectedSuffix, dumpBytecode = true)

        val saved = PerfProfiles.snapshot()
        // The flag is global state: restore it even when the case fails, so a failing
        // benchmark cannot leak RVAL_FASTPATH=false into whatever runs afterwards.
        val optimizedRvalOffElapsed = try {
            PerfFlags.RVAL_FASTPATH = false
            runCase(
                "optimized-int-division-rval-off",
                source,
                digits,
                expectedSuffix,
                dumpBytecode = false
            )
        } finally {
            PerfProfiles.restore(saved)
        }

        val optimizedElapsed = runCase("optimized-int-division-rval-on", source, digits, expectedSuffix, dumpBytecode = true)
        val sourceSpeedup = legacyElapsed.toDouble() / optimizedRvalOffElapsed.toDouble()
        val runtimeSpeedup = optimizedRvalOffElapsed.toDouble() / optimizedElapsed.toDouble()
        val totalSpeedup = legacyElapsed.toDouble() / optimizedElapsed.toDouble()
        println(
            "[DEBUG_LOG] [BENCH] pi-spigot compare n=$digits legacy=${legacyElapsed} ms " +
                "intDiv=${optimizedRvalOffElapsed} ms rvalOn=${optimizedElapsed} ms " +
                "intDivSpeedup=${"%.2f".format(sourceSpeedup)}x " +
                "rvalSpeedup=${"%.2f".format(runtimeSpeedup)}x " +
                "total=${"%.2f".format(totalSpeedup)}x"
        )
    }

    /**
     * Evaluates [source] in a fresh scope and times `piSpigot(digits)`.
     *
     * The function is called once, then twice more as warm-up, and finally three
     * timed iterations are run; every call's result is checked against [expectedSuffix].
     *
     * @param name label used in the log line for this case
     * @param source script text that must define `piSpigot`
     * @param digits argument passed to `piSpigot`
     * @param expectedSuffix string the script is expected to return
     * @param dumpBytecode when true, prints the disassembly and hot-op counts before running
     * @return total elapsed time of the timed iterations, in whole milliseconds
     */
    private suspend fun runCase(
        name: String,
        source: String,
        digits: Int,
        expectedSuffix: String,
        dumpBytecode: Boolean,
    ): Long {
        val scope = Script.newScope()
        scope.eval(source)

        if (dumpBytecode) {
            println("[DEBUG_LOG] [BENCH] pi-spigot cmd:\n${scope.disassembleSymbol("piSpigot")}")
            dumpHotOps(scope, "piSpigot")
        }

        val first = scope.eval("piSpigot($digits)") as ObjString
        assertEquals(expectedSuffix, first.value)

        // Warm-up runs, excluded from the measurement.
        repeat(2) {
            val warm = scope.eval("piSpigot($digits)") as ObjString
            assertEquals(expectedSuffix, warm.value)
        }

        val iterations = 3
        val start = TimeSource.Monotonic.markNow()
        repeat(iterations) {
            val result = scope.eval("piSpigot($digits)") as ObjString
            assertEquals(expectedSuffix, result.value)
        }
        val elapsedMs = start.elapsedNow().inWholeMilliseconds
        val avgMs = elapsedMs.toDouble() / iterations.toDouble()
        println(
            "[DEBUG_LOG] [BENCH] pi-spigot $name n=$digits iterations=$iterations " +
                "elapsed=${elapsedMs} ms avg=${"%.2f".format(avgMs)} ms"
        )
        return elapsedMs
    }

    /**
     * Prints how many range, member-call, iterator-push and index commands the compiled
     * function [name] contains. Does nothing when no bytecode function can be resolved.
     */
    private fun dumpHotOps(scope: net.sergeych.lyng.Scope, name: String) {
        val fn = resolveBytecodeFunction(scope, name) ?: return
        val makeRange = fn.cmds.count { it is CmdMakeRange }
        val callMemberSlot = fn.cmds.count { it is CmdCallMemberSlot }
        val iterPush = fn.cmds.count { it is CmdIterPush }
        val getIndex = fn.cmds.count { it is CmdGetIndex }
        val setIndex = fn.cmds.count { it is CmdSetIndex }
        println(
            "[DEBUG_LOG] [BENCH] pi-spigot hot-ops " +
                "makeRange=$makeRange callMemberSlot=$callMemberSlot iterPush=$iterPush " +
                "getIndex=$getIndex setIndex=$setIndex total=${fn.cmds.size}"
        )
    }

    /**
     * Looks up [name] in [scope] and returns its compiled function.
     *
     * @return the [CmdFunction] when the record's value is a [Statement] that is either a
     * [BytecodeStatement] or a [BytecodeBodyProvider] with a bytecode body; `null` otherwise
     */
    private fun resolveBytecodeFunction(scope: net.sergeych.lyng.Scope, name: String): CmdFunction? {
        val record = scope.get(name) ?: return null
        val stmt = record.value as? Statement ?: return null
        return (stmt as? BytecodeStatement)?.bytecodeFunction()
            ?: (stmt as? BytecodeBodyProvider)?.bytecodeBody()?.bytecodeFunction()
    }

    /**
     * Locates an example script by file name, trying `examples/` and then `../examples/`
     * as relative paths.
     *
     * @throws IllegalStateException when the file exists in neither location
     */
    private fun resolveExample(name: String): Path {
        val direct = Path.of("examples", name)
        if (Files.exists(direct)) return direct
        val parent = Path.of("..", "examples", name)
        if (Files.exists(parent)) return parent
        error("example not found: $name")
    }
}
|
||||
172
notes/pi_spigot_benchmark_baseline_2026-04-03.md
Normal file
172
notes/pi_spigot_benchmark_baseline_2026-04-03.md
Normal file
@ -0,0 +1,172 @@
|
||||
# Pi Spigot Benchmark Baseline
|
||||
|
||||
Date: 2026-04-03
|
||||
Command:
|
||||
`./gradlew :lynglib:jvmTest -Pbenchmarks=true --tests 'PiSpigotBenchmarkTest' --rerun-tasks`
|
||||
|
||||
Results for `n=200`:
|
||||
- legacy-real-division: 1108 ms (3 iters, avg 369.33 ms)
|
||||
- optimized-int-division-rval-off: 756 ms (3 iters, avg 252.00 ms)
|
||||
- optimized-int-division-rval-on: 674 ms (3 iters, avg 224.67 ms)
|
||||
|
||||
Derived speedups:
|
||||
- intDivSpeedup: 1.47x
|
||||
- rvalSpeedup: 1.12x
|
||||
- total: 1.64x
|
||||
|
||||
Notes:
|
||||
- Bytecode still shows generic range iteration (`MAKE_RANGE`, `CALL_MEMBER_SLOT`, `ITER_PUSH`) for loop constructs in the legacy benchmark case.
|
||||
- This baseline is captured before enabling counted-loop lowering for dynamic inline int ranges.
|
||||
|
||||
Optimization #1 follow-up:
|
||||
- Attempt: broaden compiler loop lowering for dynamic int ranges and validate with `PiSpigotBenchmarkTest` bytecode dumps.
|
||||
- Final result: success after switching loop-bound coercion to a runtime-checked int path for stable slots with missing metadata.
|
||||
- Latest measured run after the working compiler change:
|
||||
- legacy-real-division: 783 ms (3 iters, avg 261.00 ms)
|
||||
- optimized-int-division-rval-off: 729 ms (3 iters, avg 243.00 ms)
|
||||
- optimized-int-division-rval-on: 593 ms (3 iters, avg 197.67 ms)
|
||||
- Hot-op counts for optimized bytecode now show the generic range iterator path is gone from the main loops:
|
||||
- `makeRange=0`
|
||||
- `callMemberSlot=2`
|
||||
- `iterPush=0`
|
||||
- `getIndex=4`
|
||||
- `setIndex=4`
|
||||
- The remaining member calls are non-loop overhead; the main improvement came from lowering `for` ranges to counted int loops.
|
||||
|
||||
Optimization #2 follow-up:
|
||||
- Attempt: coerce stable integer operands into `INT` arithmetic during binary-op lowering so hot expressions stop falling back to `OBJ` math.
|
||||
- Latest measured run after the arithmetic change:
|
||||
- legacy-real-division: 593 ms (3 iters, avg 197.67 ms)
|
||||
- optimized-int-division-rval-off: 542 ms (3 iters, avg 180.67 ms)
|
||||
- optimized-int-division-rval-on: 516 ms (3 iters, avg 172.00 ms)
|
||||
- Compiled-code impact in the optimized case:
|
||||
- `boxes = n * 10 / 3` is now `UNBOX_INT_OBJ` + `MUL_INT` + `DIV_INT`
|
||||
- `j = boxes - k` is now `SUB_INT`
|
||||
- `denom = j * 2 + 1` is now `MUL_INT` + `ADD_INT`
|
||||
- `carriedOver = quotient * j` is now `MUL_INT`
|
||||
- Remaining hot object arithmetic is centered on list-backed reminder values and derived sums:
|
||||
- `reminders[j] * 10`
|
||||
- `reminders[j] + carriedOver`
|
||||
- `sum / denom`, `sum % denom`, `sum / 10`
|
||||
- Conclusion: loop lowering is fixed; the next likely win is preserving `List<Int>` element typing for `reminders` so indexed loads stay in int space.
|
||||
|
||||
Optimization #3 follow-up:
|
||||
- Attempt: teach numeric-kind inference that `IndexRef` can be `INT`/`REAL` when the receiver list has a known element class.
|
||||
- Compiler change:
|
||||
- `inferNumericKind()` now handles `IndexRef` and resolves the receiver slot or receiver-declared list element class before choosing `INT`/`REAL`.
|
||||
- Latest measured run after the indexed-load inference change:
|
||||
- legacy-real-division: 656 ms (3 iters, avg 218.67 ms)
|
||||
- optimized-int-division-rval-off: 509 ms (3 iters, avg 169.67 ms)
|
||||
- optimized-int-division-rval-on: 403 ms (3 iters, avg 134.33 ms)
|
||||
- Derived speedups vs legacy in this run:
|
||||
- intDivSpeedup: 1.29x
|
||||
- rvalSpeedup: 1.26x
|
||||
- total: 1.63x
|
||||
- Compiled-code impact in the optimized case:
|
||||
- `carriedOver = quotient * j` stays in `INT` space (`ASSERT_IS` + `UNBOX_INT_OBJ` + `MUL_INT`) instead of plain object multiply.
|
||||
- Counted int loops remain intact (`MAKE_RANGE=0`, `ITER_PUSH=0`).
|
||||
- Remaining bottlenecks in the optimized bytecode:
|
||||
- `GET_INDEX reminders[j]` still feeds `MUL_OBJ` / `ADD_OBJ`
|
||||
- `sum / denom`, `sum % denom`, and `sum / 10` still compile to object arithmetic
|
||||
- `suffix += pi[i]` remains `ADD_OBJ`, which is expected because it is string/object concatenation
|
||||
- Conclusion:
|
||||
- The new inference produced a real VM-speed gain, especially with `RVAL_FASTPATH` enabled.
|
||||
- The next compiler win is stronger propagation from `List<Int>` indexed loads into the produced temporary slot so `sum` can stay typed as `INT` across the inner loop.
|
||||
|
||||
Optimization #4 follow-up:
|
||||
- Attempt: preserve boxed-argument metadata through `compileCallArgs()` so `list.add(x)` retains `ObjInt` / `ObjReal` element typing.
|
||||
- Compiler/runtime fixes:
|
||||
- `compileCallArgs()` now routes arguments through `ensureObjSlot()` + `emitMove()` instead of raw `BOX_OBJ`, preserving `slotObjClass` and `stableObjSlots`.
|
||||
- `CmdSetIndex` now reads `valueSlot` via `slotToObj()` so `SET_INDEX` can safely accept primitive slots.
|
||||
- Fast local unbox ops (`CmdUnboxIntObjLocal`, `CmdUnboxRealObjLocal`) now handle already-primitive source slots directly instead of assuming a raw object payload.
|
||||
- Plain assignment now coerces object-int RHS back into `INT` when the destination slot is currently compiled as `INT`, keeping loop-carried locals type-consistent.
|
||||
- Latest measured run after the propagation + VM fixes:
|
||||
- legacy-real-division: 438 ms (3 iters, avg 146.00 ms)
|
||||
- optimized-int-division-rval-off: 238 ms (3 iters, avg 79.33 ms)
|
||||
- optimized-int-division-rval-on: 201 ms (3 iters, avg 67.00 ms)
|
||||
- Derived speedups vs legacy in this run:
|
||||
- intDivSpeedup: 1.84x
|
||||
- rvalSpeedup: 1.18x
|
||||
- total: 2.18x
|
||||
- Compiled-code impact in the optimized case:
|
||||
- `sum = reminders[j] + carriedOver` is now `GET_INDEX` + `UNBOX_INT_OBJ` + `ADD_INT`
|
||||
- `reminders[j] = sum % denom` is now `MOD_INT` + `SET_INDEX`
|
||||
- `q = sum / 10` is now `DIV_INT`
|
||||
- `carriedOver = quotient * j` is now `MUL_INT`
|
||||
- Remaining hot object arithmetic in the optimized case:
|
||||
- `reminders[j] *= 10` still compiles as `GET_INDEX` + `MUL_OBJ` + `SET_INDEX`
|
||||
- `suffix += pi[i]` remains `ADD_OBJ`, which is expected string/object concatenation
|
||||
- Conclusion:
|
||||
- The main remaining arithmetic bottleneck is the compound index assignment path for `reminders[j] *= 10`.
|
||||
- The next direct win is to specialize `AssignOpRef` on typed list elements so indexed compound assignment can lower to `UNBOX_INT_OBJ` + `MUL_INT` + boxed `SET_INDEX`.
|
||||
|
||||
Optimization #5 follow-up:
|
||||
- Attempt: specialize typed `IndexRef` compound assignment so `List<Int>` element updates avoid object arithmetic.
|
||||
- Compiler change:
|
||||
- `compileAssignOp()` now detects non-optional typed `List<Int>` index targets and lowers arithmetic assign-ops through `UNBOX_INT_OBJ` + `*_INT` + `SET_INDEX`.
|
||||
- Latest measured run after the indexed compound-assignment change:
|
||||
- legacy-real-division: 394 ms (3 iters, avg 131.33 ms)
|
||||
- optimized-int-division-rval-off: 216 ms (3 iters, avg 72.00 ms)
|
||||
- optimized-int-division-rval-on: 184 ms (3 iters, avg 61.33 ms)
|
||||
- Derived speedups vs legacy in this run:
|
||||
- intDivSpeedup: 1.82x
|
||||
- rvalSpeedup: 1.17x
|
||||
- total: 2.14x
|
||||
- Compiled-code impact in the optimized case:
|
||||
- `reminders[j] *= 10` is now:
|
||||
- `GET_INDEX`
|
||||
- `UNBOX_INT_OBJ`
|
||||
- `MUL_INT`
|
||||
- `SET_INDEX`
|
||||
- The optimized inner loop no longer contains object arithmetic for the `reminders` state update path.
|
||||
- Remaining hot object work in the optimized case:
|
||||
- `suffix += pi[i]` remains `ADD_OBJ` and is expected string/object concatenation
|
||||
- The legacy benchmark case still carries real/object work because it intentionally keeps the original `floor(sum / (denom * 1.0))` path
|
||||
- Conclusion:
|
||||
- The inner arithmetic hot loop is now effectively int-lowered end-to-end in the optimized benchmark path.
|
||||
- Further wins will likely require reducing list access overhead itself (`GET_INDEX` / `SET_INDEX`) or changing the source algorithm/data layout, not more basic arithmetic lowering.
|
||||
|
||||
Optimization #6 follow-up:
|
||||
- Attempt: move the direct `ObjList` index fast path out from behind `RVAL_FASTPATH` so the common plain-list case is fast by default.
|
||||
- Runtime change:
|
||||
- `CmdGetIndex` and `CmdSetIndex` now always use direct `target.list[index]` / `target.list[index] = value` for exact `ObjList` receivers with `ObjInt` indices.
|
||||
- Subclasses such as `ObjObservableList` still use their overridden `getAt` / `putAt` logic, so semantics stay intact.
|
||||
- Latest measured run after the default plain-list path:
|
||||
- legacy-real-division: 397 ms (3 iters, avg 132.33 ms)
|
||||
- optimized-int-division-rval-off: 138 ms (3 iters, avg 46.00 ms)
|
||||
- optimized-int-division-rval-on: 164 ms (3 iters, avg 54.67 ms)
|
||||
- Derived speedups vs legacy in this run:
|
||||
- intDivSpeedup: 2.88x
|
||||
- rvalSpeedup: 0.84x
|
||||
- total: 2.42x
|
||||
- Interpretation:
|
||||
- The stable fast baseline is now the `rval-off` case, because the direct plain-`ObjList` path no longer depends on `RVAL_FASTPATH`.
|
||||
- `RVAL_FASTPATH` no longer improves this benchmark and only reflects remaining unrelated runtime variance.
|
||||
- Conclusion:
|
||||
- For `piSpigot`, the main VM list-access bottleneck is addressed in the default runtime path.
|
||||
- Further work on this benchmark should target algorithm/data-layout changes or string-result construction, not the old `RVAL_FASTPATH` gate.
|
||||
|
||||
Remaining optimization candidates:
|
||||
- `suffix += pi[i]` still compiles as repeated `ADD_OBJ` string/object concatenation.
|
||||
- Best next option: build the suffix through a dedicated buffer/list-join path instead of per-iteration concatenation.
|
||||
- The benchmark still performs many `GET_INDEX` / `SET_INDEX` operations even after the direct plain-`ObjList` fast path.
|
||||
- Best next option: reduce indexed access count at the source level or introduce a more specialized typed-list storage layout if this benchmark matters enough.
|
||||
- The legacy benchmark variant intentionally keeps the real-number `floor(sum / (denom * 1.0))` path.
|
||||
- No release optimization needed there; it remains only as a regression/control case.
|
||||
- `RVAL_FASTPATH` is no longer a useful tuning knob for this workload after the plain-list VM fast path.
|
||||
- Best next option: profile other workloads before changing or removing it globally.
|
||||
|
||||
Release stabilization note:
|
||||
- The broad assignment-side `INT` coercion and subclass-bypassing list fast path were rolled back/narrowed to restore correctness across numeric-mix, decimal, list, observable-list, and wasm tests.
|
||||
- Full release gates now pass:
|
||||
- `./gradlew test`
|
||||
- `./gradlew :lynglib:wasmJsNodeTest`
|
||||
- Current release-safe benchmark on the stabilized tree:
|
||||
- legacy-real-division: 732 ms (3 iters, avg 244.00 ms)
|
||||
- optimized-int-division-rval-off: 545 ms (3 iters, avg 181.67 ms)
|
||||
- optimized-int-division-rval-on: 697 ms (3 iters, avg 232.33 ms)
|
||||
- Interpretation:
|
||||
- The release baseline is now `optimized-int-division-rval-off` at 545 ms for the current correct/stable tree.
|
||||
- The removed coercion had been masking a real compiler typing gap; reintroducing it broadly is not release-safe.
|
||||
- Highest-value remaining compiler optimization after release:
|
||||
- Recover typed int lowering for `j = boxes - k`, `denom = j * 2 + 1`, `sum = reminders[j] + carriedOver`, and `carriedOver = quotient * j` using a narrower proof than the removed generic arithmetic coercion.
|
||||
Loading…
x
Reference in New Issue
Block a user