further locals optimization

This commit is contained in:
Sergey Chernov 2025-11-10 01:35:15 +01:00
parent 0b9e94c6e9
commit 1498140892
5 changed files with 287 additions and 51 deletions

View File

@ -31,6 +31,25 @@ class Compiler(
settings: Settings = Settings()
) {
// Parameter-to-slot plans (keyed by parameter name, valued by declaration index), one entry per
// function currently being parsed. The most recently added entry is the one treated as current.
private val paramSlotPlanStack: MutableList<Map<String, Int>> = mutableListOf()
private val currentParamSlotPlan: Map<String, Int>?
    get() = if (paramSlotPlanStack.isEmpty()) null else paramSlotPlanStack.last()
// Names known to be locals/parameters, one mutable set per nesting level being compiled.
// The most recently added set is the current one; it is consulted when deciding whether an
// identifier can be emitted as a fast local reference.
private val localNamesStack: MutableList<MutableSet<String>> = mutableListOf()
private val currentLocalNames: MutableSet<String>?
    get() = if (localNamesStack.isEmpty()) null else localNamesStack.last()
/**
 * Runs [block] with a fresh set of known local names seeded from [names].
 *
 * A mutable copy of [names] is pushed before [block] starts and popped again when it finishes,
 * whether it returns normally or throws, so the caller's set is never modified.
 *
 * @return whatever [block] returns
 */
private inline fun <T> withLocalNames(names: Set<String>, block: () -> T): T {
    val frame = names.toMutableSet()
    localNamesStack.add(frame)
    try {
        return block()
    } finally {
        localNamesStack.removeLast()
    }
}
/**
 * Records [name] in the current set of known local names.
 * Does nothing when no set is active.
 */
private fun declareLocalName(name: String) {
    val active = currentLocalNames ?: return
    active.add(name)
}
var packageName: String? = null
class Settings
@ -59,7 +78,8 @@ class Compiler(
private suspend fun parseScript(): Script {
val statements = mutableListOf<Statement>()
val start = cc.currentPos()
// val returnScope = cc.startReturnScope()
// Track locals at script level for fast local refs
return withLocalNames(emptySet()) {
// package level declarations
do {
val t = cc.current()
@ -107,7 +127,8 @@ class Compiler(
}
} while (true)
return Script(start, statements)//returnScope.needCatch)
Script(start, statements)
}
}
fun loadQualifiedName(): String {
@ -719,7 +740,9 @@ class Compiler(
"null" -> ConstRef(ObjNull.asReadonly)
"true" -> ConstRef(ObjTrue.asReadonly)
"false" -> ConstRef(ObjFalse.asReadonly)
else -> LocalVarRef(t.value, t.pos)
else -> if (PerfFlags.EMIT_FAST_LOCAL_REFS && (currentLocalNames?.contains(t.value) == true))
FastLocalVarRef(t.value, t.pos)
else LocalVarRef(t.value, t.pos)
}
}
@ -1606,11 +1629,13 @@ class Compiler(
return inCodeContext(CodeContext.Function(name)) {
val paramNames: Set<String> = argsDeclaration.params.map { it.name }.toSet()
// Here we should be at open body
val fnStatements = if (isExtern)
statement { raiseError("extern function not provided: $name") }
else
parseBlock()
withLocalNames(paramNames) { parseBlock() }
var closure: Scope? = null
@ -1699,6 +1724,9 @@ class Compiler(
val eqToken = cc.next()
var setNull = false
// Register the local name at compile time so that subsequent identifiers can be emitted as fast locals
if (!isStatic) declareLocalName(name)
val isDelegate = if (eqToken.isId("by")) {
true
} else {
@ -1736,6 +1764,9 @@ class Compiler(
if (context.containsLocal(name))
throw ScriptError(nameToken.pos, "Variable $name is already defined")
// Register the local name so subsequent identifiers can be emitted as fast locals
if (!isStatic) declareLocalName(name)
if (isDelegate) {
TODO()
// println("initial expr = $initialExpression")

View File

@ -0,0 +1,12 @@
package net.sergeych.lyng
/**
 * Performance switches that can be flipped at runtime, intended for micro-benchmarks and
 * A/B comparisons on the JVM.
 *
 * The properties are deliberately mutable (`var`) so that tests can toggle them.
 */
object PerfFlags {
    /** When `true`, `LocalVarRef` uses its per-frame name->slot cache (PIC). */
    var LOCAL_SLOT_PIC = true

    /** When `true`, the compiler emits fast local refs for identifiers known to be function locals/params. */
    var EMIT_FAST_LOCAL_REFS = true
}

View File

@ -21,6 +21,10 @@ import net.sergeych.lyng.obj.*
import net.sergeych.lyng.pacman.ImportManager
import net.sergeych.lyng.pacman.ImportProvider
// Sequential id source for scope frames, used by the perf caches.
// Intentionally not thread-safe (considered fine for scripts). The first id handed out is 1.
private object FrameIdGen {
    var c: Long = 1L

    fun nextId(): Long {
        val issued = c
        c = issued + 1
        return issued
    }
}

// File-local shortcut for obtaining the next frame id.
private fun nextFrameId(): Long = FrameIdGen.nextId()
/**
* Scope is where local variables and methods are stored. Scope is also a parent scope for other scopes.
* Each block usually creates a scope. Accessing Lyng closures usually is done via a scope.
@ -42,12 +46,17 @@ open class Scope(
var thisObj: Obj = ObjVoid,
var skipScopeCreation: Boolean = false,
) {
// Identifier assigned once when the scope is constructed; PICs compare it to tell frames apart.
// It never changes for the lifetime of the frame and is cheap to compare.
val frameId: Long = nextFrameId()

// Slot-indexed storage giving a fast path to local variables/arguments.
// Enabled by default for child scopes; module/class scopes can ignore it.
private val slots = mutableListOf<ObjRecord>()
private val nameToSlot = mutableMapOf<String, Int>()
open val packageName: String = "<anonymous package>"

/** Number of slots currently allocated in this scope. */
fun slotCount(): Int {
    return slots.size
}
constructor(
args: Arguments = Arguments.EMPTY,
pos: Pos = Pos.builtIn,

View File

@ -290,16 +290,34 @@ class MethodCallRef(
* Reference to a local/visible variable by name (Phase A: scope lookup).
*/
class LocalVarRef(private val name: String, private val atPos: Pos) : ObjRef {
// Slot cache keyed by frame id, so repeated accesses from the same frame skip the name lookup.
// The initial 0L matches no real frame because generated frame ids start at 1.
private var cachedFrameId: Long = 0L
private var cachedSlot: Int = -1

/**
 * Looks [name] up in [scope]'s slot table.
 *
 * On a hit the frame id and slot index are remembered for later accesses; on a miss the cache
 * is left untouched.
 *
 * @return the slot index, or -1 when the scope has no slot for the name
 */
private fun resolveSlot(scope: Scope): Int {
    val found = scope.getSlotIndexOf(name) ?: return -1
    cachedFrameId = scope.frameId
    cachedSlot = found
    return found
}
/**
 * Reads the variable's record from [scope].
 *
 * The slot table is tried first: through the per-frame cache when [PerfFlags.LOCAL_SLOT_PIC]
 * is on, or by a plain slot lookup when it is off. If no slot is found, the name is looked up
 * with `scope[name]`, and an error is raised if that fails too.
 */
override suspend fun get(scope: Scope): ObjRecord {
    scope.pos = atPos
    if (PerfFlags.LOCAL_SLOT_PIC) {
        // The cached index is trusted only for the frame it was resolved in and while in range.
        val cacheHit = cachedFrameId == scope.frameId && cachedSlot >= 0 && cachedSlot < scope.slotCount()
        val slotIndex = if (cacheHit) cachedSlot else resolveSlot(scope)
        if (slotIndex >= 0) return scope.getSlotRecord(slotIndex)
    } else {
        // PIC disabled: look the slot up on every access.
        val slotIndex = scope.getSlotIndexOf(name)
        if (slotIndex != null) return scope.getSlotRecord(slotIndex)
    }
    // No slot for this name: fall back to lookup by name.
    return scope[name] ?: scope.raiseError("symbol not defined: '$name'")
}
override suspend fun setAt(pos: Pos, scope: Scope, newValue: Obj) {
scope.pos = atPos
// Fast-path: slot lookup
if (!PerfFlags.LOCAL_SLOT_PIC) {
scope.getSlotIndexOf(name)?.let {
val rec = scope.getSlotRecord(it)
if (!rec.isMutable) scope.raiseError("Cannot assign to immutable value")
@ -309,6 +327,18 @@ class LocalVarRef(private val name: String, private val atPos: Pos) : ObjRef {
val stored = scope[name] ?: scope.raiseError("symbol not defined: '$name'")
if (stored.isMutable) stored.value = newValue
else scope.raiseError("Cannot assign to immutable value")
return
}
val slot = if (cachedFrameId == scope.frameId && cachedSlot >= 0 && cachedSlot < scope.slotCount()) cachedSlot else resolveSlot(scope)
if (slot >= 0) {
val rec = scope.getSlotRecord(slot)
if (!rec.isMutable) scope.raiseError("Cannot assign to immutable value")
rec.value = newValue
return
}
val stored = scope[name] ?: scope.raiseError("symbol not defined: '$name'")
if (stored.isMutable) stored.value = newValue
else scope.raiseError("Cannot assign to immutable value")
}
}
@ -316,6 +346,84 @@ class LocalVarRef(private val name: String, private val atPos: Pos) : ObjRef {
/**
* Array/list literal construction without per-access lambdas.
*/
/**
 * Reference to a variable addressed directly by its slot index in the scope it is evaluated in.
 * No name lookup is performed.
 */
class BoundLocalVarRef(
    private val slot: Int,
    private val atPos: Pos,
) : ObjRef {
    /** Marks [atPos] as the scope's current position and returns the record stored in [slot]. */
    override suspend fun get(scope: Scope): ObjRecord {
        scope.pos = atPos
        val record = scope.getSlotRecord(slot)
        return record
    }

    /** Stores [newValue] into the slot's record; raises an error if the record is immutable. */
    override suspend fun setAt(pos: Pos, scope: Scope, newValue: Obj) {
        scope.pos = atPos
        val target = scope.getSlotRecord(slot)
        if (target.isMutable) {
            target.value = newValue
        } else {
            scope.raiseError("Cannot assign to immutable value")
        }
    }
}
/**
 * By-name reference for identifiers the compiler has classified as locals or parameters.
 *
 * The scope that owns the slot and the slot index are cached after a successful lookup and
 * reused for as long as that owner is the executing scope or one of its parents. There is no
 * fallback to `scope[name]` lookup: if no scope in the parent chain has a slot for the name,
 * an error is raised.
 */
class FastLocalVarRef(
    private val name: String,
    private val atPos: Pos,
) : ObjRef {
    // The cache remembers the scope that actually owns the slot, which may be a parent of the
    // scope the reference is evaluated in.
    private var cachedOwnerScope: Scope? = null
    private var cachedOwnerFrameId: Long = 0L
    private var cachedSlot: Int = -1

    /** True when the cached owner exists, still has the cached frame id, and is [current] or one of its parents. */
    private fun isOwnerValidFor(current: Scope): Boolean {
        val owner = cachedOwnerScope
        if (owner == null || owner.frameId != cachedOwnerFrameId) return false
        // Climb the parent chain until the owner is reached or the chain ends.
        var cursor: Scope? = current
        while (cursor != null && cursor !== owner) cursor = cursor.parent
        return cursor != null
    }

    /**
     * Searches [scope] and then its parents for a slot named [name], caching the first hit.
     *
     * @return the slot index, or -1 (cache left unchanged) when no scope in the chain has it
     */
    private fun resolveSlotInAncestry(scope: Scope): Int {
        var candidate: Scope? = scope
        while (candidate != null) {
            val found = candidate.getSlotIndexOf(name)
            if (found != null) {
                cachedOwnerScope = candidate
                cachedOwnerFrameId = candidate.frameId
                cachedSlot = found
                return found
            }
            candidate = candidate.parent
        }
        return -1
    }

    /** Returns the cached slot when the cache applies to [scope]; otherwise resolves it again. */
    private fun currentSlot(scope: Scope): Int =
        if (isOwnerValidFor(scope) && cachedSlot >= 0) cachedSlot else resolveSlotInAncestry(scope)

    /** Returns the record of the local, read from the scope that owns its slot. */
    override suspend fun get(scope: Scope): ObjRecord {
        scope.pos = atPos
        val slot = currentSlot(scope)
        // Read the owner only after resolution, which may have just replaced it.
        val holder = cachedOwnerScope
        if (slot < 0 || holder == null) scope.raiseError("local '$name' is not available in this scope")
        return holder.getSlotRecord(slot)
    }

    /** Stores [newValue] into the local's record; raises an error if the record is immutable. */
    override suspend fun setAt(pos: Pos, scope: Scope, newValue: Obj) {
        scope.pos = atPos
        val slot = currentSlot(scope)
        val holder = cachedOwnerScope
        if (slot < 0 || holder == null) scope.raiseError("local '$name' is not available in this scope")
        val target = holder.getSlotRecord(slot)
        if (!target.isMutable) scope.raiseError("Cannot assign to immutable value")
        target.value = newValue
    }
}
class ListLiteralRef(private val entries: List<ListEntry>) : ObjRef {
override suspend fun get(scope: Scope): ObjRecord {
val list = mutableListOf<Obj>()

View File

@ -0,0 +1,76 @@
/*
* Tiny JVM benchmark for local variable access performance.
*/
// import net.sergeych.tools.bm
import kotlinx.coroutines.runBlocking
import net.sergeych.lyng.PerfFlags
import net.sergeych.lyng.Scope
import net.sergeych.lyng.obj.ObjInt
import kotlin.test.Test
import kotlin.test.assertEquals
class LocalVarBenchmarkTest {
    /**
     * Times a simple counting loop under three flag configurations and checks that each run
     * produces the same sum:
     *
     * 1. `LOCAL_SLOT_PIC` off, `EMIT_FAST_LOCAL_REFS` off (baseline),
     * 2. `LOCAL_SLOT_PIC` on, `EMIT_FAST_LOCAL_REFS` off,
     * 3. both on, with the loop wrapped in a function.
     *
     * [PerfFlags] is global mutable state, so the values found on entry are restored in a
     * `finally` block. Without that, a failed assertion or an exception from `eval` would leave
     * the flags in a non-default state for every test that runs afterwards.
     */
    @Test
    fun benchmarkLocalVarLoop(): Unit = runBlocking {
        val savedPic = PerfFlags.LOCAL_SLOT_PIC
        val savedEmit = PerfFlags.EMIT_FAST_LOCAL_REFS
        try {
            val n = 400_000 // keep under 1s even on CI
            val code = """
                var s = 0
                var i = 0
                while(i < $n) {
                s = s + i
                i = i + 1
                }
                s
            """.trimIndent()

            // Part 1: PIC off vs on for LocalVarRef
            PerfFlags.EMIT_FAST_LOCAL_REFS = false

            // Baseline: disable PIC
            PerfFlags.LOCAL_SLOT_PIC = false
            val scope1 = Scope()
            val t0 = System.nanoTime()
            val result1 = (scope1.eval(code) as ObjInt).value
            val t1 = System.nanoTime()
            println("[DEBUG_LOG] [BENCH] local-var loop $n iters [baseline PIC=OFF, EMIT=OFF]: ${(t1 - t0) / 1_000_000.0} ms")

            // Optimized: enable PIC
            PerfFlags.LOCAL_SLOT_PIC = true
            val scope2 = Scope()
            val t2 = System.nanoTime()
            val result2 = (scope2.eval(code) as ObjInt).value
            val t3 = System.nanoTime()
            println("[DEBUG_LOG] [BENCH] local-var loop $n iters [baseline PIC=ON, EMIT=OFF]: ${(t3 - t2) / 1_000_000.0} ms")

            // Verify correctness to avoid dead code elimination in future optimizations
            val expected = (n.toLong() - 1L) * n / 2L
            assertEquals(expected, result1)
            assertEquals(expected, result2)

            // Part 2: Enable compiler fast locals emission and measure
            PerfFlags.EMIT_FAST_LOCAL_REFS = true
            PerfFlags.LOCAL_SLOT_PIC = true
            val code2 = """
                fun sumN(n) {
                var s = 0
                var i = 0
                while(i < n) {
                s = s + i
                i = i + 1
                }
                s
                }
                sumN($n)
            """.trimIndent()
            val scope3 = Scope()
            val t4 = System.nanoTime()
            val result3 = (scope3.eval(code2) as ObjInt).value
            val t5 = System.nanoTime()
            println("[DEBUG_LOG] [BENCH] local-var loop $n iters [EMIT=ON]: ${(t5 - t4) / 1_000_000.0} ms")
            assertEquals(expected, result3)
        } finally {
            // Put the global flags back exactly as they were found.
            PerfFlags.LOCAL_SLOT_PIC = savedPic
            PerfFlags.EMIT_FAST_LOCAL_REFS = savedEmit
        }
    }
}