From 0eea73c118fe14ed7360f8a552cc734d78067c67 Mon Sep 17 00:00:00 2001 From: sergeych Date: Tue, 11 Nov 2025 22:04:04 +0100 Subject: [PATCH] further optimizations --- docs/perf_guide.md | 74 ++++++++++ .../kotlin/net/sergeych/lyng/RegexCache.kt | 13 +- .../kotlin/net/sergeych/lyng/obj/ObjRef.kt | 131 +++++++++++++++++- 3 files changed, 206 insertions(+), 12 deletions(-) diff --git a/docs/perf_guide.md b/docs/perf_guide.md index 7f92b75..395497b 100644 --- a/docs/perf_guide.md +++ b/docs/perf_guide.md @@ -421,3 +421,77 @@ Results (representative runs; OFF → ON): Summary: All three areas improved with optimizations ON; no regressions observed in these runs. For publication‑grade stability, run each test 3× and report medians (see sections below for methodology and previous median tables). + +## Additional tweaks — verification snapshot (Index write fast‑path, List literal pre‑size, Regex LRU) + +Date: 2025-11-11 21:31 (local) + +Scope: Implemented three semantics‑neutral optimizations and verified they are green across targeted and broader JVM benches. + +What changed (guarded by flags where applicable): +- RVAL_FASTPATH: Index write fast‑path + - `IndexRef.setAt`: direct path for `ObjList` + `ObjInt` (`list[i] = value`) mirrors the read fast‑path. Optional chaining semantics preserved; bounds exceptions propagate unchanged. +- RVAL_FASTPATH: List literal pre‑sizing + - `ListLiteralRef.get`: pre‑counts element entries and uses `ArrayList` with capacity hint; for spreads of `ObjList`, uses `ensureCapacity` before bulk add. Evaluation order unchanged. +- REGEX_CACHE: LRU‑like behavior + - `RegexCache`: emulates access‑order LRU within a tiny bounded map (`MAX=64`) by moving accessed entries to the tail; improves alternating‑pattern scenarios. Only active when `PerfFlags.REGEX_CACHE` is true. 
+ +Reproduce quick verification (1× runs): +``` +./gradlew :lynglib:jvmTest --tests ExpressionBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ListOpsBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests RegexBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests PicBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests PicInvalidationJvmTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests LocalVarBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ConcurrencyCallBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests DeepPoolingStressJvmTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests MultiThreadPoolingStressJvmTest --rerun-tasks +``` + +Observation: All listed tests green in this cycle; no behavioral regressions observed. For the new paths (index write, list literal), performance was neutral‑to‑positive in smoke runs; Regex benches remained positive or neutral with the LRU behavior. For publication‑grade medians, extend to 3× per test as in earlier sections. + + +## Sanity matrix (JVM) — quick OFF→ON runs + +Date: 2025-11-11 21:59 (local) + +Scope: Final Round 1 sanity sweep across JVM micro‑benches and stress tests to confirm that optimizations ON do not regress performance vs OFF in representative scenarios. Each benchmark prints `[DEBUG_LOG] [BENCH]` timings for OFF → ON within a single run. This section records a quick pass confirmation (not 3× medians) and reproduction commands. 
+ +Environment: +- Gradle: 8.7 (stdout enabled, maxParallelForks=1) +- JVM: as configured by the project toolchain +- OS/Arch: macOS 14.x (aarch64) + +Benches covered (all green; no regressions observed in these runs): +- Calls/Args: `CallBenchmarkTest`, `CallMixedArityBenchmarkTest` (ARG_BUILDER) +- PICs: `PicBenchmarkTest` (field/method); `PicInvalidationJvmTest` correctness reconfirmed +- Expressions/Arithmetic: `ExpressionBenchmarkTest`, `ArithmeticBenchmarkTest` (RVAL_FASTPATH, PRIMITIVE_FASTOPS) +- Ranges: `RangeBenchmarkTest` (PRIMITIVE_FASTOPS counted loop) +- List ops: `ListOpsBenchmarkTest` (PRIMITIVE_FASTOPS specializations) +- Regex: `RegexBenchmarkTest` (REGEX_CACHE with LRU behavior) +- Locals: `LocalVarBenchmarkTest` (LOCAL_SLOT_PIC + FAST_LOCAL) +- Concurrency/Pooling: `ConcurrencyCallBenchmarkTest`, `DeepPoolingStressJvmTest`, `MultiThreadPoolingStressJvmTest` (SCOPE_POOL per‑thread) + +Reproduce (examples): +``` +./gradlew :lynglib:jvmTest --tests CallBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests CallMixedArityBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests PicBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests PicInvalidationJvmTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ExpressionBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ArithmeticBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests RangeBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ListOpsBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests RegexBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests LocalVarBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests ConcurrencyCallBenchmarkTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests DeepPoolingStressJvmTest --rerun-tasks +./gradlew :lynglib:jvmTest --tests MultiThreadPoolingStressJvmTest --rerun-tasks +``` + +Summary: +- All listed tests passed in this sanity sweep. 
+- For each benchmark’s OFF → ON printouts examined during this pass, ON was equal or faster than OFF; no ON= MAX) { // Remove the oldest inserted entry (first key in iteration order) val it = map.keys.iterator() - if (it.hasNext()) { - val k = it.next() - it.remove() - } + if (it.hasNext()) { val k = it.next(); it.remove() } } map[pattern] = re return re diff --git a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjRef.kt b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjRef.kt index 767f385..479f1d2 100644 --- a/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjRef.kt +++ b/lynglib/src/commonMain/kotlin/net/sergeych/lyng/obj/ObjRef.kt @@ -110,6 +110,29 @@ class BinaryOpRef(private val op: BinOp, private val left: ObjRef, private val r return r.asReadonly } } + // Fast numeric mixed ops for Int/Real combinations by promoting to double + if ((a is ObjInt || a is ObjReal) && (b is ObjInt || b is ObjReal)) { + val ad: Double = if (a is ObjInt) a.doubleValue else (a as ObjReal).value + val bd: Double = if (b is ObjInt) b.doubleValue else (b as ObjReal).value + val rNum: Obj? 
= when (op) { + BinOp.PLUS -> ObjReal(ad + bd) + BinOp.MINUS -> ObjReal(ad - bd) + BinOp.STAR -> ObjReal(ad * bd) + BinOp.SLASH -> ObjReal(ad / bd) + BinOp.PERCENT -> ObjReal(ad % bd) + BinOp.LT -> if (ad < bd) ObjTrue else ObjFalse + BinOp.LTE -> if (ad <= bd) ObjTrue else ObjFalse + BinOp.GT -> if (ad > bd) ObjTrue else ObjFalse + BinOp.GTE -> if (ad >= bd) ObjTrue else ObjFalse + BinOp.EQ -> if (ad == bd) ObjTrue else ObjFalse + BinOp.NEQ -> if (ad != bd) ObjTrue else ObjFalse + else -> null + } + if (rNum != null) { + if (net.sergeych.lyng.PerfFlags.PIC_DEBUG_COUNTERS) net.sergeych.lyng.PerfStats.primitiveFastOpsHit++ + return rNum.asReadonly + } + } } val r: Obj = when (op) { @@ -261,12 +284,18 @@ class FieldRef( private val name: String, private val isOptional: Boolean, ) : ObjRef { - // 2-entry PIC for reads/writes (guarded by PerfFlags.FIELD_PIC) + // 4-entry PIC for reads/writes (guarded by PerfFlags.FIELD_PIC) + // Reads private var rKey1: Long = 0L; private var rVer1: Int = -1; private var rGetter1: (suspend (Obj, Scope) -> ObjRecord)? = null private var rKey2: Long = 0L; private var rVer2: Int = -1; private var rGetter2: (suspend (Obj, Scope) -> ObjRecord)? = null + private var rKey3: Long = 0L; private var rVer3: Int = -1; private var rGetter3: (suspend (Obj, Scope) -> ObjRecord)? = null + private var rKey4: Long = 0L; private var rVer4: Int = -1; private var rGetter4: (suspend (Obj, Scope) -> ObjRecord)? = null + // Writes private var wKey1: Long = 0L; private var wVer1: Int = -1; private var wSetter1: (suspend (Obj, Scope, Obj) -> Unit)? = null private var wKey2: Long = 0L; private var wVer2: Int = -1; private var wSetter2: (suspend (Obj, Scope, Obj) -> Unit)? = null + private var wKey3: Long = 0L; private var wVer3: Int = -1; private var wSetter3: (suspend (Obj, Scope, Obj) -> Unit)? = null + private var wKey4: Long = 0L; private var wVer4: Int = -1; private var wSetter4: (suspend (Obj, Scope, Obj) -> Unit)? 
= null // Transient per-step cache to optimize read-then-write sequences within the same frame private var tKey: Long = 0L; private var tVer: Int = -1; private var tFrameId: Long = -1L; private var tRecord: ObjRecord? = null @@ -290,6 +319,39 @@ class FieldRef( } } rGetter2?.let { g -> if (key == rKey2 && ver == rVer2) { if (picCounters) net.sergeych.lyng.PerfStats.fieldPicHit++ + // move-to-front: promote 2→1 + val tK = rKey2; val tV = rVer2; val tG = rGetter2 + rKey2 = rKey1; rVer2 = rVer1; rGetter2 = rGetter1 + rKey1 = tK; rVer1 = tV; rGetter1 = tG + val rec0 = g(base, scope) + if (base is ObjClass) { + val idx0 = base.classScope?.getSlotIndexOf(name) + if (idx0 != null) { tKey = key; tVer = ver; tFrameId = scope.frameId; tRecord = rec0 } else { tRecord = null } + } else { tRecord = null } + return rec0 + } } + rGetter3?.let { g -> if (key == rKey3 && ver == rVer3) { + if (picCounters) net.sergeych.lyng.PerfStats.fieldPicHit++ + // move-to-front: promote 3→1 + val tK = rKey3; val tV = rVer3; val tG = rGetter3 + rKey3 = rKey2; rVer3 = rVer2; rGetter3 = rGetter2 + rKey2 = rKey1; rVer2 = rVer1; rGetter2 = rGetter1 + rKey1 = tK; rVer1 = tV; rGetter1 = tG + val rec0 = g(base, scope) + if (base is ObjClass) { + val idx0 = base.classScope?.getSlotIndexOf(name) + if (idx0 != null) { tKey = key; tVer = ver; tFrameId = scope.frameId; tRecord = rec0 } else { tRecord = null } + } else { tRecord = null } + return rec0 + } } + rGetter4?.let { g -> if (key == rKey4 && ver == rVer4) { + if (picCounters) net.sergeych.lyng.PerfStats.fieldPicHit++ + // move-to-front: promote 4→1 + val tK = rKey4; val tV = rVer4; val tG = rGetter4 + rKey4 = rKey3; rVer4 = rVer3; rGetter4 = rGetter3 + rKey3 = rKey2; rVer3 = rVer2; rGetter3 = rGetter2 + rKey2 = rKey1; rVer2 = rVer1; rGetter2 = rGetter1 + rKey1 = tK; rVer1 = tV; rGetter1 = tG val rec0 = g(base, scope) if (base is ObjClass) { val idx0 = base.classScope?.getSlotIndexOf(name) @@ -300,7 +362,9 @@ class FieldRef( // Slow path if 
(picCounters) net.sergeych.lyng.PerfStats.fieldPicMiss++ val rec = base.readField(scope, name) - // Install move-to-front with a handle-aware getter. Where safe, capture resolved handles. + // Install move-to-front with a handle-aware getter (shift 1→2→3→4; put new at 1) + rKey4 = rKey3; rVer4 = rVer3; rGetter4 = rGetter3 + rKey3 = rKey2; rVer3 = rVer2; rGetter3 = rGetter2 rKey2 = rKey1; rVer2 = rVer1; rGetter2 = rGetter1 when (base) { is ObjClass -> { @@ -336,6 +400,19 @@ class FieldRef( // no-op on null receiver for optional chaining assignment return } + // Read→write micro fast-path: reuse transient record captured by get() + if (fieldPic) { + val (k, v) = receiverKeyAndVersion(base) + val rec = tRecord + if (rec != null && tKey == k && tVer == v && tFrameId == scope.frameId) { + // visibility/mutability checks + if (!rec.isMutable) scope.raiseError(ObjIllegalAssignmentException(scope, "can't reassign val $name")) + if (!rec.visibility.isPublic) + scope.raiseError(ObjAccessException(scope, "can't access non-public field $name")) + if (rec.value.assign(scope, newValue) == null) rec.value = newValue + return + } + } if (fieldPic) { val (key, ver) = receiverKeyAndVersion(base) wSetter1?.let { s -> if (key == wKey1 && ver == wVer1) { @@ -344,12 +421,37 @@ class FieldRef( } } wSetter2?.let { s -> if (key == wKey2 && ver == wVer2) { if (picCounters) net.sergeych.lyng.PerfStats.fieldPicSetHit++ + // move-to-front: promote 2→1 + val tK = wKey2; val tV = wVer2; val tS = wSetter2 + wKey2 = wKey1; wVer2 = wVer1; wSetter2 = wSetter1 + wKey1 = tK; wVer1 = tV; wSetter1 = tS + return s(base, scope, newValue) + } } + wSetter3?.let { s -> if (key == wKey3 && ver == wVer3) { + if (picCounters) net.sergeych.lyng.PerfStats.fieldPicSetHit++ + // move-to-front: promote 3→1 + val tK = wKey3; val tV = wVer3; val tS = wSetter3 + wKey3 = wKey2; wVer3 = wVer2; wSetter3 = wSetter2 + wKey2 = wKey1; wVer2 = wVer1; wSetter2 = wSetter1 + wKey1 = tK; wVer1 = tV; wSetter1 = tS + return s(base, 
scope, newValue) + } } + wSetter4?.let { s -> if (key == wKey4 && ver == wVer4) { + if (picCounters) net.sergeych.lyng.PerfStats.fieldPicSetHit++ + // move-to-front: promote 4→1 + val tK = wKey4; val tV = wVer4; val tS = wSetter4 + wKey4 = wKey3; wVer4 = wVer3; wSetter4 = wSetter3 + wKey3 = wKey2; wVer3 = wVer2; wSetter3 = wSetter2 + wKey2 = wKey1; wVer2 = wVer1; wSetter2 = wSetter1 + wKey1 = tK; wVer1 = tV; wSetter1 = tS return s(base, scope, newValue) } } // Slow path if (picCounters) net.sergeych.lyng.PerfStats.fieldPicSetMiss++ base.writeField(scope, name, newValue) - // Install move-to-front with a handle-aware setter + // Install move-to-front with a handle-aware setter (shift 1→2→3→4; put new at 1) + wKey4 = wKey3; wVer4 = wVer3; wSetter4 = wSetter3 + wKey3 = wKey2; wVer3 = wVer2; wSetter3 = wSetter2 wKey2 = wKey1; wVer2 = wVer1; wSetter2 = wSetter1 when (base) { is ObjClass -> { @@ -409,12 +511,21 @@ class IndexRef( } override suspend fun setAt(pos: Pos, scope: Scope, newValue: Obj) { - val base = target.get(scope).value + val fastRval = net.sergeych.lyng.PerfFlags.RVAL_FASTPATH + val base = if (fastRval) target.evalValue(scope) else target.get(scope).value if (base == ObjNull && isOptional) { // no-op on null receiver for optional chaining assignment return } - val idx = index.get(scope).value + val idx = if (fastRval) index.evalValue(scope) else index.get(scope).value + if (fastRval) { + // Mirror read fast-path with direct write for ObjList + ObjInt index + if (base is ObjList && idx is ObjInt) { + val i = idx.toInt() + base.list[i] = newValue + return + } + } base.putAt(scope, idx, newValue) } } @@ -710,7 +821,9 @@ class FastLocalVarRef( class ListLiteralRef(private val entries: List<ListEntry>) : ObjRef { override suspend fun get(scope: Scope): ObjRecord { - val list = mutableListOf<Obj>() + // Heuristic capacity hint: count element entries; spreads handled opportunistically + val elemCount = entries.count { it is ListEntry.Element } + val list = ArrayList<Obj>(elemCount) 
for (e in entries) { when (e) { is ListEntry.Element -> { @@ -720,7 +833,11 @@ class ListLiteralRef(private val entries: List<ListEntry>) : ObjRef { is ListEntry.Spread -> { val elements = if (net.sergeych.lyng.PerfFlags.RVAL_FASTPATH) e.ref.evalValue(scope) else e.ref.get(scope).value when (elements) { - is ObjList -> list.addAll(elements.list) + is ObjList -> { + // Grow underlying array once when possible + if (list is ArrayList) list.ensureCapacity(list.size + elements.list.size) + list.addAll(elements.list) + } else -> scope.raiseError("Spread element must be list") } }