list serialization (optimized, homogeneous and heterogeneous)

This commit is contained in:
Sergey Chernov 2025-08-03 13:29:33 +03:00
parent 12b209c724
commit d7bd159fcb
9 changed files with 164 additions and 31 deletions

1
.gitignore vendored
View File

@ -14,3 +14,4 @@ xcuserdata
.gigaide .gigaide
/kotlin-js-store/yarn.lock /kotlin-js-store/yarn.lock
/test.lyng /test.lyng
/sample_texts/1.txt.gz

View File

@ -366,6 +366,7 @@ object ObjNull : Obj() {
override suspend fun lynonType(): LynonType { override suspend fun lynonType(): LynonType {
return LynonType.Null return LynonType.Null
} }
override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) { override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
if (lynonType == null) { if (lynonType == null) {
encoder.putBit(0) encoder.putBit(0)

View File

@ -82,9 +82,7 @@ open class ObjBuffer(val byteArray: UByteArray) : Obj() {
override suspend fun lynonType(): LynonType = LynonType.Buffer override suspend fun lynonType(): LynonType = LynonType.Buffer
override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) { override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
encoder.encodeCached(byteArray) { encoder.encodeCachedBytes(byteArray.asByteArray())
bout.compress(byteArray.asByteArray())
}
} }
companion object { companion object {

View File

@ -117,6 +117,7 @@ class ObjInt(var value: Long, override val isConst: Boolean = false) : Obj(), Nu
LynonType.Int0 -> {} LynonType.Int0 -> {}
LynonType.IntPositive -> encoder.encodeUnsigned(value.toULong()) LynonType.IntPositive -> encoder.encodeUnsigned(value.toULong())
LynonType.IntNegative -> encoder.encodeUnsigned((-value).toULong()) LynonType.IntNegative -> encoder.encodeUnsigned((-value).toULong())
LynonType.IntSigned -> encoder.encodeSigned(value)
else -> scope.raiseIllegalArgument("Unsupported lynon type code for Int: $lynonType") else -> scope.raiseIllegalArgument("Unsupported lynon type code for Int: $lynonType")
} }
} }
@ -131,6 +132,7 @@ class ObjInt(var value: Long, override val isConst: Boolean = false) : Obj(), Nu
LynonType.Int0 -> Zero LynonType.Int0 -> Zero
LynonType.IntPositive -> ObjInt(decoder.unpackUnsigned().toLong()) LynonType.IntPositive -> ObjInt(decoder.unpackUnsigned().toLong())
LynonType.IntNegative -> ObjInt(-decoder.unpackUnsigned().toLong()) LynonType.IntNegative -> ObjInt(-decoder.unpackUnsigned().toLong())
LynonType.IntSigned -> ObjInt(decoder.unpackSigned())
else -> scope.raiseIllegalState("illegal type code for Int: $lynonType") else -> scope.raiseIllegalState("illegal type code for Int: $lynonType")
} }
} }

View File

@ -2,6 +2,9 @@ package net.sergeych.lyng.obj
import net.sergeych.lyng.Scope import net.sergeych.lyng.Scope
import net.sergeych.lyng.statement import net.sergeych.lyng.statement
import net.sergeych.lynon.LynonDecoder
import net.sergeych.lynon.LynonEncoder
import net.sergeych.lynon.LynonType
class ObjList(val list: MutableList<Obj> = mutableListOf()) : Obj() { class ObjList(val list: MutableList<Obj> = mutableListOf()) : Obj() {
@ -125,9 +128,18 @@ class ObjList(val list: MutableList<Obj> = mutableListOf()) : Obj() {
return list == other.list return list == other.list
} }
companion object { override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
val type = ObjClass("List", ObjArray).apply { encoder.encodeAnyList(scope,list)
}
override suspend fun lynonType(): LynonType = LynonType.List
companion object {
val type = object : ObjClass("List", ObjArray) {
override suspend fun deserialize(scope: Scope, decoder: LynonDecoder, lynonType: LynonType?): Obj {
return ObjList(decoder.decodeAnyList(scope))
}
}.apply {
createField("size", createField("size",
statement { statement {
(thisObj as ObjList).list.size.toObj() (thisObj as ObjList).list.size.toObj()

View File

@ -84,17 +84,14 @@ data class ObjString(val value: String) : Obj() {
override suspend fun lynonType(): LynonType = LynonType.String override suspend fun lynonType(): LynonType = LynonType.String
override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) { override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
val data = value.encodeToByteArray() encoder.encodeBinaryData(value.encodeToByteArray())
encoder.encodeCached(data) { encoder.encodeBinaryData(data) }
} }
companion object { companion object {
val type = object : ObjClass("String") { val type = object : ObjClass("String") {
override suspend fun deserialize(scope: Scope, decoder: LynonDecoder, lynonType: LynonType?): Obj = override suspend fun deserialize(scope: Scope, decoder: LynonDecoder, lynonType: LynonType?): Obj =
decoder.decodeCached {
ObjString(decoder.unpackBinaryData().decodeToString()) ObjString(decoder.unpackBinaryData().decodeToString())
}
}.apply { }.apply {
addFn("toInt") { addFn("toInt") {
ObjInt(thisAs<ObjString>().value.toLong()) ObjInt(thisAs<ObjString>().value.toLong())

View File

@ -17,10 +17,11 @@ open class LynonDecoder(val bin: BitInput, val settings: LynonSettings = LynonSe
val cache = mutableListOf<Any>() val cache = mutableListOf<Any>()
inline fun <T : Any>decodeCached(f: LynonDecoder.() -> T): T { inline fun <reified T : Any>decodeCached(f: LynonDecoder.() -> T): T {
return if (bin.getBit() == 0) { return if (bin.getBit() == 0) {
// unpack and cache // unpack and cache
f().also { f().also {
// println("decode: cache miss: ${cache.size}: $it:${it::class.simpleName}")
if (settings.shouldCache(it)) cache.add(it) if (settings.shouldCache(it)) cache.add(it)
} }
} else { } else {
@ -29,7 +30,8 @@ open class LynonDecoder(val bin: BitInput, val settings: LynonSettings = LynonSe
val id = bin.getBitsOrNull(size)?.toInt() val id = bin.getBitsOrNull(size)?.toInt()
?: throw RuntimeException("Invalid object id: unexpected end of stream") ?: throw RuntimeException("Invalid object id: unexpected end of stream")
if (id >= cache.size) throw RuntimeException("Invalid object id: $id should be in 0..<${cache.size}") if (id >= cache.size) throw RuntimeException("Invalid object id: $id should be in 0..<${cache.size}")
@Suppress("UNCHECKED_CAST") // println("decode: cache hit ${id}: ${cache[id]}:${cache[id]::class.simpleName}")
// @Suppress("UNCHECKED_CAST")
cache[id] as T cache[id] as T
} }
} }
@ -39,8 +41,29 @@ open class LynonDecoder(val bin: BitInput, val settings: LynonSettings = LynonSe
type.objClass.deserialize(scope, this, type) type.objClass.deserialize(scope, this, type)
} }
suspend fun decodeObject(scope: Scope, type: ObjClass): Obj { suspend fun decodeAnyList(scope: Scope): MutableList<Obj> {
return decodeCached { type.deserialize(scope, this, null) } return if( bin.getBit() == 1) {
// homogenous
val type = LynonType.entries[getBitsAsInt(4)]
val size = bin.unpackUnsigned().toInt()
println("detected homogenous list type $type, $size items")
val list = mutableListOf<Obj>()
val objClass = type.objClass
for( i in 0 ..< size) {
list += decodeObject(scope, objClass, type).also {
println("decoded: $it")
}
}
list
}
else {
val size = unpackUnsigned().toInt()
(0..<size).map { decodeAny(scope) }.toMutableList()
}
}
suspend fun decodeObject(scope: Scope, type: ObjClass,overrideType: LynonType?=null): Obj {
return decodeCached { type.deserialize(scope, this, overrideType) }
} }
fun unpackBinaryData(): ByteArray = bin.decompress() fun unpackBinaryData(): ByteArray = bin.decompress()

View File

@ -4,21 +4,45 @@ import net.sergeych.bintools.ByteChunk
import net.sergeych.lyng.Scope import net.sergeych.lyng.Scope
import net.sergeych.lyng.obj.* import net.sergeych.lyng.obj.*
enum class LynonType(val objClass: ObjClass,val defaultFrequency: Int = 1) { enum class LynonType(val objClass: ObjClass, val defaultFrequency: Int = 1) {
Null(ObjNull.objClass, 80), Null(ObjNull.objClass, 80),
Int0(ObjInt.type, 70), Int0(ObjInt.type, 70),
IntNegative(ObjInt.type, 50), IntNegative(ObjInt.type, 50),
IntPositive(ObjInt.type, 100), IntPositive(ObjInt.type, 100),
IntSigned(ObjInt.type, 30),
String(ObjString.type, 100), String(ObjString.type, 100),
Real(ObjReal.type), Real(ObjReal.type),
Bool(ObjBool.type, 80), Bool(ObjBool.type, 80),
List(ObjList.type, 70), List(ObjList.type, 70),
Map(ObjMap.type,40), Map(ObjMap.type, 40),
Set(ObjSet.type), Set(ObjSet.type),
Buffer(ObjBuffer.type, 50), Buffer(ObjBuffer.type, 50),
Instant(ObjInstant.type, 30), Instant(ObjInstant.type, 30),
Duration(ObjDuration.type), Duration(ObjDuration.type),
Other(Obj.rootObjectType,60); Other(Obj.rootObjectType, 60);
/**
 * Find a single type code able to represent both this type and [other].
 *
 * Identical types generalize to themselves. Two differing integer codes are merged:
 * [Int0] yields to the other code, and any other pair widens to [IntSigned].
 *
 * @param other the type code to merge with this one.
 * @return the common type code, or `null` when the two types cannot be generalized.
 */
fun generalizeTo(other: LynonType): LynonType? = when {
    this == other -> this
    // only integer codes can be merged with each other
    !(isInt && other.isInt) -> null
    // zero fits into whatever integer code the other side uses
    this == Int0 -> other
    other == Int0 -> this
    // any other pair of differing integer codes needs the signed encoding
    else -> IntSigned
}
/**
 * `true` for the integer type codes: [Int0], [IntSigned], [IntPositive] and [IntNegative].
 *
 * Computed on access: the check is a few reference comparisons, so no per-entry
 * `Lazy` holder is needed.
 */
val isInt: Boolean
    get() = this == Int0 || this == IntSigned || this == IntPositive || this == IntNegative
} }
open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = LynonSettings.default) { open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = LynonSettings.default) {
@ -29,14 +53,20 @@ open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = Lynon
suspend fun serializeAndCache(key: Any = item) { suspend fun serializeAndCache(key: Any = item) {
cache[key]?.let { cacheId -> cache[key]?.let { cacheId ->
// println("encode: Cache hit: ${cacheId}: $item: ${item::class.simpleName}")
val size = sizeInBits(cache.size) val size = sizeInBits(cache.size)
bout.putBit(1) bout.putBit(1)
bout.putBits(cacheId.toULong(), size) bout.putBits(cacheId.toULong(), size)
} ?: run { } ?: run {
bout.putBit(0) bout.putBit(0)
if (settings.shouldCache(item)) if (settings.shouldCache(item)) {
cache[key] = cache.size // println("encode add cache: ${cache.size}: $item: ${item::class.simpleName}")
packer() packer()
cache[key] = cache.size
} else {
// println("encode but not cache $item")
packer()
}
} }
} }
@ -53,21 +83,66 @@ open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = Lynon
* *
* Caching is used automatically. * Caching is used automatically.
*/ */
suspend fun encodeAny(scope: Scope, value: Obj) { suspend fun encodeAny(scope: Scope, obj: Obj) {
encodeCached(value) { encodeCached(obj) {
val type = value.lynonType() val type = putTypeRecord(obj, obj.lynonType())
putType(type) obj.serialize(scope, this, type)
value.serialize(scope, this, type)
} }
} }
/**
 * Write [type] to the output via [putType] and return the same [type] to the caller.
 *
 * [obj] is accepted but not used by this implementation.
 */
private fun putTypeRecord(obj: Obj, type: LynonType): LynonType = type.also { putType(it) }
private fun putType(type: LynonType) { private fun putType(type: LynonType) {
bout.putBits(type.ordinal.toULong(), 4) bout.putBits(type.ordinal.toULong(), 4)
} }
suspend fun encodeObject(scope: Scope, obj: Obj) { /**
* AnyList can be homogeneous (first bit=1) or heterogeneous (first bit=0). A homogeneous list
* has a single type record that precedes the list; a heterogeneous list has a type record
* for each item.
*
*/
suspend fun encodeAnyList(scope: Scope, list: List<Obj>) {
    // An empty list has no first element to take a class or type from. Write it in the
    // heterogeneous form (bit 0) with size 0: the decoder reads the size and produces
    // an empty list without touching any item.
    if (list.isEmpty()) {
        putBit(0)
        encodeUnsigned(list.size.toULong())
        return
    }

    val first = list[0]
    val objClass = first.objClass
    var type = first.lynonType()
    var isHomogeneous = true

    // Compare every following item with the first one; stop at the first class mismatch.
    for (index in 1..<list.size) {
        val item = list[index]
        if (item.objClass != objClass) {
            isHomogeneous = false
            break
        }
        // same class but type might need generalization
        val itemType = item.lynonType()
        type = type.generalizeTo(itemType)
            ?: scope.raiseError("inner error: can't generalize lynon type $type to $itemType")
    }

    if (isHomogeneous) {
        // bit 1, one shared type record, size, then items written without type records
        putBit(1)
        putTypeRecord(first, type)
        encodeUnsigned(list.size.toULong())
        for (item in list) encodeObject(scope, item, type)
    } else {
        // bit 0, size, then every item written with its own type record
        putBit(0)
        encodeUnsigned(list.size.toULong())
        for (item in list) encodeAny(scope, item)
    }
}
/**
* Write object _with no type record_: type is known
*/
suspend fun encodeObject(scope: Scope, obj: Obj,overrideType: LynonType? = null) {
encodeCached(obj) { encodeCached(obj) {
obj.serialize(scope, this, null) obj.serialize(scope, this, overrideType)
}
}
suspend fun encodeCachedBytes(bytes: ByteArray) {
encodeCached(bytes) {
bout.compress(bytes)
} }
} }

View File

@ -1,4 +1,5 @@
import junit.framework.TestCase.* import junit.framework.TestCase.assertNotSame
import junit.framework.TestCase.assertSame
import kotlinx.coroutines.test.runTest import kotlinx.coroutines.test.runTest
import net.sergeych.bintools.encodeToHex import net.sergeych.bintools.encodeToHex
import net.sergeych.lyng.Scope import net.sergeych.lyng.Scope
@ -9,6 +10,8 @@ import java.nio.file.Files
import java.nio.file.Path import java.nio.file.Path
import kotlin.test.Test import kotlin.test.Test
import kotlin.test.assertContentEquals import kotlin.test.assertContentEquals
import kotlin.test.assertEquals
class LynonTests { class LynonTests {
@Test @Test
@ -144,6 +147,15 @@ class LynonTests {
assertEquals(1471792L, bin.unpackSigned()) assertEquals(1471792L, bin.unpackSigned())
} }
// A plain String and an ObjString holding the same text must act as two distinct map keys.
@Test
fun testObjStringAndStringKeys() = runTest {
    val plain = "foo"
    val wrapped = ObjString("foo")
    val lookup = mutableMapOf<Any, Int>()
    lookup[plain] = 1
    lookup[wrapped] = 2
    assertEquals(1, lookup[plain])
    assertEquals(2, lookup[wrapped])
}
@Test @Test
fun testCache1() = runTest { fun testCache1() = runTest {
val bout = MemoryBitOutput() val bout = MemoryBitOutput()
@ -398,14 +410,12 @@ class LynonTests {
val alphabet = object : Huffman.Alphabet<LynonType> { val alphabet = object : Huffman.Alphabet<LynonType> {
override val maxOrdinal = LynonType.entries.size override val maxOrdinal = LynonType.entries.size
// val bitSize = sizeInBits(maxOrdinal)
override fun decodeOrdinalTo(bout: BitOutput, ordinal: Int) { override fun decodeOrdinalTo(bout: BitOutput, ordinal: Int) {
TODO("Not yet implemented") throw NotImplementedError()
} }
override fun get(ordinal: Int): LynonType { override fun get(ordinal: Int): LynonType {
TODO("Not yet implemented") return LynonType.entries[ordinal]
} }
override fun ordinalOf(value: LynonType): Int = value.ordinal override fun ordinalOf(value: LynonType): Int = value.ordinal
@ -474,5 +484,19 @@ class LynonTests {
assertEquals(src3, bin.decompressString()) assertEquals(src3, bin.decompressString())
} }
// Evaluates a script in testScope() that calls testEncode on a list mixing integers and strings.
// The other testEncode cases inside the script are prefixed with `//` (presumably disabled
// as script comments — TODO confirm). No assertion is made on the Kotlin side; any checking
// would have to happen inside testEncode, which is not visible here.
@Test
fun testIntList() = runTest {
testScope().eval("""
// testEncode([1,2,3])
// testEncode([-1,-2,-3])
// testEncode([1,-2,-3])
// testEncode([0,1])
// testEncode([0,0,0])
// testEncode(["the", "the", "wall", "the", "wall", "wall"])
testEncode([1,2,3, "the", "wall", "wall"])
""".trimIndent())
}
} }