refs #35 bits in BitArray/input/output reordered for better performance; started typed serialization

This commit is contained in:
Sergey Chernov 2025-07-17 12:33:32 +03:00
parent 7aee25ffef
commit cffe4eaffc
13 changed files with 312 additions and 60 deletions

View File

@ -8,6 +8,7 @@ kotlinx-coroutines = "1.10.1"
mp_bintools = "0.1.12"
firebaseCrashlyticsBuildtools = "3.0.3"
okioVersion = "3.10.2"
compiler = "3.2.0-alpha11"
[libraries]
clikt = { module = "com.github.ajalt.clikt:clikt", version.ref = "clikt" }
@ -19,6 +20,7 @@ mp_bintools = { module = "net.sergeych:mp_bintools", version.ref = "mp_bintools"
firebase-crashlytics-buildtools = { group = "com.google.firebase", name = "firebase-crashlytics-buildtools", version.ref = "firebaseCrashlyticsBuildtools" }
okio = { module = "com.squareup.okio:okio", version.ref = "okioVersion" }
okio-fakefilesystem = { module = "com.squareup.okio:okio-fakefilesystem", version.ref = "okioVersion" }
compiler = { group = "androidx.databinding", name = "compiler", version.ref = "compiler" }
[plugins]
androidLibrary = { id = "com.android.library", version.ref = "agp" }

View File

@ -99,6 +99,7 @@ android {
}
dependencies {
implementation(libs.firebase.crashlytics.buildtools)
implementation(libs.compiler)
}
publishing {

View File

@ -3,6 +3,7 @@ package net.sergeych.lyng
import kotlinx.coroutines.delay
import net.sergeych.lyng.obj.*
import net.sergeych.lyng.pacman.ImportManager
import net.sergeych.lynon.ObjLynonClass
import kotlin.math.*
class Script(
@ -181,6 +182,9 @@ class Script(
it.addConst("Buffer", ObjBuffer.type)
it.addConst("MutableBuffer", ObjMutableBuffer.type)
}
addPackage("lyng.serialization") {
it.addConst("Lynon", ObjLynonClass)
}
addPackage("lyng.time") {
it.addConst("Instant", ObjInstant.type)
it.addConst("Duration", ObjDuration.type)

View File

@ -2,6 +2,7 @@ package net.sergeych.lyng.obj
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.flow.toList
import net.sergeych.bintools.toDump
import net.sergeych.lyng.Scope
import net.sergeych.lyng.statement
import kotlin.math.min
@ -138,6 +139,12 @@ open class ObjBuffer(val byteArray: UByteArray) : Obj() {
requireNoArgs()
ObjMutableBuffer(thisAs<ObjBuffer>().byteArray.copyOf())
}
addFn("toDump") {
requireNoArgs()
ObjString(
thisAs<ObjBuffer>().byteArray.toByteArray().toDump()
)
}
}
}
}

View File

@ -69,6 +69,9 @@ open class ObjClass(
fun addConst(name: String, value: Obj) = createField(name, value, isMutable = false)
fun addClassConst(name: String, value: Obj) = createClassField(name, value)
/**
 * Register a class-level function under [name].
 *
 * The [code] lambda is wrapped with `statement { ... }` and stored through [createClassField].
 *
 * @param name member name to register
 * @param isOpen passed to [createClassField] as its third argument
 * @param code suspending body that runs with a [Scope] receiver
 */
fun addClassFn(name: String, isOpen: Boolean = false, code: suspend Scope.() -> Obj) {
    val wrapped = statement { code() }
    createClassField(name, wrapped, isOpen)
}
/**
@ -91,6 +94,10 @@ open class ObjClass(
return super.readField(scope, name)
}
/**
 * Dispatch a method call made on the class object itself.
 *
 * If [classMembers] has an entry for [name], its value is invoked with this class passed as
 * the second argument; when there is no entry (or no non-null result) the call falls through
 * to the superclass implementation.
 */
override suspend fun invokeInstanceMethod(scope: Scope, name: String, args: Arguments): Obj {
    val classMember = classMembers[name]?.value
    val handled = classMember?.invoke(scope, this, args)
    return handled ?: super.invokeInstanceMethod(scope, name, args)
}
/**
 * Deserialization hook: build an object of this class from [decoder].
 *
 * The base implementation only calls `scope.raiseNotImplemented()`; the function is `open`
 * so that subclasses can supply real decoding.
 */
open fun deserialize(scope: Scope, decoder: LynonDecoder): Obj = scope.raiseNotImplemented()
}

View File

@ -57,7 +57,10 @@ data class ObjString(val value: String) : Obj() {
}
override suspend fun callOn(scope: Scope): Obj {
return ObjString(this.value.sprintf(*scope.args.toKotlinList(scope).toTypedArray()))
return ObjString(this.value.sprintf(*scope.args
.toKotlinList(scope)
.map { if( it == null) "null" else it }
.toTypedArray()))
}
override suspend fun contains(scope: Scope, other: Obj): Boolean {

View File

@ -3,6 +3,8 @@ package net.sergeych.lynon
import net.sergeych.lyng.Scope
import net.sergeych.lyng.obj.Obj
import net.sergeych.lyng.obj.ObjClass
import net.sergeych.lyng.obj.ObjInt
import net.sergeych.lyng.obj.ObjNull
open class LynonDecoder(val bin: BitInput,val settings: LynonSettings = LynonSettings.default) {
@ -24,6 +26,17 @@ open class LynonDecoder(val bin: BitInput,val settings: LynonSettings = LynonSet
}
}
/**
 * Decode one value that starts with a 4-bit [LynonType] tag.
 *
 * Only [LynonType.Null] and [LynonType.Int0] are decoded so far. Every other tag, including
 * tag values that have no [LynonType] entry at all, is reported via `scope.raiseNotImplemented`.
 *
 * @param scope scope used to report unsupported or unknown type tags
 * @return the decoded object
 */
fun decodeAny(scope: Scope): Obj = decodeCached {
    val typeCode = bin.getBits(4).toInt()
    // Four bits carry 0..15 but LynonType has fewer entries, so look the tag up safely:
    // a corrupted or newer stream must not fail with IndexOutOfBoundsException.
    val type = LynonType.entries.getOrNull(typeCode)
    // `return` is a non-local return out of decodeAny, kept exactly as in the original.
    // NOTE(review): anything decodeCached does after running this block is skipped — confirm
    // that this is intended.
    return when (type) {
        LynonType.Null -> ObjNull
        LynonType.Int0 -> ObjInt.Zero
        null -> scope.raiseNotImplemented("lynon type code $typeCode")
        else -> scope.raiseNotImplemented("lynon type $type")
    }
}
/**
 * Decode an object of the given [type] by calling [ObjClass.deserialize] with this decoder,
 * wrapped in [decodeCached].
 */
fun unpackObject(scope: Scope, type: ObjClass): Obj =
    decodeCached { type.deserialize(scope, this) }

View File

@ -2,24 +2,82 @@ package net.sergeych.lynon
import net.sergeych.lyng.Scope
import net.sergeych.lyng.obj.Obj
import net.sergeych.lyng.obj.ObjInt
import net.sergeych.lyng.obj.ObjNull
/**
 * Type tags for values encoded with a type record.
 *
 * The ordinal of an entry is what gets written to the bit stream as a 4-bit value, so the
 * declaration order is part of the format: do not reorder entries, and note that 4 bits
 * leave room for at most 16 of them.
 */
enum class LynonType {
    Null,
    Int0,
    IntNegative,
    IntPositive,
    String,
    Real,
    Bool,
    List,
    Map,
    Set,
    Buffer,
    Instant,
    Duration,
    Other,
}
open class LynonEncoder(val bout: BitOutput,val settings: LynonSettings = LynonSettings.default) {
val cache = mutableMapOf<Any, Int>()
private inline fun encodeCached(item: Any, packer: LynonEncoder.() -> Unit) {
if (item is Obj) {
cache[item]?.let { cacheId ->
private suspend fun encodeCached(item: Any, packer: suspend LynonEncoder.() -> Unit) {
suspend fun serializeAndCache(key: Any=item) {
bout.putBit(0)
if( settings.shouldCache(item) )
cache[key] = cache.size
packer()
}
when(item) {
is Obj -> cache[item]?.let { cacheId ->
val size = sizeInBits(cache.size)
bout.putBit(1)
bout.putBits(cacheId.toULong(), size)
} ?: run {
bout.putBit(0)
if (settings.shouldCache(item))
cache[item] = cache.size
packer()
} ?: serializeAndCache()
is ByteArray, is UByteArray -> serializeAndCache()
}
}
/**
 * Encode any serializable Lyng object [Obj], prefixing it with a type record. This allows
 * encoding a value whose type is not known in advance, at the cost of the type record overhead.
 *
 * Caching is applied automatically through [encodeCached].
 *
 * Only [ObjNull] and [ObjInt] are supported so far; any other value is reported via
 * `scope.raiseNotImplemented`.
 *
 * @param scope scope used to report values that cannot be encoded yet
 * @param value object to encode
 */
suspend fun encodeAny(scope: Scope, value: Obj) {
    encodeCached(value) {
        when (value) {
            is ObjNull -> putType(LynonType.Null)
            is ObjInt -> {
                val number = value.value
                when {
                    number == 0L -> putType(LynonType.Int0)
                    number < 0 -> {
                        // The sign lives in the type tag; only the magnitude is written.
                        putType(LynonType.IntNegative)
                        encodeUnsigned((-number).toULong())
                    }
                    else -> {
                        putType(LynonType.IntPositive)
                        encodeUnsigned(number.toULong())
                    }
                }
            }
            // Report through the scope, the same way LynonDecoder.decodeAny does, rather than
            // failing with the NotImplementedError thrown by a bare TODO().
            else -> scope.raiseNotImplemented("lynon encoding of ${value::class.simpleName}")
        }
    }
}
/** Write the ordinal of [type] to [bout] as a 4-bit tag. */
private fun putType(type: LynonType) {
    val tag = type.ordinal.toULong()
    bout.putBits(tag, 4)
}
suspend fun encodeObj(scope: Scope, obj: Obj) {

View File

@ -1,16 +1,20 @@
package net.sergeych.lynon
import net.sergeych.lyng.obj.Obj
import net.sergeych.lyng.obj.ObjBool
import net.sergeych.lyng.obj.ObjChar
import net.sergeych.lyng.obj.ObjInt
import net.sergeych.lyng.obj.ObjNull
import kotlin.math.absoluteValue
open class LynonSettings() {
open fun shouldCache(obj: Obj): Boolean = when (obj) {
open fun shouldCache(obj: Any): Boolean = when (obj) {
is ObjChar -> false
is ObjInt -> obj.value > 0x10000FF
is ObjInt -> obj.value.absoluteValue > 0x10000FF
is ObjBool -> false
is ObjNull -> false
is ByteArray -> obj.size > 2
is UByteArray -> obj.size > 2
else -> true
}

View File

@ -8,30 +8,31 @@ class MemoryBitInput(val packedBits: UByteArray, val lastByteBits: Int) : BitInp
private var index = 0
private var isEndOfStream: Boolean = packedBits.isEmpty() || (packedBits.size == 1 && lastByteBits == 0)
private set
/**
* Return next byte, int in 0..255 range, or -1 if end of stream reached
*/
private var accumulator = 0
private var accumulator = if( isEndOfStream ) 0 else packedBits[0].toInt()
private var isEndOfStream: Boolean = false
private set
private var mask = 0
private var bitCounter = 0
override fun getBitOrNull(): Int? {
if (isEndOfStream) return null
if (mask == 0) {
if (index < packedBits.size) {
accumulator = packedBits[index++].toInt()
val n = if (index == packedBits.size) lastByteBits else 8
mask = 1 shl (n - 1)
} else {
val result = accumulator and 1
accumulator = accumulator shr 1
bitCounter++
// is end?
if( index == packedBits.lastIndex && bitCounter == lastByteBits ) {
isEndOfStream = true
return null
}
else {
if( bitCounter == 8 ) {
bitCounter = 0
accumulator = packedBits[++index].toInt()
}
}
val result = if (0 == accumulator and mask) 0 else 1
mask = mask shr 1
return result
}

View File

@ -4,11 +4,13 @@ import kotlin.math.min
/**
* BitList implementation as a fixed-size array of bits; indexing works exactly the same as if
* [MemoryBitInput] is used with [MemoryBitInput.getBit].
* [MemoryBitInput] is used with [MemoryBitInput.getBit]. See [MemoryBitOutput] for
* bits order and more information.
*/
class BitArray(val bytes: UByteArray, val lastByteBits: Int) : BitList {
val bytesSize: Int get() = bytes.size
override val size by lazy { bytes.size * 8L - (8 - lastByteBits) }
override val indices by lazy { 0..<size }
@ -23,15 +25,9 @@ class BitArray(val bytes: UByteArray, val lastByteBits: Int) : BitList {
if (byteIndex !in bytes.indices)
throw IndexOutOfBoundsException("$bitIndex is out of bounds")
val i = (bitIndex % 8).toInt()
return byteIndex to (
if (byteIndex == bytes.lastIndex) {
if (i >= lastByteBits)
if (byteIndex == bytes.lastIndex && i >= lastByteBits)
throw IndexOutOfBoundsException("$bitIndex is out of bounds (last)")
1 shl (lastByteBits - i - 1)
} else {
1 shl (7 - i)
}
)
return byteIndex to (1 shl i)
}
override operator fun get(bitIndex: Long): Int =
@ -56,6 +52,10 @@ class BitArray(val bytes: UByteArray, val lastByteBits: Int) : BitList {
return result.toString()
}
/** Returns [bytes] reinterpreted as a signed [ByteArray] via `asByteArray()`, i.e. a view, not a copy. */
fun asByteArray(): ByteArray = bytes.asByteArray()
/** Returns the backing [bytes] array itself, not a copy. */
fun asUbyteArray(): UByteArray = bytes
companion object {
fun withBitSize(size: Long): BitArray {
@ -75,35 +75,40 @@ class BitArray(val bytes: UByteArray, val lastByteBits: Int) : BitList {
}
/**
* [BitOutput] implementation that writes to a memory buffer, LSB first.
*
* Bits are stored in the least significant bits of the bytes. E.g. the first bit
* added by [putBit] will be stored in the bit 0x01 of the first byte, the second bit
* in the bit 0x02 of the first byte, etc.
*
* This allows automatic filling of the last byte with zeros. This is important when
* using bytes stored from [asByteArray] or [asUbyteArray]. When converting to
* bytes, automatic padding to byte size is applied. With such bit order, constructing
* a [BitInput] to read from the [asByteArray] result only provides 0 to 7 extra zero bits
* at the end, which is often acceptable. To avoid this, use [toBitArray]; the [BitArray]
* stores the exact number of bits and [BitArray.toBitInput] provides a [BitInput] that
* decodes exactly the same bits.
*
*/
class MemoryBitOutput : BitOutput {
private val buffer = mutableListOf<UByte>()
private var accumulator = 0
/**
* Number of bits in accumulator. After output is closed by [close] this value is
* not changed and represents the number of bits in the last byte; this should
* be used to properly calculate end of the bit stream
*/
private var accumulatorBits = 0
private set
// /**
// * When [close] is called, represents the number of used bits in the last byte;
// * bits after this number are the garbage and should be ignored
// */
// val lastByteBits: Int
// get() {
// if (!isClosed) throw IllegalStateException("BitOutput is not closed")
// return accumulatorBits
// }
private var mask = 1
override fun putBit(bit: Int) {
accumulator = (accumulator shl 1) or bit
if (++accumulatorBits >= 8) {
when (bit) {
0 -> {}
1 -> accumulator = accumulator or mask
else -> throw IllegalArgumentException("Bit must be 0 or 1")
}
mask = mask shl 1
if(mask == 0x100) {
mask = 1
outputByte(accumulator.toUByte())
accumulator = accumulator shr 8
accumulatorBits = 0
}
}
@ -112,19 +117,34 @@ class MemoryBitOutput : BitOutput {
fun close(): BitArray {
if (!isClosed) {
if (accumulatorBits > 0) {
if (mask != 0x01) {
outputByte(accumulator.toUByte())
} else accumulatorBits = 8
}
isClosed = true
}
return toBitArray()
}
/**
 * Number of bits in use in the last output byte, derived from [mask].
 *
 * A mask of `0x01` is reported as 8; masks `0x02`..`0x80` map to 1..7.
 *
 * @throws IllegalStateException if the output is not closed, or [mask] is not a single bit
 * within one byte
 */
fun lastBits(): Int {
    check(isClosed)
    val bitPosition = (0..7).firstOrNull { (1 shl it) == mask }
        ?: throw IllegalStateException("Invalid state, mask=${mask.toString(16)}")
    // Position 0 means that all bits of the last byte are in use.
    return if (bitPosition == 0) 8 else bitPosition
}
fun toBitArray(): BitArray {
if (!isClosed) {
close()
}
return BitArray(buffer.toTypedArray().toUByteArray(), accumulatorBits)
return BitArray(buffer.toTypedArray().toUByteArray(), lastBits())
}
/** Shortcut for `toBitArray().toBitInput()`; note that [toBitArray] closes this output if it is still open. */
fun toBitInput(): BitInput = toBitArray().toBitInput()

View File

@ -0,0 +1,35 @@
package net.sergeych.lynon
import net.sergeych.lyng.Scope
import net.sergeych.lyng.obj.Obj
import net.sergeych.lyng.obj.ObjBuffer
import net.sergeych.lyng.obj.ObjClass
import net.sergeych.lyng.obj.ObjString
// Most often used types:
/**
 * The `Lynon` class object. It registers two class functions, `encode` and `decode`, and a
 * class constant `test`.
 */
val ObjLynonClass = object : ObjClass("Lynon") {

    /** Serialize [obj] with [LynonEncoder.encodeAny] and wrap the produced bytes in an [ObjBuffer]. */
    suspend fun Scope.encodeAny(obj: Obj): Obj {
        val output = MemoryBitOutput()
        LynonEncoder(output).encodeAny(this, obj)
        return ObjBuffer(output.toBitArray().bytes)
    }

    /** Decode a value from [buffer], treating all 8 bits of its last byte as data. */
    suspend fun Scope.decodeAny(buffer: ObjBuffer): Obj {
        val input = BitArray(buffer.byteArray, 8).toInput()
        return LynonDecoder(input).decodeAny(this)
    }
}.apply {
    addClassConst("test", ObjString("test_const"))
    addClassFn("encode") { encodeAny(requireOnlyArg<Obj>()) }
    addClassFn("decode") { decodeAny(requireOnlyArg<ObjBuffer>()) }
}

View File

@ -1,6 +1,8 @@
import junit.framework.TestCase.*
import kotlinx.coroutines.test.runTest
import net.sergeych.bintools.encodeToHex
import net.sergeych.lyng.Scope
import net.sergeych.lyng.eval
import net.sergeych.lyng.obj.*
import net.sergeych.lynon.*
import java.nio.file.Files
@ -30,6 +32,62 @@ class LynonTests {
assertEquals(4, sizeInBits(15u))
}
/** Four bits written one by one must read back identically through [BitArray] and [MemoryBitInput]. */
@Test
fun testBitOutputSmall() {
    val pattern = listOf(1, 1, 0, 1)
    val bout = MemoryBitOutput()
    for (bit in pattern) bout.putBit(bit)

    val x = bout.toBitArray()
    pattern.forEachIndexed { i, bit -> assertEquals(bit, x[i.toLong()]) }
    assertEquals(4, x.size)
    assertEquals("1101", x.toString())

    val bin = MemoryBitInput(x)
    for (bit in pattern) assertEquals(bit, bin.getBit())
    // the stream holds exactly four bits
    assertEquals(null, bin.getBitOrNull())
}
/** A 13-bit stream that crosses a byte boundary must keep its bit order and exact length. */
@Test
fun testBitOutputMedium() {
    val head = listOf(1, 1, 0, 1)
    val bout = MemoryBitOutput()
    for (bit in head) bout.putBit(bit)
    bout.putBits( 0, 7)
    bout.putBits( 3, 2)

    val x = bout.toBitArray()
    head.forEachIndexed { i, bit -> assertEquals(bit, x[i.toLong()]) }
    assertEquals(13, x.size)
    assertEquals("1101000000011", x.toString())
    println(x.bytes.encodeToHex())

    val bin = MemoryBitInput(x)
    for (bit in head) assertEquals(bit, bin.getBit())
    // the seven zero bits and the two-bit value are read back in the groups they were written in
    assertEquals(0UL, bin.getBits(7))
    assertEquals(3UL, bin.getBits(2))
    assertEquals(null, bin.getBitOrNull())
}
@Test
fun testBitStreams() {
@ -213,6 +271,45 @@ class LynonTests {
val original = Files.readString(Path.of("../sample_texts/dikkens_hard_times.txt"))
/** Runs the script-side `testEncode` helper for `null` and `0`. */
@Test
fun testEncodeNullsAndInts() = runTest {
    val script = """
        testEncode(null)
        testEncode(0)
    """.trimIndent()
    testScope().eval(script)
}
/** Bytes taken from a [MemoryBitOutput] must be readable when the last byte is treated as full (8 bits). */
@Test
fun testBufferEncoderInterop() = runTest {
    val packed = MemoryBitOutput().apply {
        putBits(0, 1)
        putBits(1, 4)
    }.toBitArray().bytes

    val bin = MemoryBitInput(packed, 8)
    assertEquals(0UL, bin.getBits(1))
    assertEquals(1UL, bin.getBits(4))
}
/**
 * Creates a [Scope] with `lyng.serialization` imported and a script-level `testEncode(value)`
 * helper that encodes a value, prints the dump and size, and asserts that decoding returns it.
 */
suspend fun testScope(): Scope {
    val prelude = """
        import lyng.serialization
        fun testEncode(value) {
        val encoded = Lynon.encode(value)
        println(encoded.toDump())
        println("Encoded size %d: %s"(encoded.size, value))
        assertEquals( value, Lynon.decode(encoded) )
        }
    """.trimIndent()
    val scope = Scope()
    scope.eval(prelude)
    return scope
}
/** `null` must survive an encode/decode round trip evaluated directly from a script. */
@Test
fun testIntsNulls() = runTest {
    val script = """
        import lyng.serialization
        assertEquals( null, Lynon.decode(Lynon.encode(null)) )
    """.trimIndent()
    eval(script)
}
@Test
fun testLzw() {
// Example usage