diff --git a/README.md b/README.md index 2cac10f..b9a7556 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,10 @@ Goals: - Pack/unpack byte tools - 2 flavors of space-effecient varint packing - CRCs and CRC-protected blocks -- Async variants \ No newline at end of file +- Async variants + +# Some public APIs + +## Smartint codec + +Variable-length signed and unsigned integer codec, see `object Smartint`. For numbers that are not too small it is slightly more space-efficient than the `Varint` codec; for example, on `Long` values it saves a byte. \ No newline at end of file diff --git a/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt b/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt index 76c9724..28c8af8 100644 --- a/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt @@ -1,8 +1,8 @@ package net.sergeych.bintools -abstract class DataSink { +interface DataSink { - abstract fun writeByte(data: Byte) + fun writeByte(data: Byte) fun writeByte(data: Int) = writeByte(data.toByte()) @@ -16,7 +16,7 @@ abstract class DataSink { } } -class ArrayDataSink : DataSink() { +class ArrayDataSink : DataSink { private val result = mutableListOf() override fun writeByte(data: Byte) { diff --git a/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt b/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt index 4338e2e..91fc31a 100644 --- a/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt @@ -6,16 +6,16 @@ package net.sergeych.bintools * like multiplatform version of DataInput * */ -abstract class DataSource { +interface DataSource { - abstract fun readByte(): Byte + fun readByte(): Byte - abstract val position: Int + val position: Int - open fun readUByte() = readByte().toUByte() + fun readUByte() = readByte().toUByte() @Suppress("unused") - open fun readBytes(size: Int): ByteArray = + fun readBytes(size: Int): ByteArray = ByteArray(size).also { a 
-> for( i in 0..size) a[i] = readByte() @@ -23,7 +23,7 @@ abstract class DataSource { } fun ByteArray.toDataSource(): DataSource = - object : DataSource() { + object : DataSource { override var position = 0 override fun readByte(): Byte = this@toDataSource[position++] diff --git a/src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt b/src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt new file mode 100644 index 0000000..ce0528f --- /dev/null +++ b/src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt @@ -0,0 +1,87 @@ +package net.sergeych.bintools + +import com.icodici.ubdata.Varint +import kotlin.reflect.typeOf + +/** + * The common interface for any variable-length (or even fixed-length) integer encoder. + * An implementation only needs to override the [encodeUnsigned] / [decodeUnsigned] pair to + * get the rest (including the signed codec) out of the box. Keeps the [Smartint] and [Varint] + * codecs DRY. + */ +interface IntCodec { + fun encodeUnsigned(value: ULong,sink: DataSink) + + fun decodeUnsigned(source: DataSource): ULong + + /** + * Default signed codec uses bit 0 as a sign (to keep packed as small as possible). Note: Long.MIN_VALUE does not round-trip, because its negation overflows and it decodes as 0. + */ + fun encodeSigned(value: Long, sink: DataSink): Unit { + var sigBit: ULong + var x: ULong + if (value < 0) { + x = (-value).toULong() + sigBit = 1u + } else { + x = value.toULong() + sigBit = 0u + } + encodeUnsigned((x shl 1) or sigBit, sink) + } + + /** + * Default signed codec uses bit 0 as a sign (to keep packed as small as possible) + */ + fun decodeSigned(source: DataSource): Long { + val x = decodeUnsigned(source) + val result = (x shr 1).toLong() + return if ((x and 1u).toInt() != 0) -result else result + } + + fun encodeUnsigned(value: ULong): ByteArray { + return ArrayDataSink().also { encodeUnsigned(value, it) }.toByteArray() + } + + fun decodeUnsigned(packed: ByteArray) = decodeUnsigned(packed.toDataSource()) + + fun encodeSigned(value: Long): ByteArray { + return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray() + } + + fun 
decodeSigned(data: ByteArray): Long { + return decodeSigned(data.toDataSource()) + } +} + +inline fun IntCodec.decode(source: ByteArray): T { + return decode(source.toDataSource()) +} + +inline fun IntCodec.decode(source: DataSource): T { + return when (typeOf()) { + typeOf() -> decodeUnsigned(source).toUByte() + typeOf() -> decodeUnsigned(source).toUInt() + typeOf() -> decodeUnsigned(source).toULong() + typeOf() -> decodeSigned(source).toByte() + typeOf() -> decodeSigned(source).toInt() + typeOf() -> decodeSigned(source).toLong() + else -> + throw IllegalArgumentException("can't decode to ${T::class.simpleName}") + } as T +} + +inline fun IntCodec.encode(x: T, dout: DataSink) { + when (x) { + is UByte -> encodeUnsigned(x.toULong(), dout) + is UInt -> encodeUnsigned(x.toULong(), dout) + is ULong -> encodeUnsigned(x, dout) + is Byte -> encodeSigned(x.toLong(), dout) + is Int -> encodeSigned(x.toLong(), dout) + is Long -> encodeSigned(x, dout) + else -> throw IllegalArgumentException("can't encode with varitn ${x::class.simpleName}: $x") + } +} + +inline fun IntCodec.encode(x: T): ByteArray = + ArrayDataSink().also { encode(x, it) }.toByteArray() diff --git a/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt index a66a877..6d599cc 100644 --- a/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt @@ -29,11 +29,12 @@ private val hexDigits = "0123456789ABCDEF" fun Long.encodeToHex(length: Int = 0): String { var result = "" var value = this - if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)") + var end = if( value >= 0 ) 0L else -1L +// if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)") do { result = hexDigits[(value and 0x0f).toInt()] + result value = value shr 4 - } while (value > 0) + } while (value != end) while (result.length < length) result 
= "0" + result return result } diff --git a/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt b/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt index 8ff7623..45d675c 100644 --- a/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt @@ -2,13 +2,12 @@ package com.icodici.ubdata -import net.sergeych.bintools.ArrayDataSink -import net.sergeych.bintools.DataSink -import net.sergeych.bintools.DataSource -import net.sergeych.bintools.toDataSource +import net.sergeych.bintools.* /** - * Smart variable-length long encoding tools, async. + * Smart variable-length long encoding tools, async. It gives a byte-size gain on 64-bit numbers, + * so it is very useful when encoding big numbers or at least very big long values. In other cases + * [Varint] works faster, and the extra bits it uses do not matter. * * | Bytes sz | varint bits | smartint bits | * |:-----:|:------:|:---------:| @@ -49,13 +48,13 @@ import net.sergeych.bintools.toDataSource * sequence. 
* */ -object Smartint { +object Smartint : IntCodec { private val v0limit: ULong = (1L shl 6).toULong() private val v1limit = (1L shl 14).toULong() private val v2limit = (1L shl 22).toULong() - fun encode(value: ULong, sink: DataSink) { + override fun encodeUnsigned(value: ULong, sink: DataSink) { when { value < v0limit -> encodeSeq(sink, 0, value) @@ -82,7 +81,7 @@ object Smartint { (value shr 6) and 0xFFu, (value shr 14) and 0xFFu, ) - Varint.encode(value shr 22, sink) + Varint.encodeUnsigned(value shr 22, sink) } } } @@ -98,7 +97,7 @@ object Smartint { } } - fun decode(source: DataSource): ULong { + override fun decodeUnsigned(source: DataSource): ULong { fun get(): ULong = source.readUByte().toULong() val first = get().toUInt() var type = (first and 3u).toInt() @@ -112,39 +111,7 @@ object Smartint { result = result or (get() shl 14) if (type == 0) return result // type 2 - return result or (Varint.decode(source) shl 22) + return result or (Varint.decodeUnsigned(source) shl 22) } - fun encodeSigned(value: Long, sink: DataSink) { - val sigBit: ULong - val x: ULong - if (value < 0) { - x = (-value).toULong() - sigBit = 1u - } else { - x = value.toULong() - sigBit = 0u - } - Varint.encode((x shl 1) or sigBit, sink) - } - - fun decodeSigned(source: DataSource): Long { - val x = Varint.decode(source) - val result = (x shr 1).toLong() - return if ((x and 1u).toInt() != 0) -result else result - } - - fun encode(value: ULong): ByteArray { - return ArrayDataSink().also { encode(value, it) }.toByteArray() - } - - fun decode(packed: ByteArray) = decode(packed.toDataSource()) - - fun encodeSigned(value: Long): ByteArray { - return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray() - } - - fun decodeSigned(data: ByteArray): Long { - return decodeSigned(data.toDataSource()) - } } diff --git a/src/commonMain/kotlin/net.sergeych.bintools/varint.kt b/src/commonMain/kotlin/net.sergeych.bintools/varint.kt index 7869533..7aef94e 100644 --- 
a/src/commonMain/kotlin/net.sergeych.bintools/varint.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/varint.kt @@ -2,10 +2,7 @@ package com.icodici.ubdata -import net.sergeych.bintools.ArrayDataSink -import net.sergeych.bintools.DataSink -import net.sergeych.bintools.DataSource -import net.sergeych.bintools.toDataSource +import net.sergeych.bintools.* /** * Variable-length long integer encoding. the MSB (0x80) bit of each byte flags @@ -16,21 +13,8 @@ import net.sergeych.bintools.toDataSource * encoding numbers that needs more than 22 bits. With smaller numbers its either * same or even worse, see [Smartint] docs. */ -object Varint { - fun encodeSigned(value: Long, sink: DataSink): Unit { - var sigBit: ULong - var x: ULong - if (value < 0) { - x = (-value).toULong() - sigBit = 1u - } else { - x = value.toULong() - sigBit = 0u - } - encode((x shl 1) or sigBit, sink) - } - - fun encode(value: ULong, dout: DataSink) { +object Varint: IntCodec { + override fun encodeUnsigned(value: ULong, dout: DataSink) { var rest = value do { val x = (rest and 127u).toInt() @@ -43,7 +27,7 @@ object Varint { } while (rest > 0u) } - fun decode(source: DataSource): ULong { + override fun decodeUnsigned(source: DataSource): ULong { var result: ULong = 0u var count = 0 while (true) { @@ -55,28 +39,4 @@ object Varint { } return result } - - - fun decodeSigned(source: DataSource): Long { - val x = decode(source) - val result = (x shr 1).toLong() - return if ((x and 1u).toInt() != 0) -result else result - } - - fun encode(value: ULong): ByteArray { - return ArrayDataSink().also { encode(value, it) }.toByteArray() - } - - fun decode(packed: ByteArray) = decode(packed.toDataSource()) - - fun encodeSigned(value: Long): ByteArray { - return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray() - } - - fun decodeSigned(data: ByteArray): Long { - return decodeSigned(data.toDataSource()) - } - - - } diff --git a/src/commonTest/kotlin/bintools/SmartintTest.kt 
b/src/commonTest/kotlin/bintools/SmartintTest.kt index 802faa6..85c870a 100644 --- a/src/commonTest/kotlin/bintools/SmartintTest.kt +++ b/src/commonTest/kotlin/bintools/SmartintTest.kt @@ -1,16 +1,18 @@ package bintools import com.icodici.ubdata.Smartint +import com.icodici.ubdata.Varint +import net.sergeych.bintools.decode +import net.sergeych.bintools.encode import net.sergeych.bintools.encodeToHex import kotlin.test.Test import kotlin.test.assertEquals class SmartintTest { - fun testValue(x: Long) { - assertEquals(x.toULong(), Smartint.decode(Smartint.encode(x.toULong()))) - assertEquals(x, Smartint.decodeSigned(Smartint.encodeSigned(x))) - println("+ ${x}: ${Smartint.encode(x.toULong()).encodeToHex()}") + inline fun testValue(x: T) { + assertEquals(x, Smartint.decode(Smartint.encode(x))) + println("+ ${x}: ${Smartint.encode(x).encodeToHex()}") } fun testAround(bits: Int) { @@ -19,10 +21,12 @@ class SmartintTest { var median: Long = (1.toULong() shl bits).toLong() for( x in (median-2)..(median+2)) { testValue(x) + testValue(-x) } median = median * 3 / 2 for( x in (median-5)..(median+5)) { testValue(x) + testValue(-x) } } @@ -39,6 +43,13 @@ class SmartintTest { } @Test - fun decode() { + fun compareTest() { + for( x in listOf(0uL, 1uL, 66uL, 129uL, 219uL, 0x1122uL, 0xFFEEuL, 0xAAbbCCdduL, + 0x1111222233334444uL, (1UL shl 63))) { + // 1--12--23--34--4 + println("--- $x / 0x${x.encodeToHex(8)}") + println("V: ${Varint.encode(x).encodeToHex()}") + println("S: ${Smartint.encode(x).encodeToHex()}") + } } } \ No newline at end of file diff --git a/src/commonTest/kotlin/bintools/VarintTest.kt b/src/commonTest/kotlin/bintools/VarintTest.kt index 8a7de1d..258628d 100644 --- a/src/commonTest/kotlin/bintools/VarintTest.kt +++ b/src/commonTest/kotlin/bintools/VarintTest.kt @@ -1,16 +1,18 @@ package bintools import com.icodici.ubdata.Varint +import net.sergeych.bintools.decode +import net.sergeych.bintools.encode import net.sergeych.bintools.encodeToHex import 
kotlin.test.Test import kotlin.test.assertEquals class VarintTest { - fun testValue(x: Long) { - assertEquals(x.toULong(), Varint.decode(Varint.encode(x.toULong()))) - assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x))) - println("+ ${x}: ${Varint.encode(x.toULong()).encodeToHex()}") + inline fun testValue(x: T) { + assertEquals(x, Varint.decode(Varint.encode(x))) +// assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x))) + println("+ ${x}: ${Varint.encode(x).encodeToHex()}") } fun testAround(bits: Int) { @@ -18,16 +20,19 @@ class VarintTest { var median: Long = (1.toULong() shl bits).toLong() for( x in (median-5)..(median+5)) { testValue(x) + testValue(-x) } median = median * 3 / 2 for( x in (median-5)..(median+5)) { testValue(x) + testValue(-x) } } @Test fun encode() { // for( i in 0..300) testValue(i.toLong()) + testAround(7) testAround(7) testAround(14) testAround(21) @@ -35,6 +40,9 @@ class VarintTest { } @Test - fun decode() { + fun negative() { + testValue(-1) + testValue(-2) + testValue(-3) } } \ No newline at end of file