refactored int codecs to DRY and easy support signed/unsigned values

This commit is contained in:
Sergey Chernov 2023-03-11 14:35:37 +01:00
parent 0f9702b1dc
commit 8fb052c4f9
9 changed files with 148 additions and 108 deletions

View File

@ -9,3 +9,9 @@ Goals:
- 2 flavors of space-efficient varint packing
- CRCs and CRC-protected blocks
- Async variants
# Some publics
## SmartInt codec
Variable-length signed and unsigned integer codec, see `object Smartint`. For numbers that are not too small it is slightly more efficient than the `Varint` codec; for example, on `Long` values it saves a byte.

View File

@ -1,8 +1,8 @@
package net.sergeych.bintools
abstract class DataSink {
interface DataSink {
abstract fun writeByte(data: Byte)
fun writeByte(data: Byte)
fun writeByte(data: Int) = writeByte(data.toByte())
@ -16,7 +16,7 @@ abstract class DataSink {
}
}
class ArrayDataSink : DataSink() {
class ArrayDataSink : DataSink {
private val result = mutableListOf<Byte>()
override fun writeByte(data: Byte) {

View File

@ -6,16 +6,16 @@ package net.sergeych.bintools
* like multiplatform version of DataInput
*
*/
abstract class DataSource {
interface DataSource {
abstract fun readByte(): Byte
fun readByte(): Byte
abstract val position: Int
val position: Int
open fun readUByte() = readByte().toUByte()
fun readUByte() = readByte().toUByte()
@Suppress("unused")
open fun readBytes(size: Int): ByteArray =
fun readBytes(size: Int): ByteArray =
ByteArray(size).also { a ->
// `until` excludes `size`: the array has exactly `size` slots (indices 0..size-1),
// so the inclusive range `0..size` would write one element past the end.
for (i in 0 until size) {
    a[i] = readByte()
}
@ -23,7 +23,7 @@ abstract class DataSource {
}
fun ByteArray.toDataSource(): DataSource =
object : DataSource() {
object : DataSource {
override var position = 0
override fun readByte(): Byte = this@toDataSource[position++]

View File

@ -0,0 +1,87 @@
package net.sergeych.bintools
import com.icodici.ubdata.Varint
import kotlin.reflect.typeOf
/**
 * The common interface to whatever variable (or even fixed) length integer encoder.
 * Implementations only need to override the [encodeUnsigned] and [decodeUnsigned] pair and
 * get the rest (including the signed codec) out of the box. DRY for the [Smartint] and [Varint]
 * codecs.
 */
interface IntCodec {

    /** Writes [value] to [sink] in this codec's packed unsigned representation. */
    fun encodeUnsigned(value: ULong, sink: DataSink)

    /** Reads one packed unsigned value from [source]. */
    fun decodeUnsigned(source: DataSource): ULong

    /**
     * Default signed codec: sign-magnitude, with bit 0 used as the sign (to keep the packed
     * value as small as possible) and the magnitude stored in the remaining bits.
     *
     * [Long.MIN_VALUE] has no positive counterpart, so its magnitude does not fit into the
     * 63 magnitude bits. It is written as the otherwise unused "negative zero" code
     * (sign bit set, zero magnitude), which [decodeSigned] maps back to [Long.MIN_VALUE].
     */
    fun encodeSigned(value: Long, sink: DataSink) {
        val packed: ULong = when {
            // -Long.MIN_VALUE overflows, so it gets the dedicated "negative zero" code
            value == Long.MIN_VALUE -> 1uL
            value < 0 -> ((-value).toULong() shl 1) or 1uL
            else -> value.toULong() shl 1
        }
        encodeUnsigned(packed, sink)
    }

    /**
     * Default signed codec: the inverse of [encodeSigned]. Bit 0 of the unsigned value is the
     * sign, the remaining bits are the magnitude. The "negative zero" code (sign bit set,
     * zero magnitude) is decoded as [Long.MIN_VALUE].
     */
    fun decodeSigned(source: DataSource): Long {
        val packed = decodeUnsigned(source)
        val magnitude = (packed shr 1).toLong()
        return when {
            (packed and 1uL) == 0uL -> magnitude
            // sign bit set with zero magnitude is only produced for Long.MIN_VALUE
            magnitude == 0L -> Long.MIN_VALUE
            else -> -magnitude
        }
    }

    /** Packs the unsigned [value] into a new byte array. */
    fun encodeUnsigned(value: ULong): ByteArray {
        return ArrayDataSink().also { encodeUnsigned(value, it) }.toByteArray()
    }

    /** Unpacks one unsigned value from the bytes of [packed]. */
    fun decodeUnsigned(packed: ByteArray) = decodeUnsigned(packed.toDataSource())

    /** Packs the signed [value] into a new byte array, see [encodeSigned]. */
    fun encodeSigned(value: Long): ByteArray {
        return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
    }

    /** Unpacks one signed value from the bytes of [data], see [decodeSigned]. */
    fun decodeSigned(data: ByteArray): Long {
        return decodeSigned(data.toDataSource())
    }
}
/**
 * Decodes a single integer of type [T] from the bytes of [source], by wrapping the array
 * into a [DataSource] and delegating to the [DataSource]-based `decode` overload.
 */
inline fun <reified T : Any> IntCodec.decode(source: ByteArray): T {
    val input = source.toDataSource()
    return decode<T>(input)
}
/**
 * Decodes a single integer of type [T] from [source].
 *
 * Unsigned targets ([UByte], [UShort], [UInt], [ULong]) are read with
 * [IntCodec.decodeUnsigned], signed targets ([Byte], [Short], [Int], [Long]) with
 * [IntCodec.decodeSigned]. The decoded 64-bit value is narrowed with the standard
 * `toXxx()` conversions, so a stored value that does not fit into [T] is truncated
 * rather than rejected.
 *
 * @throws IllegalArgumentException if [T] is not one of the supported integer types;
 * in that case nothing is read from [source].
 */
inline fun <reified T : Any> IntCodec.decode(source: DataSource): T {
    return when (typeOf<T>()) {
        typeOf<UByte>() -> decodeUnsigned(source).toUByte()
        typeOf<UShort>() -> decodeUnsigned(source).toUShort()
        typeOf<UInt>() -> decodeUnsigned(source).toUInt()
        typeOf<ULong>() -> decodeUnsigned(source)
        typeOf<Byte>() -> decodeSigned(source).toByte()
        typeOf<Short>() -> decodeSigned(source).toShort()
        typeOf<Int>() -> decodeSigned(source).toInt()
        typeOf<Long>() -> decodeSigned(source)
        else ->
            throw IllegalArgumentException("can't decode to ${T::class.simpleName}")
    } as T
}
/**
 * Encodes the integer [x] into [dout], choosing the codec by the runtime type of [x]:
 * unsigned types ([UByte], [UShort], [UInt], [ULong]) go through [IntCodec.encodeUnsigned],
 * signed types ([Byte], [Short], [Int], [Long]) through [IntCodec.encodeSigned]. Narrower
 * types are widened to 64 bits first.
 *
 * @throws IllegalArgumentException if [x] is not one of the supported integer types.
 */
inline fun <reified T : Any> IntCodec.encode(x: T, dout: DataSink) {
    when (x) {
        is UByte -> encodeUnsigned(x.toULong(), dout)
        is UShort -> encodeUnsigned(x.toULong(), dout)
        is UInt -> encodeUnsigned(x.toULong(), dout)
        is ULong -> encodeUnsigned(x, dout)
        is Byte -> encodeSigned(x.toLong(), dout)
        is Short -> encodeSigned(x.toLong(), dout)
        is Int -> encodeSigned(x.toLong(), dout)
        is Long -> encodeSigned(x, dout)
        else -> throw IllegalArgumentException("can't encode ${x::class.simpleName} with an integer codec: $x")
    }
}
/**
 * Encodes the integer [x] into a freshly allocated byte array, by collecting the output of
 * the [DataSink]-based `encode` overload in an [ArrayDataSink].
 */
inline fun <reified T : Any> IntCodec.encode(x: T): ByteArray {
    val sink = ArrayDataSink()
    encode(x, sink)
    return sink.toByteArray()
}

View File

@ -29,11 +29,12 @@ private val hexDigits = "0123456789ABCDEF"
fun Long.encodeToHex(length: Int = 0): String {
var result = ""
var value = this
if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
var end = if( value >= 0 ) 0L else -1L
// if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
do {
result = hexDigits[(value and 0x0f).toInt()] + result
value = value shr 4
} while (value > 0)
} while (value != end)
while (result.length < length) result = "0" + result
return result
}

View File

@ -2,13 +2,12 @@
package com.icodici.ubdata
import net.sergeych.bintools.ArrayDataSink
import net.sergeych.bintools.DataSink
import net.sergeych.bintools.DataSource
import net.sergeych.bintools.toDataSource
import net.sergeych.bintools.*
/**
* Smart variable-length long encoding tools, async.
* Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers
* so it is very useful when encoding big numbers or at least very big long values. In other cases
* [Varint] works faster, and the extra bits it uses do not matter.
*
* | Bytes sz | varint bits | smartint bits |
* |:-----:|:------:|:---------:|
@ -49,13 +48,13 @@ import net.sergeych.bintools.toDataSource
* sequence.
*
*/
object Smartint {
object Smartint : IntCodec {
private val v0limit: ULong = (1L shl 6).toULong()
private val v1limit = (1L shl 14).toULong()
private val v2limit = (1L shl 22).toULong()
fun encode(value: ULong, sink: DataSink) {
override fun encodeUnsigned(value: ULong, sink: DataSink) {
when {
value < v0limit -> encodeSeq(sink, 0, value)
@ -82,7 +81,7 @@ object Smartint {
(value shr 6) and 0xFFu,
(value shr 14) and 0xFFu,
)
Varint.encode(value shr 22, sink)
Varint.encodeUnsigned(value shr 22, sink)
}
}
}
@ -98,7 +97,7 @@ object Smartint {
}
}
fun decode(source: DataSource): ULong {
override fun decodeUnsigned(source: DataSource): ULong {
fun get(): ULong = source.readUByte().toULong()
val first = get().toUInt()
var type = (first and 3u).toInt()
@ -112,39 +111,7 @@ object Smartint {
result = result or (get() shl 14)
if (type == 0) return result // type 2
return result or (Varint.decode(source) shl 22)
return result or (Varint.decodeUnsigned(source) shl 22)
}
fun encodeSigned(value: Long, sink: DataSink) {
val sigBit: ULong
val x: ULong
if (value < 0) {
x = (-value).toULong()
sigBit = 1u
} else {
x = value.toULong()
sigBit = 0u
}
Varint.encode((x shl 1) or sigBit, sink)
}
fun decodeSigned(source: DataSource): Long {
val x = Varint.decode(source)
val result = (x shr 1).toLong()
return if ((x and 1u).toInt() != 0) -result else result
}
fun encode(value: ULong): ByteArray {
return ArrayDataSink().also { encode(value, it) }.toByteArray()
}
fun decode(packed: ByteArray) = decode(packed.toDataSource())
fun encodeSigned(value: Long): ByteArray {
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
}
fun decodeSigned(data: ByteArray): Long {
return decodeSigned(data.toDataSource())
}
}

View File

@ -2,10 +2,7 @@
package com.icodici.ubdata
import net.sergeych.bintools.ArrayDataSink
import net.sergeych.bintools.DataSink
import net.sergeych.bintools.DataSource
import net.sergeych.bintools.toDataSource
import net.sergeych.bintools.*
/**
* Variable-length long integer encoding. the MSB (0x80) bit of each byte flags
@ -16,21 +13,8 @@ import net.sergeych.bintools.toDataSource
* encoding numbers that needs more than 22 bits. With smaller numbers its either
* same or even worse, see [Smartint] docs.
*/
object Varint {
fun encodeSigned(value: Long, sink: DataSink): Unit {
var sigBit: ULong
var x: ULong
if (value < 0) {
x = (-value).toULong()
sigBit = 1u
} else {
x = value.toULong()
sigBit = 0u
}
encode((x shl 1) or sigBit, sink)
}
fun encode(value: ULong, dout: DataSink) {
object Varint: IntCodec {
override fun encodeUnsigned(value: ULong, dout: DataSink) {
var rest = value
do {
val x = (rest and 127u).toInt()
@ -43,7 +27,7 @@ object Varint {
} while (rest > 0u)
}
fun decode(source: DataSource): ULong {
override fun decodeUnsigned(source: DataSource): ULong {
var result: ULong = 0u
var count = 0
while (true) {
@ -55,28 +39,4 @@ object Varint {
}
return result
}
fun decodeSigned(source: DataSource): Long {
val x = decode(source)
val result = (x shr 1).toLong()
return if ((x and 1u).toInt() != 0) -result else result
}
fun encode(value: ULong): ByteArray {
return ArrayDataSink().also { encode(value, it) }.toByteArray()
}
fun decode(packed: ByteArray) = decode(packed.toDataSource())
fun encodeSigned(value: Long): ByteArray {
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
}
fun decodeSigned(data: ByteArray): Long {
return decodeSigned(data.toDataSource())
}
}

View File

@ -1,16 +1,18 @@
package bintools
import com.icodici.ubdata.Smartint
import com.icodici.ubdata.Varint
import net.sergeych.bintools.decode
import net.sergeych.bintools.encode
import net.sergeych.bintools.encodeToHex
import kotlin.test.Test
import kotlin.test.assertEquals
class SmartintTest {
fun testValue(x: Long) {
assertEquals(x.toULong(), Smartint.decode(Smartint.encode(x.toULong())))
assertEquals(x, Smartint.decodeSigned(Smartint.encodeSigned(x)))
println("+ ${x}: ${Smartint.encode(x.toULong()).encodeToHex()}")
inline fun <reified T:Number>testValue(x: T) {
assertEquals(x, Smartint.decode(Smartint.encode(x)))
println("+ ${x}: ${Smartint.encode(x).encodeToHex()}")
}
fun testAround(bits: Int) {
@ -19,10 +21,12 @@ class SmartintTest {
var median: Long = (1.toULong() shl bits).toLong()
for( x in (median-2)..(median+2)) {
testValue(x)
testValue(-x)
}
median = median * 3 / 2
for( x in (median-5)..(median+5)) {
testValue(x)
testValue(-x)
}
}
@ -39,6 +43,13 @@ class SmartintTest {
}
@Test
fun decode() {
fun compareTest() {
for( x in listOf<ULong>(0uL, 1uL, 66uL, 129uL, 219uL, 0x1122uL, 0xFFEEuL, 0xAAbbCCdduL,
0x1111222233334444uL, (1UL shl 63))) {
// 1--12--23--34--4
println("--- $x / 0x${x.encodeToHex(8)}")
println("V: ${Varint.encode(x).encodeToHex()}")
println("S: ${Smartint.encode(x).encodeToHex()}")
}
}
}

View File

@ -1,16 +1,18 @@
package bintools
import com.icodici.ubdata.Varint
import net.sergeych.bintools.decode
import net.sergeych.bintools.encode
import net.sergeych.bintools.encodeToHex
import kotlin.test.Test
import kotlin.test.assertEquals
class VarintTest {
fun testValue(x: Long) {
assertEquals(x.toULong(), Varint.decode(Varint.encode(x.toULong())))
assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
println("+ ${x}: ${Varint.encode(x.toULong()).encodeToHex()}")
inline fun <reified T: Any>testValue(x: T) {
assertEquals(x, Varint.decode(Varint.encode(x)))
// assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
println("+ ${x}: ${Varint.encode(x).encodeToHex()}")
}
fun testAround(bits: Int) {
@ -18,16 +20,19 @@ class VarintTest {
var median: Long = (1.toULong() shl bits).toLong()
for( x in (median-5)..(median+5)) {
testValue(x)
testValue(-x)
}
median = median * 3 / 2
for( x in (median-5)..(median+5)) {
testValue(x)
testValue(-x)
}
}
@Test
fun encode() {
// for( i in 0..300) testValue(i.toLong())
testAround(7)
testAround(7)
testAround(14)
testAround(21)
@ -35,6 +40,9 @@ class VarintTest {
}
@Test
fun decode() {
fun negative() {
testValue(-1)
testValue(-2)
testValue(-3)
}
}