refactored int codecs to DRY and easy support signed/unsigned values
This commit is contained in:
parent
0f9702b1dc
commit
8fb052c4f9
@ -9,3 +9,9 @@ Goals:
|
||||
- 2 flavors of space-efficient varint packing
|
||||
- CRCs and CRC-protected blocks
|
||||
- Async variants
|
||||
|
||||
# Some publics
|
||||
|
||||
## SmartInt codec
|
||||
|
||||
Variable-length signed and unsigned integer codec, see `object Smartint`. For numbers that are not too small it is slightly more compact than the `Varint` codec; for example, on `Long` values it saves a byte.
|
@ -1,8 +1,8 @@
|
||||
package net.sergeych.bintools
|
||||
|
||||
abstract class DataSink {
|
||||
interface DataSink {
|
||||
|
||||
abstract fun writeByte(data: Byte)
|
||||
fun writeByte(data: Byte)
|
||||
|
||||
fun writeByte(data: Int) = writeByte(data.toByte())
|
||||
|
||||
@ -16,7 +16,7 @@ abstract class DataSink {
|
||||
}
|
||||
}
|
||||
|
||||
class ArrayDataSink : DataSink() {
|
||||
class ArrayDataSink : DataSink {
|
||||
private val result = mutableListOf<Byte>()
|
||||
|
||||
override fun writeByte(data: Byte) {
|
||||
|
@ -6,16 +6,16 @@ package net.sergeych.bintools
|
||||
* like multiplatform version of DataInput
|
||||
*
|
||||
*/
|
||||
abstract class DataSource {
|
||||
interface DataSource {
|
||||
|
||||
abstract fun readByte(): Byte
|
||||
fun readByte(): Byte
|
||||
|
||||
abstract val position: Int
|
||||
val position: Int
|
||||
|
||||
open fun readUByte() = readByte().toUByte()
|
||||
fun readUByte() = readByte().toUByte()
|
||||
|
||||
@Suppress("unused")
|
||||
open fun readBytes(size: Int): ByteArray =
|
||||
fun readBytes(size: Int): ByteArray =
|
||||
ByteArray(size).also { a ->
|
||||
for( i in 0..size)
|
||||
a[i] = readByte()
|
||||
@ -23,7 +23,7 @@ abstract class DataSource {
|
||||
}
|
||||
|
||||
fun ByteArray.toDataSource(): DataSource =
|
||||
object : DataSource() {
|
||||
object : DataSource {
|
||||
override var position = 0
|
||||
|
||||
override fun readByte(): Byte = this@toDataSource[position++]
|
||||
|
87
src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt
Normal file
87
src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt
Normal file
@ -0,0 +1,87 @@
|
||||
package net.sergeych.bintools
|
||||
|
||||
import com.icodici.ubdata.Varint
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * The common interface to any variable-length (or even fixed-length) integer encoder.
 *
 * An implementation only has to override the [encodeUnsigned] / [decodeUnsigned] pair that works
 * with a [DataSink] and a [DataSource]; the rest (including the signed codec and the `ByteArray`
 * convenience overloads) comes out of the box. This keeps the [Smartint] and [Varint] codecs DRY.
 */
interface IntCodec {
    /** Writes the packed form of the unsigned [value] to [sink]. */
    fun encodeUnsigned(value: ULong, sink: DataSink)

    /** Reads one packed unsigned value from [source]. */
    fun decodeUnsigned(source: DataSource): ULong

    /**
     * Default signed encoder: bit 0 of the packed unsigned value is the sign flag (to keep the
     * packed form as small as possible), the remaining bits hold the magnitude.
     *
     * [Long.MIN_VALUE] has no positive counterpart: its negation overflows back to itself and the
     * left shift drops its only set bit, so it is written as packed value `1` ("negative zero").
     * No other input produces that packed value, and [decodeSigned] maps it back.
     */
    fun encodeSigned(value: Long, sink: DataSink) {
        val negative = value < 0
        val magnitude = (if (negative) -value else value).toULong()
        val signBit: ULong = if (negative) 1u else 0u
        encodeUnsigned((magnitude shl 1) or signBit, sink)
    }

    /**
     * Default signed decoder, the inverse of [encodeSigned]: bit 0 is the sign flag, the
     * remaining bits are the magnitude.
     *
     * A set sign flag with zero magnitude (packed value `1`) is what [encodeSigned] emits for
     * [Long.MIN_VALUE], so it is decoded as [Long.MIN_VALUE] rather than as `-0`.
     */
    fun decodeSigned(source: DataSource): Long {
        val packed = decodeUnsigned(source)
        val magnitude = (packed shr 1).toLong()
        return when {
            (packed and 1uL) == 0uL -> magnitude
            magnitude == 0L -> Long.MIN_VALUE
            else -> -magnitude
        }
    }

    /** Packs the unsigned [value] into a new byte array. */
    fun encodeUnsigned(value: ULong): ByteArray =
        ArrayDataSink().also { encodeUnsigned(value, it) }.toByteArray()

    /** Unpacks one unsigned value from [packed], wrapped with `toDataSource()`. */
    fun decodeUnsigned(packed: ByteArray): ULong = decodeUnsigned(packed.toDataSource())

    /** Packs the signed [value] into a new byte array, see [encodeSigned]. */
    fun encodeSigned(value: Long): ByteArray =
        ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()

    /** Unpacks one signed value from [data], wrapped with `toDataSource()`. */
    fun decodeSigned(data: ByteArray): Long = decodeSigned(data.toDataSource())
}
|
||||
|
||||
/**
 * Decodes one integer of type [T] from [source]: the array is wrapped with `toDataSource()` and
 * the work is delegated to the [DataSource] overload of `decode`.
 */
inline fun <reified T : Any> IntCodec.decode(source: ByteArray): T =
    decode<T>(source.toDataSource())
|
||||
|
||||
/**
 * Decodes one integer of type [T] from [source].
 *
 * Unsigned targets ([UByte], [UShort], [UInt], [ULong]) are read with `decodeUnsigned`, signed
 * targets ([Byte], [Short], [Int], [Long]) with `decodeSigned`. The decoded 64-bit value is then
 * narrowed with the standard `toXxx()` conversion, so a stored value that does not fit into [T]
 * is truncated rather than rejected.
 *
 * @throws IllegalArgumentException if [T] is not one of the supported integer types.
 */
inline fun <reified T : Any> IntCodec.decode(source: DataSource): T {
    val decoded: Any = when (typeOf<T>()) {
        typeOf<UByte>() -> decodeUnsigned(source).toUByte()
        typeOf<UShort>() -> decodeUnsigned(source).toUShort()
        typeOf<UInt>() -> decodeUnsigned(source).toUInt()
        typeOf<ULong>() -> decodeUnsigned(source)
        typeOf<Byte>() -> decodeSigned(source).toByte()
        typeOf<Short>() -> decodeSigned(source).toShort()
        typeOf<Int>() -> decodeSigned(source).toInt()
        typeOf<Long>() -> decodeSigned(source)
        else ->
            throw IllegalArgumentException("can't decode to ${T::class.simpleName}")
    }
    return decoded as T
}
|
||||
|
||||
/**
 * Encodes the integer [x] into [dout] with this codec.
 *
 * Unsigned values ([UByte], [UShort], [UInt], [ULong]) are widened to [ULong] and written with
 * `encodeUnsigned`; signed values ([Byte], [Short], [Int], [Long]) are widened to [Long] and
 * written with `encodeSigned`.
 *
 * @throws IllegalArgumentException if [x] is not one of the supported integer types.
 */
inline fun <reified T : Any> IntCodec.encode(x: T, dout: DataSink) {
    when (x) {
        is UByte -> encodeUnsigned(x.toULong(), dout)
        is UShort -> encodeUnsigned(x.toULong(), dout)
        is UInt -> encodeUnsigned(x.toULong(), dout)
        is ULong -> encodeUnsigned(x, dout)
        is Byte -> encodeSigned(x.toLong(), dout)
        is Short -> encodeSigned(x.toLong(), dout)
        is Int -> encodeSigned(x.toLong(), dout)
        is Long -> encodeSigned(x, dout)
        // The codec is generic (Varint, Smartint, ...), so the message does not name one.
        else -> throw IllegalArgumentException("can't encode ${x::class.simpleName} as an integer: $x")
    }
}
|
||||
|
||||
/**
 * Encodes the integer [x] with this codec into a fresh [ArrayDataSink] and returns the bytes
 * it collected.
 */
inline fun <reified T : Any> IntCodec.encode(x: T): ByteArray {
    val sink = ArrayDataSink()
    encode(x, sink)
    return sink.toByteArray()
}
|
@ -29,11 +29,12 @@ private val hexDigits = "0123456789ABCDEF"
|
||||
fun Long.encodeToHex(length: Int = 0): String {
|
||||
var result = ""
|
||||
var value = this
|
||||
if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
|
||||
var end = if( value >= 0 ) 0L else -1L
|
||||
// if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
|
||||
do {
|
||||
result = hexDigits[(value and 0x0f).toInt()] + result
|
||||
value = value shr 4
|
||||
} while (value > 0)
|
||||
} while (value != end)
|
||||
while (result.length < length) result = "0" + result
|
||||
return result
|
||||
}
|
||||
|
@ -2,13 +2,12 @@
|
||||
|
||||
package com.icodici.ubdata
|
||||
|
||||
import net.sergeych.bintools.ArrayDataSink
|
||||
import net.sergeych.bintools.DataSink
|
||||
import net.sergeych.bintools.DataSource
|
||||
import net.sergeych.bintools.toDataSource
|
||||
import net.sergeych.bintools.*
|
||||
|
||||
/**
|
||||
* Smart variable-length long encoding tools, async.
|
||||
* Smart variable-length long encoding tools, async. It gives a byte-size gain for 64-bit numbers,
|
||||
* so it is very useful when encoding big numbers or at least very big long values. In other cases
|
||||
* [Varint] works faster, and the extra bits it uses do not play a role.
|
||||
*
|
||||
* | Bytes sz | varint bits | smartint bits |
|
||||
* |:-----:|:------:|:---------:|
|
||||
@ -49,13 +48,13 @@ import net.sergeych.bintools.toDataSource
|
||||
* sequence.
|
||||
*
|
||||
*/
|
||||
object Smartint {
|
||||
object Smartint : IntCodec {
|
||||
|
||||
private val v0limit: ULong = (1L shl 6).toULong()
|
||||
private val v1limit = (1L shl 14).toULong()
|
||||
private val v2limit = (1L shl 22).toULong()
|
||||
|
||||
fun encode(value: ULong, sink: DataSink) {
|
||||
override fun encodeUnsigned(value: ULong, sink: DataSink) {
|
||||
when {
|
||||
value < v0limit -> encodeSeq(sink, 0, value)
|
||||
|
||||
@ -82,7 +81,7 @@ object Smartint {
|
||||
(value shr 6) and 0xFFu,
|
||||
(value shr 14) and 0xFFu,
|
||||
)
|
||||
Varint.encode(value shr 22, sink)
|
||||
Varint.encodeUnsigned(value shr 22, sink)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -98,7 +97,7 @@ object Smartint {
|
||||
}
|
||||
}
|
||||
|
||||
fun decode(source: DataSource): ULong {
|
||||
override fun decodeUnsigned(source: DataSource): ULong {
|
||||
fun get(): ULong = source.readUByte().toULong()
|
||||
val first = get().toUInt()
|
||||
var type = (first and 3u).toInt()
|
||||
@ -112,39 +111,7 @@ object Smartint {
|
||||
result = result or (get() shl 14)
|
||||
if (type == 0) return result // type 2
|
||||
|
||||
return result or (Varint.decode(source) shl 22)
|
||||
return result or (Varint.decodeUnsigned(source) shl 22)
|
||||
}
|
||||
|
||||
fun encodeSigned(value: Long, sink: DataSink) {
|
||||
val sigBit: ULong
|
||||
val x: ULong
|
||||
if (value < 0) {
|
||||
x = (-value).toULong()
|
||||
sigBit = 1u
|
||||
} else {
|
||||
x = value.toULong()
|
||||
sigBit = 0u
|
||||
}
|
||||
Varint.encode((x shl 1) or sigBit, sink)
|
||||
}
|
||||
|
||||
fun decodeSigned(source: DataSource): Long {
|
||||
val x = Varint.decode(source)
|
||||
val result = (x shr 1).toLong()
|
||||
return if ((x and 1u).toInt() != 0) -result else result
|
||||
}
|
||||
|
||||
fun encode(value: ULong): ByteArray {
|
||||
return ArrayDataSink().also { encode(value, it) }.toByteArray()
|
||||
}
|
||||
|
||||
fun decode(packed: ByteArray) = decode(packed.toDataSource())
|
||||
|
||||
fun encodeSigned(value: Long): ByteArray {
|
||||
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
|
||||
}
|
||||
|
||||
fun decodeSigned(data: ByteArray): Long {
|
||||
return decodeSigned(data.toDataSource())
|
||||
}
|
||||
}
|
||||
|
@ -2,10 +2,7 @@
|
||||
|
||||
package com.icodici.ubdata
|
||||
|
||||
import net.sergeych.bintools.ArrayDataSink
|
||||
import net.sergeych.bintools.DataSink
|
||||
import net.sergeych.bintools.DataSource
|
||||
import net.sergeych.bintools.toDataSource
|
||||
import net.sergeych.bintools.*
|
||||
|
||||
/**
|
||||
* Variable-length long integer encoding. the MSB (0x80) bit of each byte flags
|
||||
@ -16,21 +13,8 @@ import net.sergeych.bintools.toDataSource
|
||||
* encoding numbers that need more than 22 bits. With smaller numbers it is either
|
||||
* same or even worse, see [Smartint] docs.
|
||||
*/
|
||||
object Varint {
|
||||
fun encodeSigned(value: Long, sink: DataSink): Unit {
|
||||
var sigBit: ULong
|
||||
var x: ULong
|
||||
if (value < 0) {
|
||||
x = (-value).toULong()
|
||||
sigBit = 1u
|
||||
} else {
|
||||
x = value.toULong()
|
||||
sigBit = 0u
|
||||
}
|
||||
encode((x shl 1) or sigBit, sink)
|
||||
}
|
||||
|
||||
fun encode(value: ULong, dout: DataSink) {
|
||||
object Varint: IntCodec {
|
||||
override fun encodeUnsigned(value: ULong, dout: DataSink) {
|
||||
var rest = value
|
||||
do {
|
||||
val x = (rest and 127u).toInt()
|
||||
@ -43,7 +27,7 @@ object Varint {
|
||||
} while (rest > 0u)
|
||||
}
|
||||
|
||||
fun decode(source: DataSource): ULong {
|
||||
override fun decodeUnsigned(source: DataSource): ULong {
|
||||
var result: ULong = 0u
|
||||
var count = 0
|
||||
while (true) {
|
||||
@ -55,28 +39,4 @@ object Varint {
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
|
||||
fun decodeSigned(source: DataSource): Long {
|
||||
val x = decode(source)
|
||||
val result = (x shr 1).toLong()
|
||||
return if ((x and 1u).toInt() != 0) -result else result
|
||||
}
|
||||
|
||||
fun encode(value: ULong): ByteArray {
|
||||
return ArrayDataSink().also { encode(value, it) }.toByteArray()
|
||||
}
|
||||
|
||||
fun decode(packed: ByteArray) = decode(packed.toDataSource())
|
||||
|
||||
fun encodeSigned(value: Long): ByteArray {
|
||||
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
|
||||
}
|
||||
|
||||
fun decodeSigned(data: ByteArray): Long {
|
||||
return decodeSigned(data.toDataSource())
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,16 +1,18 @@
|
||||
package bintools
|
||||
|
||||
import com.icodici.ubdata.Smartint
|
||||
import com.icodici.ubdata.Varint
|
||||
import net.sergeych.bintools.decode
|
||||
import net.sergeych.bintools.encode
|
||||
import net.sergeych.bintools.encodeToHex
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
class SmartintTest {
|
||||
|
||||
fun testValue(x: Long) {
|
||||
assertEquals(x.toULong(), Smartint.decode(Smartint.encode(x.toULong())))
|
||||
assertEquals(x, Smartint.decodeSigned(Smartint.encodeSigned(x)))
|
||||
println("+ ${x}: ${Smartint.encode(x.toULong()).encodeToHex()}")
|
||||
inline fun <reified T:Number>testValue(x: T) {
|
||||
assertEquals(x, Smartint.decode(Smartint.encode(x)))
|
||||
println("+ ${x}: ${Smartint.encode(x).encodeToHex()}")
|
||||
}
|
||||
|
||||
fun testAround(bits: Int) {
|
||||
@ -19,10 +21,12 @@ class SmartintTest {
|
||||
var median: Long = (1.toULong() shl bits).toLong()
|
||||
for( x in (median-2)..(median+2)) {
|
||||
testValue(x)
|
||||
testValue(-x)
|
||||
}
|
||||
median = median * 3 / 2
|
||||
for( x in (median-5)..(median+5)) {
|
||||
testValue(x)
|
||||
testValue(-x)
|
||||
}
|
||||
}
|
||||
|
||||
@ -39,6 +43,13 @@ class SmartintTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
fun decode() {
|
||||
fun compareTest() {
|
||||
for( x in listOf<ULong>(0uL, 1uL, 66uL, 129uL, 219uL, 0x1122uL, 0xFFEEuL, 0xAAbbCCdduL,
|
||||
0x1111222233334444uL, (1UL shl 63))) {
|
||||
// 1--12--23--34--4
|
||||
println("--- $x / 0x${x.encodeToHex(8)}")
|
||||
println("V: ${Varint.encode(x).encodeToHex()}")
|
||||
println("S: ${Smartint.encode(x).encodeToHex()}")
|
||||
}
|
||||
}
|
||||
}
|
@ -1,16 +1,18 @@
|
||||
package bintools
|
||||
|
||||
import com.icodici.ubdata.Varint
|
||||
import net.sergeych.bintools.decode
|
||||
import net.sergeych.bintools.encode
|
||||
import net.sergeych.bintools.encodeToHex
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
class VarintTest {
|
||||
|
||||
fun testValue(x: Long) {
|
||||
assertEquals(x.toULong(), Varint.decode(Varint.encode(x.toULong())))
|
||||
assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
|
||||
println("+ ${x}: ${Varint.encode(x.toULong()).encodeToHex()}")
|
||||
inline fun <reified T: Any>testValue(x: T) {
|
||||
assertEquals(x, Varint.decode(Varint.encode(x)))
|
||||
// assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
|
||||
println("+ ${x}: ${Varint.encode(x).encodeToHex()}")
|
||||
}
|
||||
|
||||
fun testAround(bits: Int) {
|
||||
@ -18,16 +20,19 @@ class VarintTest {
|
||||
var median: Long = (1.toULong() shl bits).toLong()
|
||||
for( x in (median-5)..(median+5)) {
|
||||
testValue(x)
|
||||
testValue(-x)
|
||||
}
|
||||
median = median * 3 / 2
|
||||
for( x in (median-5)..(median+5)) {
|
||||
testValue(x)
|
||||
testValue(-x)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun encode() {
|
||||
// for( i in 0..300) testValue(i.toLong())
|
||||
testAround(7)
|
||||
testAround(7)
|
||||
testAround(14)
|
||||
testAround(21)
|
||||
@ -35,6 +40,9 @@ class VarintTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
fun decode() {
|
||||
fun negative() {
|
||||
testValue(-1)
|
||||
testValue(-2)
|
||||
testValue(-3)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user