refactored int codecs to DRY and easy support signed/unsigned values
This commit is contained in:
parent
0f9702b1dc
commit
8fb052c4f9
@ -8,4 +8,10 @@ Goals:
|
|||||||
- Pack/unpack byte tools
|
- Pack/unpack byte tools
|
||||||
- 2 flavors of space-efficient varint packing
|
- 2 flavors of space-efficient varint packing
|
||||||
- CRCs and CRC-protected blocks
|
- CRCs and CRC-protected blocks
|
||||||
- Async variants
|
- Async variants
|
||||||
|
|
||||||
|
# Some publics
|
||||||
|
|
||||||
|
## SmartInt codec
|
||||||
|
|
||||||
|
Variable-length signed and unsigned integer codec, see `object Smartint`. For numbers that are not too small it is slightly more space-efficient than the `Varint` codec; for example, on `Long` values it saves a byte.
|
@ -1,8 +1,8 @@
|
|||||||
package net.sergeych.bintools
|
package net.sergeych.bintools
|
||||||
|
|
||||||
abstract class DataSink {
|
interface DataSink {
|
||||||
|
|
||||||
abstract fun writeByte(data: Byte)
|
fun writeByte(data: Byte)
|
||||||
|
|
||||||
fun writeByte(data: Int) = writeByte(data.toByte())
|
fun writeByte(data: Int) = writeByte(data.toByte())
|
||||||
|
|
||||||
@ -16,7 +16,7 @@ abstract class DataSink {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ArrayDataSink : DataSink() {
|
class ArrayDataSink : DataSink {
|
||||||
private val result = mutableListOf<Byte>()
|
private val result = mutableListOf<Byte>()
|
||||||
|
|
||||||
override fun writeByte(data: Byte) {
|
override fun writeByte(data: Byte) {
|
||||||
|
@ -6,16 +6,16 @@ package net.sergeych.bintools
|
|||||||
* like multiplatform version of DataInput
|
* like multiplatform version of DataInput
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
abstract class DataSource {
|
interface DataSource {
|
||||||
|
|
||||||
abstract fun readByte(): Byte
|
fun readByte(): Byte
|
||||||
|
|
||||||
abstract val position: Int
|
val position: Int
|
||||||
|
|
||||||
open fun readUByte() = readByte().toUByte()
|
fun readUByte() = readByte().toUByte()
|
||||||
|
|
||||||
@Suppress("unused")
|
@Suppress("unused")
|
||||||
open fun readBytes(size: Int): ByteArray =
|
fun readBytes(size: Int): ByteArray =
|
||||||
ByteArray(size).also { a ->
|
ByteArray(size).also { a ->
|
||||||
for( i in 0..size)
|
for( i in 0..size)
|
||||||
a[i] = readByte()
|
a[i] = readByte()
|
||||||
@ -23,7 +23,7 @@ abstract class DataSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fun ByteArray.toDataSource(): DataSource =
|
fun ByteArray.toDataSource(): DataSource =
|
||||||
object : DataSource() {
|
object : DataSource {
|
||||||
override var position = 0
|
override var position = 0
|
||||||
|
|
||||||
override fun readByte(): Byte = this@toDataSource[position++]
|
override fun readByte(): Byte = this@toDataSource[position++]
|
||||||
|
87
src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt
Normal file
87
src/commonMain/kotlin/net.sergeych.bintools/IntCodec.kt
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
package net.sergeych.bintools
|
||||||
|
|
||||||
|
import com.icodici.ubdata.Varint
|
||||||
|
import kotlin.reflect.typeOf
|
||||||
|
|
||||||
|
/**
 * The common interface to any variable-length (or even fixed-length) integer encoder.
 *
 * An implementation only has to override the [encodeUnsigned] / [decodeUnsigned] pair that
 * works on a [DataSink] / [DataSource]; the rest (including the signed codec and the
 * `ByteArray` convenience overloads) comes out of the box. This keeps the [Smartint] and
 * [Varint] codecs DRY.
 */
interface IntCodec {

    /** Writes the encoded form of the unsigned [value] to [sink]. */
    fun encodeUnsigned(value: ULong, sink: DataSink)

    /** Reads one encoded unsigned value from [source]. */
    fun decodeUnsigned(source: DataSource): ULong

    /**
     * Default signed codec: bit 0 carries the sign (1 = negative) and the remaining bits
     * carry the magnitude, which keeps small values of either sign small when packed.
     *
     * [Long.MIN_VALUE] has no positive counterpart, so it is written as the otherwise
     * unused "negative zero" code (unsigned `1`); [decodeSigned] maps that code back to
     * [Long.MIN_VALUE].
     */
    fun encodeSigned(value: Long, sink: DataSink) {
        val packed: ULong = when {
            value >= 0 -> value.toULong() shl 1
            // -Long.MIN_VALUE overflows, so this case is spelled out instead of negating.
            value == Long.MIN_VALUE -> 1uL
            else -> ((-value).toULong() shl 1) or 1uL
        }
        encodeUnsigned(packed, sink)
    }

    /**
     * Default signed codec: reads one unsigned value from [source] and interprets bit 0
     * as the sign and the remaining bits as the magnitude (see [encodeSigned]).
     */
    fun decodeSigned(source: DataSource): Long {
        val packed = decodeUnsigned(source)
        val magnitude = (packed shr 1).toLong()
        return when {
            (packed and 1uL) == 0uL -> magnitude
            // "Negative zero" is never produced for 0, it is the code of Long.MIN_VALUE.
            magnitude == 0L -> Long.MIN_VALUE
            else -> -magnitude
        }
    }

    /** Encodes the unsigned [value] into a freshly allocated byte array. */
    fun encodeUnsigned(value: ULong): ByteArray =
        ArrayDataSink().also { encodeUnsigned(value, it) }.toByteArray()

    /** Decodes one unsigned value from the beginning of [packed]. */
    fun decodeUnsigned(packed: ByteArray): ULong = decodeUnsigned(packed.toDataSource())

    /** Encodes the signed [value] into a freshly allocated byte array. */
    fun encodeSigned(value: Long): ByteArray =
        ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()

    /** Decodes one signed value from the beginning of [data]. */
    fun decodeSigned(data: ByteArray): Long = decodeSigned(data.toDataSource())
}
|
||||||
|
|
||||||
|
/**
 * Decodes one integer of type [T] from the beginning of the byte array [source]
 * by wrapping it into a [DataSource] and delegating to the [DataSource]-based `decode`.
 */
inline fun <reified T : Any> IntCodec.decode(source: ByteArray): T {
    val input = source.toDataSource()
    return decode<T>(input)
}
|
||||||
|
|
||||||
|
/**
 * Decodes one integer of type [T] from [source] using this codec.
 *
 * Unsigned targets ([UByte], [UShort], [UInt], [ULong]) are read with
 * [IntCodec.decodeUnsigned], signed targets ([Byte], [Short], [Int], [Long]) with
 * [IntCodec.decodeSigned]. The decoded value is narrowed with the plain `toXxx()`
 * conversion, so a value that does not fit into [T] is truncated, not rejected.
 *
 * @throws IllegalArgumentException if [T] is not one of the supported integer types.
 */
inline fun <reified T : Any> IntCodec.decode(source: DataSource): T {
    return when (typeOf<T>()) {
        typeOf<UByte>() -> decodeUnsigned(source).toUByte()
        typeOf<UShort>() -> decodeUnsigned(source).toUShort()
        typeOf<UInt>() -> decodeUnsigned(source).toUInt()
        typeOf<ULong>() -> decodeUnsigned(source)
        typeOf<Byte>() -> decodeSigned(source).toByte()
        typeOf<Short>() -> decodeSigned(source).toShort()
        typeOf<Int>() -> decodeSigned(source).toInt()
        typeOf<Long>() -> decodeSigned(source)
        else ->
            throw IllegalArgumentException("can't decode to ${T::class.simpleName}")
    } as T
}
|
||||||
|
|
||||||
|
/**
 * Encodes the integer [x] to [dout] using this codec.
 *
 * Unsigned values ([UByte], [UShort], [UInt], [ULong]) are widened to [ULong] and written
 * with [IntCodec.encodeUnsigned]; signed values ([Byte], [Short], [Int], [Long]) are
 * widened to [Long] and written with [IntCodec.encodeSigned].
 *
 * @throws IllegalArgumentException if [x] is not one of the supported integer types.
 */
inline fun <reified T : Any> IntCodec.encode(x: T, dout: DataSink) {
    when (x) {
        is UByte -> encodeUnsigned(x.toULong(), dout)
        is UShort -> encodeUnsigned(x.toULong(), dout)
        is UInt -> encodeUnsigned(x.toULong(), dout)
        is ULong -> encodeUnsigned(x, dout)
        is Byte -> encodeSigned(x.toLong(), dout)
        is Short -> encodeSigned(x.toLong(), dout)
        is Int -> encodeSigned(x.toLong(), dout)
        is Long -> encodeSigned(x, dout)
        else -> throw IllegalArgumentException("can't encode with IntCodec ${x::class.simpleName}: $x")
    }
}
|
||||||
|
|
||||||
|
/**
 * Encodes the integer [x] with this codec and returns the packed bytes.
 * The value is written into a temporary [ArrayDataSink] whose contents are returned.
 */
inline fun <reified T : Any> IntCodec.encode(x: T): ByteArray {
    val buffer = ArrayDataSink()
    encode(x, buffer)
    return buffer.toByteArray()
}
|
@ -29,11 +29,12 @@ private val hexDigits = "0123456789ABCDEF"
|
|||||||
fun Long.encodeToHex(length: Int = 0): String {
|
fun Long.encodeToHex(length: Int = 0): String {
|
||||||
var result = ""
|
var result = ""
|
||||||
var value = this
|
var value = this
|
||||||
if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
|
var end = if( value >= 0 ) 0L else -1L
|
||||||
|
// if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
|
||||||
do {
|
do {
|
||||||
result = hexDigits[(value and 0x0f).toInt()] + result
|
result = hexDigits[(value and 0x0f).toInt()] + result
|
||||||
value = value shr 4
|
value = value shr 4
|
||||||
} while (value > 0)
|
} while (value != end)
|
||||||
while (result.length < length) result = "0" + result
|
while (result.length < length) result = "0" + result
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
@ -2,13 +2,12 @@
|
|||||||
|
|
||||||
package com.icodici.ubdata
|
package com.icodici.ubdata
|
||||||
|
|
||||||
import net.sergeych.bintools.ArrayDataSink
|
import net.sergeych.bintools.*
|
||||||
import net.sergeych.bintools.DataSink
|
|
||||||
import net.sergeych.bintools.DataSource
|
|
||||||
import net.sergeych.bintools.toDataSource
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Smart variable-length long encoding tools, async.
|
* Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers
|
||||||
|
* so it is very useful when encoding big numbers or at least very big long values. In other cases
|
||||||
|
* [Varint] works faster, and extra bits it uses does not play
|
||||||
*
|
*
|
||||||
* | Bytes sz | varint bits | smartint bits |
|
* | Bytes sz | varint bits | smartint bits |
|
||||||
* |:-----:|:------:|:---------:|
|
* |:-----:|:------:|:---------:|
|
||||||
@ -49,13 +48,13 @@ import net.sergeych.bintools.toDataSource
|
|||||||
* sequence.
|
* sequence.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
object Smartint {
|
object Smartint : IntCodec {
|
||||||
|
|
||||||
private val v0limit: ULong = (1L shl 6).toULong()
|
private val v0limit: ULong = (1L shl 6).toULong()
|
||||||
private val v1limit = (1L shl 14).toULong()
|
private val v1limit = (1L shl 14).toULong()
|
||||||
private val v2limit = (1L shl 22).toULong()
|
private val v2limit = (1L shl 22).toULong()
|
||||||
|
|
||||||
fun encode(value: ULong, sink: DataSink) {
|
override fun encodeUnsigned(value: ULong, sink: DataSink) {
|
||||||
when {
|
when {
|
||||||
value < v0limit -> encodeSeq(sink, 0, value)
|
value < v0limit -> encodeSeq(sink, 0, value)
|
||||||
|
|
||||||
@ -82,7 +81,7 @@ object Smartint {
|
|||||||
(value shr 6) and 0xFFu,
|
(value shr 6) and 0xFFu,
|
||||||
(value shr 14) and 0xFFu,
|
(value shr 14) and 0xFFu,
|
||||||
)
|
)
|
||||||
Varint.encode(value shr 22, sink)
|
Varint.encodeUnsigned(value shr 22, sink)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -98,7 +97,7 @@ object Smartint {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun decode(source: DataSource): ULong {
|
override fun decodeUnsigned(source: DataSource): ULong {
|
||||||
fun get(): ULong = source.readUByte().toULong()
|
fun get(): ULong = source.readUByte().toULong()
|
||||||
val first = get().toUInt()
|
val first = get().toUInt()
|
||||||
var type = (first and 3u).toInt()
|
var type = (first and 3u).toInt()
|
||||||
@ -112,39 +111,7 @@ object Smartint {
|
|||||||
result = result or (get() shl 14)
|
result = result or (get() shl 14)
|
||||||
if (type == 0) return result // type 2
|
if (type == 0) return result // type 2
|
||||||
|
|
||||||
return result or (Varint.decode(source) shl 22)
|
return result or (Varint.decodeUnsigned(source) shl 22)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun encodeSigned(value: Long, sink: DataSink) {
|
|
||||||
val sigBit: ULong
|
|
||||||
val x: ULong
|
|
||||||
if (value < 0) {
|
|
||||||
x = (-value).toULong()
|
|
||||||
sigBit = 1u
|
|
||||||
} else {
|
|
||||||
x = value.toULong()
|
|
||||||
sigBit = 0u
|
|
||||||
}
|
|
||||||
Varint.encode((x shl 1) or sigBit, sink)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decodeSigned(source: DataSource): Long {
|
|
||||||
val x = Varint.decode(source)
|
|
||||||
val result = (x shr 1).toLong()
|
|
||||||
return if ((x and 1u).toInt() != 0) -result else result
|
|
||||||
}
|
|
||||||
|
|
||||||
fun encode(value: ULong): ByteArray {
|
|
||||||
return ArrayDataSink().also { encode(value, it) }.toByteArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decode(packed: ByteArray) = decode(packed.toDataSource())
|
|
||||||
|
|
||||||
fun encodeSigned(value: Long): ByteArray {
|
|
||||||
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decodeSigned(data: ByteArray): Long {
|
|
||||||
return decodeSigned(data.toDataSource())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -2,10 +2,7 @@
|
|||||||
|
|
||||||
package com.icodici.ubdata
|
package com.icodici.ubdata
|
||||||
|
|
||||||
import net.sergeych.bintools.ArrayDataSink
|
import net.sergeych.bintools.*
|
||||||
import net.sergeych.bintools.DataSink
|
|
||||||
import net.sergeych.bintools.DataSource
|
|
||||||
import net.sergeych.bintools.toDataSource
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Variable-length long integer encoding. the MSB (0x80) bit of each byte flags
|
* Variable-length long integer encoding. the MSB (0x80) bit of each byte flags
|
||||||
@ -16,21 +13,8 @@ import net.sergeych.bintools.toDataSource
|
|||||||
* encoding numbers that need more than 22 bits. With smaller numbers it is either
|
* encoding numbers that need more than 22 bits. With smaller numbers it is either
|
||||||
* same or even worse, see [Smartint] docs.
|
* same or even worse, see [Smartint] docs.
|
||||||
*/
|
*/
|
||||||
object Varint {
|
object Varint: IntCodec {
|
||||||
fun encodeSigned(value: Long, sink: DataSink): Unit {
|
override fun encodeUnsigned(value: ULong, dout: DataSink) {
|
||||||
var sigBit: ULong
|
|
||||||
var x: ULong
|
|
||||||
if (value < 0) {
|
|
||||||
x = (-value).toULong()
|
|
||||||
sigBit = 1u
|
|
||||||
} else {
|
|
||||||
x = value.toULong()
|
|
||||||
sigBit = 0u
|
|
||||||
}
|
|
||||||
encode((x shl 1) or sigBit, sink)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun encode(value: ULong, dout: DataSink) {
|
|
||||||
var rest = value
|
var rest = value
|
||||||
do {
|
do {
|
||||||
val x = (rest and 127u).toInt()
|
val x = (rest and 127u).toInt()
|
||||||
@ -43,7 +27,7 @@ object Varint {
|
|||||||
} while (rest > 0u)
|
} while (rest > 0u)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun decode(source: DataSource): ULong {
|
override fun decodeUnsigned(source: DataSource): ULong {
|
||||||
var result: ULong = 0u
|
var result: ULong = 0u
|
||||||
var count = 0
|
var count = 0
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -55,28 +39,4 @@ object Varint {
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fun decodeSigned(source: DataSource): Long {
|
|
||||||
val x = decode(source)
|
|
||||||
val result = (x shr 1).toLong()
|
|
||||||
return if ((x and 1u).toInt() != 0) -result else result
|
|
||||||
}
|
|
||||||
|
|
||||||
fun encode(value: ULong): ByteArray {
|
|
||||||
return ArrayDataSink().also { encode(value, it) }.toByteArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decode(packed: ByteArray) = decode(packed.toDataSource())
|
|
||||||
|
|
||||||
fun encodeSigned(value: Long): ByteArray {
|
|
||||||
return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decodeSigned(data: ByteArray): Long {
|
|
||||||
return decodeSigned(data.toDataSource())
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,16 +1,18 @@
|
|||||||
package bintools
|
package bintools
|
||||||
|
|
||||||
import com.icodici.ubdata.Smartint
|
import com.icodici.ubdata.Smartint
|
||||||
|
import com.icodici.ubdata.Varint
|
||||||
|
import net.sergeych.bintools.decode
|
||||||
|
import net.sergeych.bintools.encode
|
||||||
import net.sergeych.bintools.encodeToHex
|
import net.sergeych.bintools.encodeToHex
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
import kotlin.test.assertEquals
|
import kotlin.test.assertEquals
|
||||||
|
|
||||||
class SmartintTest {
|
class SmartintTest {
|
||||||
|
|
||||||
fun testValue(x: Long) {
|
inline fun <reified T:Number>testValue(x: T) {
|
||||||
assertEquals(x.toULong(), Smartint.decode(Smartint.encode(x.toULong())))
|
assertEquals(x, Smartint.decode(Smartint.encode(x)))
|
||||||
assertEquals(x, Smartint.decodeSigned(Smartint.encodeSigned(x)))
|
println("+ ${x}: ${Smartint.encode(x).encodeToHex()}")
|
||||||
println("+ ${x}: ${Smartint.encode(x.toULong()).encodeToHex()}")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun testAround(bits: Int) {
|
fun testAround(bits: Int) {
|
||||||
@ -19,10 +21,12 @@ class SmartintTest {
|
|||||||
var median: Long = (1.toULong() shl bits).toLong()
|
var median: Long = (1.toULong() shl bits).toLong()
|
||||||
for( x in (median-2)..(median+2)) {
|
for( x in (median-2)..(median+2)) {
|
||||||
testValue(x)
|
testValue(x)
|
||||||
|
testValue(-x)
|
||||||
}
|
}
|
||||||
median = median * 3 / 2
|
median = median * 3 / 2
|
||||||
for( x in (median-5)..(median+5)) {
|
for( x in (median-5)..(median+5)) {
|
||||||
testValue(x)
|
testValue(x)
|
||||||
|
testValue(-x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -39,6 +43,13 @@ class SmartintTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun decode() {
|
fun compareTest() {
|
||||||
|
for( x in listOf<ULong>(0uL, 1uL, 66uL, 129uL, 219uL, 0x1122uL, 0xFFEEuL, 0xAAbbCCdduL,
|
||||||
|
0x1111222233334444uL, (1UL shl 63))) {
|
||||||
|
// 1--12--23--34--4
|
||||||
|
println("--- $x / 0x${x.encodeToHex(8)}")
|
||||||
|
println("V: ${Varint.encode(x).encodeToHex()}")
|
||||||
|
println("S: ${Smartint.encode(x).encodeToHex()}")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,16 +1,18 @@
|
|||||||
package bintools
|
package bintools
|
||||||
|
|
||||||
import com.icodici.ubdata.Varint
|
import com.icodici.ubdata.Varint
|
||||||
|
import net.sergeych.bintools.decode
|
||||||
|
import net.sergeych.bintools.encode
|
||||||
import net.sergeych.bintools.encodeToHex
|
import net.sergeych.bintools.encodeToHex
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
import kotlin.test.assertEquals
|
import kotlin.test.assertEquals
|
||||||
|
|
||||||
class VarintTest {
|
class VarintTest {
|
||||||
|
|
||||||
fun testValue(x: Long) {
|
inline fun <reified T: Any>testValue(x: T) {
|
||||||
assertEquals(x.toULong(), Varint.decode(Varint.encode(x.toULong())))
|
assertEquals(x, Varint.decode(Varint.encode(x)))
|
||||||
assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
|
// assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
|
||||||
println("+ ${x}: ${Varint.encode(x.toULong()).encodeToHex()}")
|
println("+ ${x}: ${Varint.encode(x).encodeToHex()}")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun testAround(bits: Int) {
|
fun testAround(bits: Int) {
|
||||||
@ -18,16 +20,19 @@ class VarintTest {
|
|||||||
var median: Long = (1.toULong() shl bits).toLong()
|
var median: Long = (1.toULong() shl bits).toLong()
|
||||||
for( x in (median-5)..(median+5)) {
|
for( x in (median-5)..(median+5)) {
|
||||||
testValue(x)
|
testValue(x)
|
||||||
|
testValue(-x)
|
||||||
}
|
}
|
||||||
median = median * 3 / 2
|
median = median * 3 / 2
|
||||||
for( x in (median-5)..(median+5)) {
|
for( x in (median-5)..(median+5)) {
|
||||||
testValue(x)
|
testValue(x)
|
||||||
|
testValue(-x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun encode() {
|
fun encode() {
|
||||||
// for( i in 0..300) testValue(i.toLong())
|
// for( i in 0..300) testValue(i.toLong())
|
||||||
|
testAround(7)
|
||||||
testAround(7)
|
testAround(7)
|
||||||
testAround(14)
|
testAround(14)
|
||||||
testAround(21)
|
testAround(21)
|
||||||
@ -35,6 +40,9 @@ class VarintTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun decode() {
|
fun negative() {
|
||||||
|
testValue(-1)
|
||||||
|
testValue(-2)
|
||||||
|
testValue(-3)
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user