From 0f9702b1dc03aed39a9377069c63cc7e5690877e Mon Sep 17 00:00:00 2001 From: sergeych Date: Fri, 3 Feb 2023 13:10:43 +0100 Subject: [PATCH] initial: varing and smarting packing tools --- .gitignore | 3 + README.md | 11 + build.gradle.kts | 60 +++++ gradle.properties | 2 + gradlew | 234 ++++++++++++++++++ gradlew.bat | 89 +++++++ settings.gradle.kts | 3 + .../kotlin/net.sergeych.bintools/DataSink.kt | 27 ++ .../net.sergeych.bintools/DataSource.kt | 30 +++ .../kotlin/net.sergeych.bintools/errors.kt | 4 + .../net.sergeych.bintools/simple_codecs.kt | 135 ++++++++++ .../kotlin/net.sergeych.bintools/smartint.kt | 150 +++++++++++ .../kotlin/net.sergeych.bintools/varint.kt | 82 ++++++ .../kotlin/bintools/SmartintTest.kt | 44 ++++ src/commonTest/kotlin/bintools/VarintTest.kt | 40 ++++ 15 files changed, 914 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 build.gradle.kts create mode 100644 gradle.properties create mode 100755 gradlew create mode 100644 gradlew.bat create mode 100644 settings.gradle.kts create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/errors.kt create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/smartint.kt create mode 100644 src/commonMain/kotlin/net.sergeych.bintools/varint.kt create mode 100644 src/commonTest/kotlin/bintools/SmartintTest.kt create mode 100644 src/commonTest/kotlin/bintools/VarintTest.kt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8ea68f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.gradle/ +/.idea/ +/build/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..2cac10f --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# Binary tools + +> Work in progress, not ready for any practical use + +Goals: + 
+- Efficient binary serialization +- Pack/unpack byte tools +- 2 flavors of space-efficient varint packing +- CRCs and CRC-protected blocks +- Async variants \ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..6f4a5a8 --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,60 @@ +plugins { + kotlin("multiplatform") version "1.7.21" +} + +group = "net.sergeych" +version = "0.0.1-SNAPSHOT" + +repositories { + mavenCentral() + mavenLocal() + maven("https://maven.universablockchain.com/") +} + +kotlin { + jvm { + compilations.all { + kotlinOptions.jvmTarget = "1.8" + } + withJava() + testRuns["test"].executionTask.configure { + useJUnitPlatform() + } + } + js(IR) { + browser { + commonWebpackConfig { + cssSupport.enabled = true + } + } + } + val hostOs = System.getProperty("os.name") + val isMingwX64 = hostOs.startsWith("Windows") + val nativeTarget = when { + hostOs == "Mac OS X" -> macosX64("native") + hostOs == "Linux" -> linuxX64("native") + isMingwX64 -> mingwX64("native") + else -> throw GradleException("Host OS is not supported in Kotlin/Native.") + } + + + sourceSets { + val commonMain by getting { + dependencies { + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4") + api("net.sergeych:mp_stools:1.3.2") + } + } + val commonTest by getting { + dependencies { + implementation(kotlin("test")) + } + } + val jvmMain by getting + val jvmTest by getting + val jsMain by getting + val jsTest by getting + val nativeMain by getting + val nativeTest by getting + } +} diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..9dcc3a7 --- /dev/null +++ b/gradle.properties @@ -0,0 +1,2 @@ +kotlin.code.style=official +kotlin.js.compiler=ir diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..1b6c787 --- /dev/null +++ b/gradlew @@ -0,0 +1,234 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. 
+# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. 
+if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..107acd3 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. 
+set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/settings.gradle.kts b/settings.gradle.kts
new file mode 100644
index 0000000..6cf7e03
--- /dev/null
+++ b/settings.gradle.kts
@@ -0,0 +1,3 @@
+
+rootProject.name = "mp_bintools"
+
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt b/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt
new file mode 100644
index 0000000..76c9724
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/DataSink.kt
@@ -0,0 +1,41 @@
+package net.sergeych.bintools
+
+/**
+ * Minimal synchronous byte-output abstraction: implementations provide
+ * [writeByte], the other writers are built on top of it.
+ */
+abstract class DataSink {
+
+    /** Write a single byte to the sink. */
+    abstract fun writeByte(data: Byte)
+
+    /** Write the low 8 bits of [data]. */
+    fun writeByte(data: Int) = writeByte(data.toByte())
+
+    /** Write an unsigned byte; by default it is passed to [writeByte] as a signed [Byte]. */
+    open fun writeUByte(data: UByte) {
+        writeByte(data.toByte())
+    }
+
+    /** Write all bytes of [data] in order, one [writeByte] call per byte. */
+    @Suppress("unused")
+    open fun writeBytes(data: ByteArray) {
+        for(d in data) writeByte(d)
+    }
+}
+
+/**
+ * [DataSink] that collects everything written to it in memory;
+ * [toByteArray] returns the collected bytes.
+ */
+class ArrayDataSink : DataSink() {
+    // element type spelled out: an empty mutableListOf() has nothing to infer it from
+    private val result = mutableListOf<Byte>()
+
+    override fun writeByte(data: Byte) {
+        result.add(data)
+    }
+
+    /** Copy of all bytes written so far. */
+    fun toByteArray() = result.toByteArray()
+}
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt b/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt
new file mode 100644
index 0000000..4338e2e
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/DataSource.kt
@@ -0,0 +1,38 @@
+package net.sergeych.bintools
+
+/**
+ * Data input stream-like abstraction. We need it because
+ * kotlinx serialization is synchronous and there is nothing
+ * like a multiplatform version of DataInput.
+ */
+abstract class DataSource {
+
+    /** Read the next byte from the source. */
+    abstract fun readByte(): Byte
+
+    /** Current read position (the array-backed source counts the bytes consumed so far). */
+    abstract val position: Int
+
+    /** Read the next byte and reinterpret it as unsigned. */
+    open fun readUByte() = readByte().toUByte()
+
+    /** Read exactly [size] bytes, one [readByte] call per byte. */
+    @Suppress("unused")
+    open fun readBytes(size: Int): ByteArray =
+        ByteArray(size).also { a ->
+            // half-open range: `0..size` would also write a[size], one past the end of the array
+            for (i in 0 until size)
+                a[i] = readByte()
+        }
+}
+
+/**
+ * Wrap this array into a [DataSource] that reads it starting from index 0.
+ * There is no explicit end-of-data check: reading past the end is plain array indexing.
+ */
+fun ByteArray.toDataSource(): DataSource =
+    object : DataSource() {
+        override var position = 0
+
+        override fun readByte(): Byte = this@toDataSource[position++]
+    }
\ No newline at end of file
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/errors.kt b/src/commonMain/kotlin/net.sergeych.bintools/errors.kt
new file mode 100644
index 0000000..fa68b16
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/errors.kt
@@ -0,0 +1,5 @@
+package net.sergeych.bintools
+
+/** Thrown when input data does not match the expected format, e.g. a malformed hex string. */
+class FormatException(message: String="invalid format",cause: Throwable?=null):
+    Exception(message, cause)
\ No newline at end of file
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt
new file mode 100644
index 0000000..a66a877
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt
@@ -0,0 +1,172 @@
+package net.sergeych.bintools
+
+
+/**
+ * Convert [value] to 8 bytes, most significant byte first (big-endian).
+ */
+fun longToBytes(value: Long): ByteArray {
+    var l = value
+    val result = ByteArray(8)
+    for (i in 7 downTo 0) {
+        result[i] = (l and 0xFF).toByte()
+        l = l shr 8
+    }
+    return result
+}
+
+/**
+ * Convert the first 8 bytes of [b] to a long, most significant byte first
+ * (big-endian) - the inverse of [longToBytes]. [b] must hold at least 8 bytes.
+ */
+fun bytesToLong(b: ByteArray): Long {
+    var result: Long = 0
+    for (i in 0 until 8) {
+        result = result shl 8
+        result = result or (b[i].toLong() and 0xFF)
+    }
+    return result
+}
+
+
+private val hexDigits = "0123456789ABCDEF"
+
+/**
+ * Encode a non-negative long as upper-case hex, left-padded with zeroes
+ * to at least [length] characters.
+ *
+ * @throws IllegalArgumentException if the value is negative
+ */
+fun Long.encodeToHex(length: Int = 0): String {
+    var result = ""
+    var value = this
+    if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)")
+    do {
+        result = hexDigits[(value and 0x0f).toInt()] + result
+        value = value shr 4
+    } while (value > 0)
+    while (result.length < length) result = "0" + result
+    return result
+}
+
+// Narrower types are widened to a non-negative Long first (signed ones are masked).
+fun Int.encodeToHex(length: Int = 0) = (toLong() and 0xFFFFffff).encodeToHex(length)
+@Suppress("unused")
+fun UInt.encodeToHex(length: Int = 0) = toLong().encodeToHex(length)
+fun Byte.encodeToHex(length: Int = 0) = (toLong() and 0xFF).encodeToHex(length)
+fun UByte.encodeToHex(length: Int = 0) = toLong().encodeToHex(length)
+
+/**
+ * Encode as upper-case hex, left-padded with zeroes to at least [length] characters.
+ * Formatted as unsigned directly: going through `toLong()` would turn values
+ * above `Long.MAX_VALUE` negative, which [Long.encodeToHex] rejects.
+ */
+@Suppress("unused")
+fun ULong.encodeToHex(length: Int = 0): String = toString(16).uppercase().padStart(length, '0')
+
+/** Hex string of the bytes, two digits per byte, joined with [separator]. */
+fun ByteArray.encodeToHex(separator: String = " "): String = joinToString(separator) { it.toUByte().encodeToHex(2) }
+/** Hex string of the bytes, two digits per byte, joined with [separator]. */
+fun Collection<Byte>.encodeToHex(separator: String = " "): String = joinToString(separator) { it.toUByte().encodeToHex(2) }
+
+/** Multi-line hex + ASCII dump, the lines of [toDumpLines] joined with newlines. */
+fun ByteArray.toDump(wide: Boolean = false): String = toDumpLines(wide).joinToString("\n")
+
+/**
+ * Render the array as dump lines: a hex offset (at least 4 digits), the bytes in
+ * hex, and the same bytes as printable ASCII between `|` bars (others show as `.`).
+ *
+ * @param wide 32 bytes per line instead of 16
+ * @return the lines, empty for an empty array
+ */
+fun ByteArray.toDumpLines(wide: Boolean = false): List<String> {
+
+    val lineSize = if (wide) 32 else 16
+
+    // ASCII column for the line starting at _from, space-padded to lineSize characters
+    fun dumpChars(_from: Int): String {
+        var from = _from
+        val b = StringBuilder(22)
+
+        b.append('|')
+        val max: Int = kotlin.math.min(size, from + lineSize)
+        while (from < max) {
+            val ch = this[from++].toInt()
+            if (ch >= ' '.code && ch < 127) b.append(ch.toChar()) else b.append('.')
+        }
+        val remainder = from % lineSize
+        if (remainder > 0) {
+            var cnt = lineSize - remainder
+            while (cnt-- > 0) b.append(' ')
+        }
+        return b.append("|").toString()
+    }
+
+
+    val lines = mutableListOf<String>()
+    if (size == 0) return lines
+    var line: StringBuilder? = null
+
+    if (size != 0) {
+        for (i in indices) {
+            if (i % lineSize == 0) {
+                if (line != null) {
+                    line.append(dumpChars(i - lineSize))
+                    lines.add(line.toString())
+                }
+                line = StringBuilder(i.encodeToHex(4))
+                line.append(' ')
+            }
+            line!!.append((this[i].toUByte()).encodeToHex(2))
+            line.append(' ')
+        }
+        if (line != null) {
+            val l = size
+            var fill = lineSize - l % lineSize
+            // one pad per missing byte: 2 hex digits + 1 space, so the ASCII column stays aligned
+            if (fill < lineSize) while (fill-- > 0) line.append("   ")
+            val index = l - l % lineSize
+            line.append(dumpChars(if (index < l) index else l - lineSize))
+            lines.add(line.toString())
+        }
+    }
+    return lines
+}
+
+/**
+ * Decode a hex string (either case) into bytes. Leading and trailing whitespace is
+ * trimmed and whitespace between bytes is skipped; the two digits of a byte must be adjacent.
+ *
+ * @throws FormatException on a non-hex character or when the second digit of a byte is missing
+ */
+fun String.decodeHex(): ByteArray {
+    val source = this.trim().uppercase()
+    val result = arrayListOf<Byte>()
+    var pos = 0
+    while (pos < source.length) {
+        val i = hexDigits.indexOf(source[pos++])
+        if (i < 0) throw FormatException("invalid hex digit in ${source} at ${pos - 1}")
+        if (pos >= source.length) throw FormatException(
+            "hex string must consist of bytes " +
+                    "(unexepced end of data): $source"
+        )
+        val j = hexDigits.indexOf(source[pos++])
+        if (j < 0) throw FormatException("invalid hex digit in ${source} at ${pos - 1}")
+        result.add(((i shl 4) or j).toByte())
+        while (pos < source.length && (source[pos].isWhitespace())) pos++
+    }
+    return result.toByteArray()
+}
+
+/** Reverse the byte order in place. */
+fun ByteArray.flipSelf() {
+    var i = 0
+    var j = size - 1
+    while (i < j) {
+        val x = this[i]
+        this[i++] = this[j]
+        this[j--] = x
+    }
+}
+
+/** Return a reversed copy; this array is left unchanged. */
+fun ByteArray.flip(): ByteArray = copyOf().also { it.flipSelf() }
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt b/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt
new file mode 100644
index 0000000..8ff7623
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/smartint.kt
@@ -0,0 +1,150 @@
+@file:OptIn(ExperimentalUnsignedTypes::class)
+
+package com.icodici.ubdata
+
+import net.sergeych.bintools.ArrayDataSink
+import net.sergeych.bintools.DataSink
+import net.sergeych.bintools.DataSource
+import net.sergeych.bintools.toDataSource
+
+/**
+ * Smart variable-length long encoding tools.
+ *
+ * | Bytes sz | varint bits | smartint bits |
+ * |:-----:|:------:|:---------:|
+ * | 1 | 7 | 6 |
+ * | 2 | 14 | 14 |
+ * | 3 | 21 | 22 |
+ * | 4 | 28 | 29 |
+ * | 5 | 35 | 36 |
+ * | 6+ | 7*N | 7*N+1 |
+ * | 9 | 63 | 64 |
+ * | 10 | 64 | --- |
+ *
+ * In other words, except for very small numbers smartint
+ * gives 1 bit gain. So, full sized 64 bits with smartint takes
+ * 9 bytes, while varint needs 10. This could be important.
+ *
+ * Encoding is the following:
+ *
+ * Byte 0: bits 0..1 : type
+ *         bits 2..7 : v0
+ *
+ * Then depending on the type:
+ *
+ * type = 0:
+ *      v0 is the result, 6 bits (0..63)
+ *
+ * type = 1:
+ *      v0, v1 is the result, 14 bits
+ *
+ * type = 2:
+ *      v0, v1, v2 are the result, 22 bits
+ *
+ * type = 3:
+ *      v0, v1, v2, then the remaining bits varint encoded
+ *
+ * Varint encodes bytes with the high bit (0x80) reserved as the
+ * continuation flag and the low 7 bits as data bits. High bit 0 means
+ * end of the sequence.
+ *
+ */
+object Smartint {
+
+    private val v0limit: ULong = (1L shl 6).toULong()
+    private val v1limit = (1L shl 14).toULong()
+    private val v2limit = (1L shl 22).toULong()
+
+    /** Encode [value] to [sink] using the smallest type whose limit it fits under. */
+    fun encode(value: ULong, sink: DataSink) {
+        when {
+            value < v0limit -> encodeSeq(sink, 0, value)
+
+            value < v1limit -> encodeSeq(sink, 1, value and 0x3Fu, value shr 6)
+
+            value < v2limit -> encodeSeq(
+                sink,
+                2,
+                value and 0x3Fu,
+                (value shr 6) and 0xFFu,
+                (value shr 14) and 0xFFu,
+            )
+
+            else -> {
+                encodeSeq(
+                    sink,
+                    3,
+                    value and 0x3Fu,
+                    (value shr 6) and 0xFFu,
+                    (value shr 14) and 0xFFu,
+                )
+                // whatever is above the first 22 bits follows as a plain varint
+                Varint.encode(value shr 22, sink)
+            }
+        }
+    }
+
+    /** Write the header byte (2 type bits + 6-bit `bytes[0]`), then the low 8 bits of each other element. */
+    private fun encodeSeq(sink: DataSink, type: Int, vararg bytes: ULong) {
+        if (bytes.size == 0)
+            sink.writeByte(0)
+        else {
+            // v0 is 6 bits wide, so v0limit itself (64) is already too big
+            if (bytes[0] >= v0limit) throw IllegalArgumentException("first number is too big")
+            sink.writeUByte(((type and 0x03) or (bytes[0] shl 2).toInt()).toUByte())
+            for (x in bytes.drop(1))
+                sink.writeUByte(x.toUByte())
+        }
+    }
+
+    /** Read one smartint from [source]. */
+    fun decode(source: DataSource): ULong {
+        fun get(): ULong = source.readUByte().toULong()
+        val first = get().toUInt()
+        var type = (first and 3u).toInt()
+
+        var result: ULong = first.toULong() shr 2
+        if (type-- == 0) return result // type 0
+
+        result = result or (get() shl 6)
+        if (type-- == 0) return result // type 1
+
+        result = result or (get() shl 14)
+        if (type == 0) return result // type 2
+
+        return result or (Varint.decode(source) shl 22)
+    }
+
+    /**
+     * Encode signed [value] as sign + magnitude: bit 0 is the sign, the magnitude
+     * is stored above it. The number is written in plain [Varint] layout, not smartint
+     * (NOTE(review): confirm this is intended). Long.MIN_VALUE does not round-trip.
+     */
+    fun encodeSigned(value: Long, sink: DataSink) {
+        val x: ULong = (if (value < 0) -value else value).toULong()
+        val sigBit: ULong = if (value < 0) 1uL else 0uL
+        Varint.encode((x shl 1) or sigBit, sink)
+    }
+
+    /** Read a number written by [encodeSigned]. */
+    fun decodeSigned(source: DataSource): Long {
+        val x = Varint.decode(source)
+        val result = (x shr 1).toLong()
+        return if ((x and 1u).toInt() != 0) -result else result
+    }
+
+    /** Encode [value] into a new byte array. */
+    fun encode(value: ULong): ByteArray =
+        ArrayDataSink().also { encode(value, it) }.toByteArray()
+
+    /** Decode one smartint from the start of [packed]. */
+    fun decode(packed: ByteArray) = decode(packed.toDataSource())
+
+    /** Encode signed [value] into a new byte array. */
+    fun encodeSigned(value: Long): ByteArray =
+        ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
+
+    /** Decode one signed number from the start of [data]. */
+    fun decodeSigned(data: ByteArray): Long =
+        decodeSigned(data.toDataSource())
+}
diff --git a/src/commonMain/kotlin/net.sergeych.bintools/varint.kt b/src/commonMain/kotlin/net.sergeych.bintools/varint.kt
new file mode 100644
index 0000000..7869533
--- /dev/null
+++ b/src/commonMain/kotlin/net.sergeych.bintools/varint.kt
@@ -0,0 +1,96 @@
+@file:OptIn(ExperimentalUnsignedTypes::class)
+
+package com.icodici.ubdata
+
+import net.sergeych.bintools.ArrayDataSink
+import net.sergeych.bintools.DataSink
+import net.sergeych.bintools.DataSource
+import net.sergeych.bintools.FormatException
+import net.sergeych.bintools.toDataSource
+
+/**
+ * Variable-length long integer encoding. The MSB (0x80) bit of each byte flags
+ * that it is not the last one, and all necessary bits are encoded in 7-bit
+ * portions, least significant portion first (little endian of sorts).
+ *
+ * There is a slower but more compact encoding variant, [Smartint], that is better when
+ * encoding numbers that need more than 22 bits. With smaller numbers it is either
+ * the same or even worse, see [Smartint] docs.
+ */
+object Varint {
+    /**
+     * Encode signed [value] as sign + magnitude: bit 0 of the encoded number is
+     * the sign, the magnitude is stored above it.
+     *
+     * Long.MIN_VALUE has no positive counterpart, so it does not survive a round
+     * trip through [decodeSigned] (it decodes as 0).
+     */
+    fun encodeSigned(value: Long, sink: DataSink) {
+        val x: ULong = (if (value < 0) -value else value).toULong()
+        val sigBit: ULong = if (value < 0) 1uL else 0uL
+        encode((x shl 1) or sigBit, sink)
+    }
+
+    /** Write [value] to [dout], 7 bits per byte, low bits first. */
+    fun encode(value: ULong, dout: DataSink) {
+        var rest = value
+        do {
+            val x = (rest and 127u).toInt()
+            rest = rest shr 7
+            // 0x80 marks "more bytes follow"
+            if (rest > 0u)
+                dout.writeByte(x or 0x80)
+            else
+                dout.writeByte(x)
+
+        } while (rest > 0u)
+    }
+
+    /**
+     * Read one varint from [source].
+     *
+     * @throws FormatException if the continuation flag is still set on the 10th
+     *      byte, which is more than a 64-bit value can take
+     */
+    fun decode(source: DataSource): ULong {
+        var result: ULong = 0u
+        var count = 0
+        while (true) {
+            val x = source.readUByte().toInt()
+            result = result or ((x and 0x7F).toULong() shl count)
+            if ((x and 0x80) == 0)
+                break
+            count += 7
+            // `shl` only uses the low 6 bits of the shift distance, so going past
+            // bit 63 would silently wrap around and corrupt the bits already read
+            if (count > 63) throw FormatException("varint is too long")
+        }
+        return result
+    }
+
+
+    /** Read a number written by [encodeSigned]. */
+    fun decodeSigned(source: DataSource): Long {
+        val x = decode(source)
+        val result = (x shr 1).toLong()
+        return if ((x and 1u).toInt() != 0) -result else result
+    }
+
+    /** Encode [value] into a new byte array. */
+    fun encode(value: ULong): ByteArray {
+        return ArrayDataSink().also { encode(value, it) }.toByteArray()
+    }
+
+    /** Decode one varint from the start of [packed]. */
+    fun decode(packed: ByteArray) = decode(packed.toDataSource())
+
+    /** Encode signed [value] into a new byte array. */
+    fun encodeSigned(value: Long): ByteArray {
+        return ArrayDataSink().also { encodeSigned(value, it) }.toByteArray()
+    }
+
+    /** Decode one signed number from the start of [data]. */
+    fun decodeSigned(data: ByteArray): Long {
+        return decodeSigned(data.toDataSource())
+    }
+}
diff --git a/src/commonTest/kotlin/bintools/SmartintTest.kt b/src/commonTest/kotlin/bintools/SmartintTest.kt
new file mode 100644
index 0000000..802faa6
--- /dev/null
+++ b/src/commonTest/kotlin/bintools/SmartintTest.kt
@@ -0,0 +1,46 @@
+package bintools
+
+import com.icodici.ubdata.Smartint
+import net.sergeych.bintools.encodeToHex
+import kotlin.test.Test
+import kotlin.test.assertEquals
+
+class SmartintTest {
+
+    /** Round-trip [x] through both the unsigned and the signed codec. */
+    fun testValue(x: Long) {
+        assertEquals(x.toULong(), Smartint.decode(Smartint.encode(x.toULong())))
+        assertEquals(x, Smartint.decodeSigned(Smartint.encodeSigned(x)))
+        println("+ ${x}: ${Smartint.encode(x.toULong()).encodeToHex()}")
+    }
+
+    /** Round-trip the values next to 2^[bits] and next to 1.5 * 2^[bits]. */
+    fun testAround(bits: Int) {
+        println("----- $bits -----")
+        val window = 5
+        var median: Long = (1.toULong() shl bits).toLong()
+        for( x in (median-window)..(median+window)) {
+            testValue(x)
+        }
+        median = median * 3 / 2
+        for( x in (median-window)..(median+window)) {
+            testValue(x)
+        }
+    }
+
+    @Test
+    fun encode() {
+        for( i in 0..70) testValue(i.toLong())
+        testAround(6)
+        testAround(14)
+        testAround(22)
+        testAround(29)
+        testAround(36)
+        testValue(-1)
+//        testAround(28)
+    }
+
+    @Test
+    fun decode() {
+    }
+}
\ No newline at end of file
diff --git a/src/commonTest/kotlin/bintools/VarintTest.kt b/src/commonTest/kotlin/bintools/VarintTest.kt
new file mode 100644
index 0000000..8a7de1d
--- /dev/null
+++ b/src/commonTest/kotlin/bintools/VarintTest.kt
@@ -0,0 +1,46 @@
+package bintools
+
+import com.icodici.ubdata.Varint
+import net.sergeych.bintools.FormatException
+import net.sergeych.bintools.encodeToHex
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFailsWith
+
+class VarintTest {
+
+    /** Round-trip [x] through both the unsigned and the signed codec. */
+    fun testValue(x: Long) {
+        assertEquals(x.toULong(), Varint.decode(Varint.encode(x.toULong())))
+        assertEquals(x, Varint.decodeSigned(Varint.encodeSigned(x)))
+        println("+ ${x}: ${Varint.encode(x.toULong()).encodeToHex()}")
+    }
+
+    /** Round-trip the values next to 2^[bits] and next to 1.5 * 2^[bits]. */
+    fun testAround(bits: Int) {
+        val window = 5
+        var median: Long = (1.toULong() shl bits).toLong()
+        for( x in (median-window)..(median+window)) {
+            testValue(x)
+        }
+        median = median * 3 / 2
+        for( x in (median-window)..(median+window)) {
+            testValue(x)
+        }
+    }
+
+    @Test
+    fun encode() {
+//        for( i in 0..300) testValue(i.toLong())
+        testAround(7)
+        testAround(14)
+        testAround(21)
+        testAround(28)
+    }
+
+    @Test
+    fun decode() {
+        // the continuation flag is still set on the 10th byte: no 64-bit value is that long
+        assertFailsWith<FormatException> { Varint.decode(ByteArray(11) { 0x80.toByte() }) }
+    }
+}
\ No newline at end of file