diff --git a/README.md b/README.md index 978a4ea..8280d70 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,99 @@ -# Binary tools +# Binary tools and BiPack serializer -> Work in progress not ready to any practical use +> beta version -Goals: +Multiplatform binary tools collection, including portable serialization of the compact and fast [Bipack] format, that also works well in the browser and in native targets. -- Efficient binary serialization -- Pack/unpack byte tools -- 2 flavors of space-effecient varint packing -- CRCs and CRC-protected blocks -- Async variants +# Usage -# Some publics +TODO: specify maven: how? -## SmartInt codec +# Bipack -Variable-length signed and unsigned integer codec, see [src:Smartint]. For not too small numbers it is slightly more effective than `VarInt` codec, for example on `Long` values it saves a byte. \ No newline at end of file +## Why? + +Bipack is a compact and efficient binary serialization library (and format) that was designed with the following main goals: + +### - be as compact as possible + +For this reason it is a binary notation, it uses binary form for decimal numbers and can use a variety of encodings for +integers: + +#### Varint + +Variable-length compact encoding is used internally in some cases. It uses a 0x80 bit in every byte to mark continuation. +See `object Varint`. + +#### Smartint + +Variable-length compact encoding for signed and unsigned integers uses as few bytes as possible to encode integers. It is +used automatically when serializing integers. It is slightly more sophisticated than straight `Varint`. + +### - do not reveal information about stored data + +Many extendable formats, like JSON, BSON, BOSS and many others are keeping data in key-value pairs. While it is good in +many aspects, it has clear disadvantages: it uses more space and it reveals inner data structure to the world. It is +possible to unpack such formats with zero information about inner structure. 
+ +Bipack does not store field names, so it is not possible to unpack or interpret it without knowledge of the data +structure. Only probabilistic analysis remains possible. Let's not make the attacker's life easier :) + +### - allow upgrading data structures with backward compatibility + +The dark side of serialization formats of this kind is that you can't change the structures without either losing +backward compatibility with already serialized data or using voluminous boilerplate code to implement some sort of +versioning. + +To avoid wasting space and revealing more information than needed, Bipack allows classes marked as [@Extendable] to be +extended with more data _appended to the end of the list of fields with required default values_. For such classes Bipack +stores the number of actually serialized fields and automatically uses default values for non-serialized ones when unpacking +old data. + +### - protect data with framing and CRC + +When needed, the serialization library allows storing/checking a CRC32 tag of the structure name with `@Framed` (can be overridden +as usual with `@SerialName`), or appending a CRC32 of the serialized binary data, which will be checked on +deserialization, using `@CrcProtected`. This allows checking data consistency out of the box and only where needed. + +# Usage + +Use kotlinx serialization as usual. The following Bipack-specific annotations are at your service. All class annotations can be combined. + +## @Extendable + +Classes marked this way store the number of fields. This allows adding more fields to the class, at the end of the list, with +default initializers, keeping backward compatibility. 
For example, if you have serialized: + +```kotlin +@Serializable +@Extendable +data class foo(val i: Int) +``` + +and then decided to add a field: + +```kotlin +@Serializable +@Extendable +data class foo(val i: Int, val bar: String = "buzz") +``` + +It adds 1 or more bytes to the serialized data (the field count in `Varint` format). + +Bipack will properly deserialize the data serialized for an old version. + +## @CrcProtected + +Bipack will calculate and store the CRC32 of the serialized data at the end, and automatically check it on deserialization, throwing `InvalidFrameCRCException` if it does not match. + +It adds 4 bytes to the serialized data. + +## @Framed + +Puts the CRC32 of the serialized class name (`@SerialName` allows changing it as usual) and checks it on deserialization. Throws `InvalidFrameHeaderException` if it does not match. + +It adds 4 bytes to the serialized data. + +## @Unsigned + +This __field annotation__ allows storing __integer fields__ of any size more compactly by not saving the sign. It can be applied to both signed and unsigned integers of any size. 
diff --git a/build.gradle.kts b/build.gradle.kts index bcacbb0..6f32265 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,9 +1,12 @@ plugins { kotlin("multiplatform") version "1.8.10" kotlin("plugin.serialization") version "1.8.10" + id("org.jetbrains.dokka") version "1.6.0" `maven-publish` } +val serialization_version = "1.3.4" + group = "net.sergeych" version = "0.0.1-SNAPSHOT" @@ -47,7 +50,9 @@ kotlin { val commonMain by getting { dependencies { implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4") - api("net.sergeych:mp_stools:[1.3.3,)") + // this is actually a bug: we need only the core, but bare core causes strange errors + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0") +// api("net.sergeych:mp_stools:[1.3.3,)") } } val commonTest by getting { @@ -82,3 +87,13 @@ kotlin { } } + +tasks.dokkaHtml.configure { + outputDirectory.set(buildDir.resolve("dokka")) + dokkaSourceSets { + configureEach { + includes.from("docs/bipack.md") + } + } +} + diff --git a/docs/bipack.md b/docs/bipack.md index 1036cb1..6500693 100644 --- a/docs/bipack.md +++ b/docs/bipack.md @@ -1,89 +1,19 @@ -# Bipack: compact binary serialization +# Module mp_bintools -## Why? +This library contains a `Bipack` binary format serializer, see [net.sergeych.bipack.BipackEncoder] and [net.sergeych.bipack.BipackDecoder]. Also, there are many general-purpose utilities that simplify binary data processing, see package [net.sergeych.bintools] below. -Bipack was designed with the following main goals: -### Be as compact as possible +# Package net.sergeych.bipack -For this reason it is a binary notation, it uses binary form for decimal numbers and can use variery of encoding for -integers: +Bipack is a common kotlinx serializer that works pretty much like any other `kotlinx.serialization` format. You just mark your class as `@Serializable` and it does the rest. -#### Varint +- [BipackEncoder] serializes anything to bipack format. 
+- [BipackDecoder] deserializes from bipack back. -Variable-length compact encoding is used internally in some cases. It uses a 0x80 bit in every byte to mark coninuation. -See `object Varint`. +There are also special annotations to fine-tune the format: [Extendable], [Framed], [CrcProtected] for classes and [Unsigned] for integer data fields. -#### Smartint +# Package net.sergeych.bintools -Variable-length compact encoding for signed and unsigned integers use as few bytes as possible to encode integers. It is -used automatically when serializing integers. It is slightly more sophisticated than straight `Varint`. +General-purpose binary tools: encoding to bytes, hex, binary dumps, variable-length integers, etc. Most of it is used internally by bipack serializers, see [net.sergeych.bipack] for details. -### Do not reveal information about stored data - -Many extendable formats, like JSON, BSON, BOSS and may others are keeping data in key-value pairs. While it is good in -many aspets, it has a clear disadvantages: it uses more space and it reveals inner data structure to the world. It is -possible to unpack such formats with zero information about inner structure. - -Bipack does not store field names, so it is not possible to unpack or interpret it without knowledge of the data -structure. Only probablistic analysis. Let's not make life of attacker easier :) - -### Allow upgrading data structures with backward compatibility - -The dark side of serialization formats of this kind is that you can't change the structures without either loosing -backward compatibility with already serialzied data or using volumous boilerplate code to implement some sort of -versioning. - -Not to waste space and reveal more information that needed Bipack allows extending classes marked as [@Extendable] to be -extended with more data _appended to the end of list of fields with required defaul values_. 
For such classes Bipack -stores number of actually serialized fields and atuomatically uses default values for non-serialized ones when unpacking -old data. - -### Protect data with framing and CRC - -When needed, serialization lobrary allow to store/check CRC32 tag of the structure name with `@Framed` (can be overriden -as usual with `@SerialName`), or be followed with CRC32 of the serialized binary data, that will be checked on -deserialization, using `@CrcProtected`. This allows to check the data consistency out of the box and only where needed. - -# Usage - -Use kotlinx serializatino as usual. There are following Bipack-specific annotation at your service. All class annotations could be combined. - -## @Extendable - -Classes marked this way store number of fields. It allows to add to the class data more fields, to the end of list, with -default initializers, keeping backward compatibility. For example if you have serialized: - -```kotlin -@Serializable -@Extendable -data class foo(i: Int) -``` - -and then decided to add a field: - -```kotlin -@Serializable -@Extendable -data class foo(val i: Int, bar: String = "buzz") -``` - -It adds 1 or more bytes to the serialized data (field counts in `Varint` format) - -Bipack will properly deserialize the data serialzied for an old version. - -## @CrcProtected - -Bipack will calculate and store CRC32 of serialized data at the end, and automatically check it on deserializing throwing `InvalidFrameCRCException` if it does not match. - -It adds 4 bytes to the serialized data. - -## @Framed - -Put the CRC32 of the serializing class name (`@SerialName` allows to change it as usual) and checks it on deserializing. Throws `InvalidFrameHeaderException` if it does not match. - -It adds 4 bytes to the serialized data. - -## @Unisgned - -This __field annontation__ allows to store __integer fields__ of any size more compact by not saving the sign. Could be applyed to both signed and unsigned integers of any size. 
+In particular, see [Varint] and [Smartint] variable-length compact integer codecs and also [DataSource] and [DataSink] multiplatform synchronous read/write interfaces. \ No newline at end of file diff --git a/kotlin-js-store/yarn.lock b/kotlin-js-store/yarn.lock index 62dc1b0..a6a9476 100644 --- a/kotlin-js-store/yarn.lock +++ b/kotlin-js-store/yarn.lock @@ -52,11 +52,6 @@ "@jridgewell/resolve-uri" "3.1.0" "@jridgewell/sourcemap-codec" "1.4.14" -"@js-joda/core@3.2.0": - version "3.2.0" - resolved "https://registry.yarnpkg.com/@js-joda/core/-/core-3.2.0.tgz#3e61e21b7b2b8a6be746df1335cf91d70db2a273" - integrity sha512-PMqgJ0sw5B7FKb2d5bWYIoxjri+QlW/Pys7+Rw82jSH0QN3rB05jZ/VrrsUdh1w4+i2kw9JOejXGq/KhDOX7Kg== - "@socket.io/component-emitter@~3.1.0": version "3.1.0" resolved "https://registry.yarnpkg.com/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz#96116f2a912e0c02817345b3c10751069920d553" @@ -603,14 +598,6 @@ dom-serialize@^2.2.1: extend "^3.0.0" void-elements "^2.0.0" -dukat@0.5.8-rc.4: - version "0.5.8-rc.4" - resolved "https://registry.yarnpkg.com/dukat/-/dukat-0.5.8-rc.4.tgz#90384dcb50b14c26f0e99dae92b2dea44f5fce21" - integrity sha512-ZnMt6DGBjlVgK2uQamXfd7uP/AxH7RqI0BL9GLrrJb2gKdDxvJChWy+M9AQEaL+7/6TmxzJxFOsRiInY9oGWTA== - dependencies: - google-protobuf "3.12.2" - typescript "3.9.5" - ee-first@1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d" @@ -875,11 +862,6 @@ glob@^7.1.3, glob@^7.1.7: once "^1.3.0" path-is-absolute "^1.0.0" -google-protobuf@3.12.2: - version "3.12.2" - resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.12.2.tgz#50ce9f9b6281235724eb243d6a83e969a2176e53" - integrity sha512-4CZhpuRr1d6HjlyrxoXoocoGFnRYgKULgMtikMddA9ztRyYR59Aondv2FioyxWVamRo0rF2XpYawkTCBEQOSkA== - graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9: version "4.2.10" resolved 
"https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c" @@ -1729,11 +1711,6 @@ type-is@~1.6.18: media-typer "0.3.0" mime-types "~2.1.24" -typescript@3.9.5: - version "3.9.5" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.9.5.tgz#586f0dba300cde8be52dd1ac4f7e1009c1b13f36" - integrity sha512-hSAifV3k+i6lEoCJ2k6R2Z/rp/H3+8sdmcn5NrS3/3kE7+RyZXm9aqvxWqjEXHAd8b0pShatpcdMTvEdvAJltQ== - ua-parser-js@^0.7.30: version "0.7.33" resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.33.tgz#1d04acb4ccef9293df6f70f2c3d22f3030d8b532"