dokka docs added
This commit is contained in:
parent
2cd68276d4
commit
61c0b7b0f8
104
README.md
104
README.md
@ -1,17 +1,99 @@
|
|||||||
# Binary tools
|
# Binary tools and BiPack serializer
|
||||||
|
|
||||||
> Work in progress not ready to any practical use
|
> beta version
|
||||||
|
|
||||||
Goals:
|
Multiplatform binary tools collection, including portable serialization of the compact and fast [Bipack] format, that works well also in the browser and in native targets.
|
||||||
|
|
||||||
- Efficient binary serialization
|
# Usage
|
||||||
- Pack/unpack byte tools
|
|
||||||
- 2 flavors of space-effecient varint packing
|
|
||||||
- CRCs and CRC-protected blocks
|
|
||||||
- Async variants
|
|
||||||
|
|
||||||
# Some publics
|
TODO: specify maven: how?
|
||||||
|
|
||||||
## SmartInt codec
|
# Bipack
|
||||||
|
|
||||||
Variable-length signed and unsigned integer codec, see [src:Smartint]. For not too small numbers it is slightly more effective than `VarInt` codec, for example on `Long` values it saves a byte.
|
## Why?
|
||||||
|
|
||||||
|
Bipack is a compact and efficient binary serialization library (and format) that was designed with the following main goals:
|
||||||
|
|
||||||
|
### - be as compact as possible
|
||||||
|
|
||||||
|
For this reason it is a binary notation, it uses binary form for decimal numbers and can use a variety of encodings for
|
||||||
|
integers:
|
||||||
|
|
||||||
|
#### Varint
|
||||||
|
|
||||||
|
Variable-length compact encoding is used internally in some cases. It uses a 0x80 bit in every byte to mark continuation.
|
||||||
|
See `object Varint`.
|
||||||
|
|
||||||
|
#### Smartint
|
||||||
|
|
||||||
|
Variable-length compact encoding for signed and unsigned integers uses as few bytes as possible to encode integers. It is
|
||||||
|
used automatically when serializing integers. It is slightly more sophisticated than straight `Varint`.
|
||||||
|
|
||||||
|
### - do not reveal information about stored data
|
||||||
|
|
||||||
|
Many extendable formats, like JSON, BSON, BOSS and many others, keep data in key-value pairs. While it is good in
|
||||||
|
many aspects, it has clear disadvantages: it uses more space and it reveals inner data structure to the world. It is
|
||||||
|
possible to unpack such formats with zero information about inner structure.
|
||||||
|
|
||||||
|
Bipack does not store field names, so it is not possible to unpack or interpret it without knowledge of the data
|
||||||
|
structure. Only probabilistic analysis is possible. Let's not make the attacker's life easier :)
|
||||||
|
|
||||||
|
### - allow upgrading data structures with backward compatibility
|
||||||
|
|
||||||
|
The dark side of serialization formats of this kind is that you can't change the structures without either losing
|
||||||
|
backward compatibility with already serialized data or using voluminous boilerplate code to implement some sort of
|
||||||
|
versioning.
|
||||||
|
|
||||||
|
In order not to waste space or reveal more information than needed, Bipack allows classes marked as [@Extendable] to be
|
||||||
|
extended with more data _appended to the end of list of fields with required default values_. For such classes Bipack
|
||||||
|
stores the number of actually serialized fields and automatically uses default values for non-serialized ones when unpacking
|
||||||
|
old data.
|
||||||
|
|
||||||
|
### - protect data with framing and CRC
|
||||||
|
|
||||||
|
When needed, the serialization library allows to store/check CRC32 tag of the structure name with `@Framed` (can be overridden
|
||||||
|
as usual with `@SerialName`), or be followed with CRC32 of the serialized binary data, that will be checked on
|
||||||
|
deserialization, using `@CrcProtected`. This allows to check the data consistency out of the box and only where needed.
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
Use kotlinx serialization as usual. There are the following Bipack-specific annotations at your service. All class annotations can be combined.
|
||||||
|
|
||||||
|
## @Extendable
|
||||||
|
|
||||||
|
Classes marked this way store number of fields. It allows to add to the class data more fields, to the end of list, with
|
||||||
|
default initializers, keeping backward compatibility. For example if you have serialized:
|
||||||
|
|
||||||
|
```kotlin
|
||||||
|
@Serializable
|
||||||
|
@Extendable
|
||||||
|
data class foo(val i: Int)
|
||||||
|
```
|
||||||
|
|
||||||
|
and then decided to add a field:
|
||||||
|
|
||||||
|
```kotlin
|
||||||
|
@Serializable
|
||||||
|
@Extendable
|
||||||
|
data class foo(val i: Int, val bar: String = "buzz")
|
||||||
|
```
|
||||||
|
|
||||||
|
It adds 1 or more bytes to the serialized data (field counts in `Varint` format)
|
||||||
|
|
||||||
|
Bipack will properly deserialize the data serialized for an old version.
|
||||||
|
|
||||||
|
## @CrcProtected
|
||||||
|
|
||||||
|
Bipack will calculate and store CRC32 of serialized data at the end, and automatically check it on deserializing throwing `InvalidFrameCRCException` if it does not match.
|
||||||
|
|
||||||
|
It adds 4 bytes to the serialized data.
|
||||||
|
|
||||||
|
## @Framed
|
||||||
|
|
||||||
|
Put the CRC32 of the serializing class name (`@SerialName` allows to change it as usual) and checks it on deserializing. Throws `InvalidFrameHeaderException` if it does not match.
|
||||||
|
|
||||||
|
It adds 4 bytes to the serialized data.
|
||||||
|
|
||||||
|
## @Unsigned
|
||||||
|
|
||||||
|
This __field annotation__ allows to store __integer fields__ of any size more compactly by not saving the sign. Can be applied to both signed and unsigned integers of any size.
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
plugins {
|
plugins {
|
||||||
kotlin("multiplatform") version "1.8.10"
|
kotlin("multiplatform") version "1.8.10"
|
||||||
kotlin("plugin.serialization") version "1.8.10"
|
kotlin("plugin.serialization") version "1.8.10"
|
||||||
|
id("org.jetbrains.dokka") version "1.6.0"
|
||||||
`maven-publish`
|
`maven-publish`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val serialization_version = "1.3.4"
|
||||||
|
|
||||||
group = "net.sergeych"
|
group = "net.sergeych"
|
||||||
version = "0.0.1-SNAPSHOT"
|
version = "0.0.1-SNAPSHOT"
|
||||||
|
|
||||||
@ -47,7 +50,9 @@ kotlin {
|
|||||||
val commonMain by getting {
|
val commonMain by getting {
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4")
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4")
|
||||||
api("net.sergeych:mp_stools:[1.3.3,)")
|
// this is actually a bug: we need only the core, but bare core causes strange errors
|
||||||
|
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0")
|
||||||
|
// api("net.sergeych:mp_stools:[1.3.3,)")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val commonTest by getting {
|
val commonTest by getting {
|
||||||
@ -82,3 +87,13 @@ kotlin {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tasks.dokkaHtml.configure {
|
||||||
|
outputDirectory.set(buildDir.resolve("dokka"))
|
||||||
|
dokkaSourceSets {
|
||||||
|
configureEach {
|
||||||
|
includes.from("docs/bipack.md")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -1,89 +1,19 @@
|
|||||||
# Bipack: compact binary serialization
|
# Module mp_bintools
|
||||||
|
|
||||||
## Why?
|
This library contains a `Bipack` binary format serializer, see [net.sergeych.bipack.BipackEncoder] and [net.sergeych.bipack.BipackDecoder]. Also, there are many general-purpose utilities that simplify binary data processing, see package [net.sergeych.bintools] below.
|
||||||
|
|
||||||
Bipack was designed with the following main goals:
|
|
||||||
|
|
||||||
### Be as compact as possible
|
# Package net.sergeych.bipack
|
||||||
|
|
||||||
For this reason it is a binary notation, it uses binary form for decimal numbers and can use variery of encoding for
|
Bipack is a common kotlinx serializer that works pretty much like any other `kotlinx.serialization` format. You just mark your class as `@Serializable` and it does the rest.
|
||||||
integers:
|
|
||||||
|
|
||||||
#### Varint
|
- [BipackEncoder] serializes anything to bipack format.
|
||||||
|
- [BipackDecoder] deserializes from bipack back.
|
||||||
|
|
||||||
Variable-length compact encoding is used internally in some cases. It uses a 0x80 bit in every byte to mark coninuation.
|
There are also special annotations to fine-tune the format: [Extendable], [Framed], [CrcProtected] for classes and [Unsigned] for integer data fields.
|
||||||
See `object Varint`.
|
|
||||||
|
|
||||||
#### Smartint
|
# Package net.sergeych.bintools
|
||||||
|
|
||||||
Variable-length compact encoding for signed and unsigned integers use as few bytes as possible to encode integers. It is
|
General-purpose binary tools: encoding to bytes, hex, binary dumps, variable-length integers, etc. Most of it is used internally by bipack serializers, see [net.sergeych.bipack] for details.
|
||||||
used automatically when serializing integers. It is slightly more sophisticated than straight `Varint`.
|
|
||||||
|
|
||||||
### Do not reveal information about stored data
|
In particular, see [Varint] and [Smartint] variable-length compact integer codecs and also [DataSource] and [DataSink] multiplatform synchronous read/write interfaces.
|
||||||
|
|
||||||
Many extendable formats, like JSON, BSON, BOSS and may others are keeping data in key-value pairs. While it is good in
|
|
||||||
many aspets, it has a clear disadvantages: it uses more space and it reveals inner data structure to the world. It is
|
|
||||||
possible to unpack such formats with zero information about inner structure.
|
|
||||||
|
|
||||||
Bipack does not store field names, so it is not possible to unpack or interpret it without knowledge of the data
|
|
||||||
structure. Only probablistic analysis. Let's not make life of attacker easier :)
|
|
||||||
|
|
||||||
### Allow upgrading data structures with backward compatibility
|
|
||||||
|
|
||||||
The dark side of serialization formats of this kind is that you can't change the structures without either loosing
|
|
||||||
backward compatibility with already serialzied data or using volumous boilerplate code to implement some sort of
|
|
||||||
versioning.
|
|
||||||
|
|
||||||
Not to waste space and reveal more information that needed Bipack allows extending classes marked as [@Extendable] to be
|
|
||||||
extended with more data _appended to the end of list of fields with required defaul values_. For such classes Bipack
|
|
||||||
stores number of actually serialized fields and atuomatically uses default values for non-serialized ones when unpacking
|
|
||||||
old data.
|
|
||||||
|
|
||||||
### Protect data with framing and CRC
|
|
||||||
|
|
||||||
When needed, serialization lobrary allow to store/check CRC32 tag of the structure name with `@Framed` (can be overriden
|
|
||||||
as usual with `@SerialName`), or be followed with CRC32 of the serialized binary data, that will be checked on
|
|
||||||
deserialization, using `@CrcProtected`. This allows to check the data consistency out of the box and only where needed.
|
|
||||||
|
|
||||||
# Usage
|
|
||||||
|
|
||||||
Use kotlinx serializatino as usual. There are following Bipack-specific annotation at your service. All class annotations could be combined.
|
|
||||||
|
|
||||||
## @Extendable
|
|
||||||
|
|
||||||
Classes marked this way store number of fields. It allows to add to the class data more fields, to the end of list, with
|
|
||||||
default initializers, keeping backward compatibility. For example if you have serialized:
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
@Serializable
|
|
||||||
@Extendable
|
|
||||||
data class foo(i: Int)
|
|
||||||
```
|
|
||||||
|
|
||||||
and then decided to add a field:
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
@Serializable
|
|
||||||
@Extendable
|
|
||||||
data class foo(val i: Int, bar: String = "buzz")
|
|
||||||
```
|
|
||||||
|
|
||||||
It adds 1 or more bytes to the serialized data (field counts in `Varint` format)
|
|
||||||
|
|
||||||
Bipack will properly deserialize the data serialzied for an old version.
|
|
||||||
|
|
||||||
## @CrcProtected
|
|
||||||
|
|
||||||
Bipack will calculate and store CRC32 of serialized data at the end, and automatically check it on deserializing throwing `InvalidFrameCRCException` if it does not match.
|
|
||||||
|
|
||||||
It adds 4 bytes to the serialized data.
|
|
||||||
|
|
||||||
## @Framed
|
|
||||||
|
|
||||||
Put the CRC32 of the serializing class name (`@SerialName` allows to change it as usual) and checks it on deserializing. Throws `InvalidFrameHeaderException` if it does not match.
|
|
||||||
|
|
||||||
It adds 4 bytes to the serialized data.
|
|
||||||
|
|
||||||
## @Unisgned
|
|
||||||
|
|
||||||
This __field annontation__ allows to store __integer fields__ of any size more compact by not saving the sign. Could be applyed to both signed and unsigned integers of any size.
|
|
@ -52,11 +52,6 @@
|
|||||||
"@jridgewell/resolve-uri" "3.1.0"
|
"@jridgewell/resolve-uri" "3.1.0"
|
||||||
"@jridgewell/sourcemap-codec" "1.4.14"
|
"@jridgewell/sourcemap-codec" "1.4.14"
|
||||||
|
|
||||||
"@js-joda/core@3.2.0":
|
|
||||||
version "3.2.0"
|
|
||||||
resolved "https://registry.yarnpkg.com/@js-joda/core/-/core-3.2.0.tgz#3e61e21b7b2b8a6be746df1335cf91d70db2a273"
|
|
||||||
integrity sha512-PMqgJ0sw5B7FKb2d5bWYIoxjri+QlW/Pys7+Rw82jSH0QN3rB05jZ/VrrsUdh1w4+i2kw9JOejXGq/KhDOX7Kg==
|
|
||||||
|
|
||||||
"@socket.io/component-emitter@~3.1.0":
|
"@socket.io/component-emitter@~3.1.0":
|
||||||
version "3.1.0"
|
version "3.1.0"
|
||||||
resolved "https://registry.yarnpkg.com/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz#96116f2a912e0c02817345b3c10751069920d553"
|
resolved "https://registry.yarnpkg.com/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz#96116f2a912e0c02817345b3c10751069920d553"
|
||||||
@ -603,14 +598,6 @@ dom-serialize@^2.2.1:
|
|||||||
extend "^3.0.0"
|
extend "^3.0.0"
|
||||||
void-elements "^2.0.0"
|
void-elements "^2.0.0"
|
||||||
|
|
||||||
dukat@0.5.8-rc.4:
|
|
||||||
version "0.5.8-rc.4"
|
|
||||||
resolved "https://registry.yarnpkg.com/dukat/-/dukat-0.5.8-rc.4.tgz#90384dcb50b14c26f0e99dae92b2dea44f5fce21"
|
|
||||||
integrity sha512-ZnMt6DGBjlVgK2uQamXfd7uP/AxH7RqI0BL9GLrrJb2gKdDxvJChWy+M9AQEaL+7/6TmxzJxFOsRiInY9oGWTA==
|
|
||||||
dependencies:
|
|
||||||
google-protobuf "3.12.2"
|
|
||||||
typescript "3.9.5"
|
|
||||||
|
|
||||||
ee-first@1.1.1:
|
ee-first@1.1.1:
|
||||||
version "1.1.1"
|
version "1.1.1"
|
||||||
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
|
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
|
||||||
@ -875,11 +862,6 @@ glob@^7.1.3, glob@^7.1.7:
|
|||||||
once "^1.3.0"
|
once "^1.3.0"
|
||||||
path-is-absolute "^1.0.0"
|
path-is-absolute "^1.0.0"
|
||||||
|
|
||||||
google-protobuf@3.12.2:
|
|
||||||
version "3.12.2"
|
|
||||||
resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.12.2.tgz#50ce9f9b6281235724eb243d6a83e969a2176e53"
|
|
||||||
integrity sha512-4CZhpuRr1d6HjlyrxoXoocoGFnRYgKULgMtikMddA9ztRyYR59Aondv2FioyxWVamRo0rF2XpYawkTCBEQOSkA==
|
|
||||||
|
|
||||||
graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9:
|
graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9:
|
||||||
version "4.2.10"
|
version "4.2.10"
|
||||||
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c"
|
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c"
|
||||||
@ -1729,11 +1711,6 @@ type-is@~1.6.18:
|
|||||||
media-typer "0.3.0"
|
media-typer "0.3.0"
|
||||||
mime-types "~2.1.24"
|
mime-types "~2.1.24"
|
||||||
|
|
||||||
typescript@3.9.5:
|
|
||||||
version "3.9.5"
|
|
||||||
resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.9.5.tgz#586f0dba300cde8be52dd1ac4f7e1009c1b13f36"
|
|
||||||
integrity sha512-hSAifV3k+i6lEoCJ2k6R2Z/rp/H3+8sdmcn5NrS3/3kE7+RyZXm9aqvxWqjEXHAd8b0pShatpcdMTvEdvAJltQ==
|
|
||||||
|
|
||||||
ua-parser-js@^0.7.30:
|
ua-parser-js@^0.7.30:
|
||||||
version "0.7.33"
|
version "0.7.33"
|
||||||
resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.33.tgz#1d04acb4ccef9293df6f70f2c3d22f3030d8b532"
|
resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.33.tgz#1d04acb4ccef9293df6f70f2c3d22f3030d8b532"
|
||||||
|
Loading…
Reference in New Issue
Block a user