added and fixed a LOT of docs
This commit is contained in:
parent
e06d553ed7
commit
962fcc1ac8
@ -7,7 +7,7 @@ description = "binary size-effective format used in Divan smart contracts, wasm
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
#anyhow = "1.0"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
string-builder = "0.2.0"
|
string-builder = "0.2.0"
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ into_u64!(u8, u16, u32, usize, u64);
|
|||||||
|
|
||||||
/// Data sink to encode bipack binary format.
|
/// Data sink to encode bipack binary format.
|
||||||
///
|
///
|
||||||
/// To implement just override [put_u8] and optionally [put_fixed_bytes].
|
/// To implement just override [BipackSink::put_u8] and optionally [BipackSink::put_fixed_bytes].
|
||||||
///
|
///
|
||||||
/// Note that the sink is not returning errors, unlike [crate::bipack_source::BipackSource].
|
/// Note that the sink is not returning errors, unlike [crate::bipack_source::BipackSource].
|
||||||
/// It is supposed that the sink has unlimited
|
/// It is supposed that the sink has unlimited
|
||||||
@ -77,7 +77,7 @@ pub trait BipackSink {
|
|||||||
|
|
||||||
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
|
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
|
||||||
/// terms. This format is used to store size of variable-length binaries and strings.
|
/// terms. This format is used to store size of variable-length binaries and strings.
|
||||||
/// Use [BipackSource::unsigned()] to unpack it.
|
/// Use [crate::bipack_source::BipackSource::get_unsigned] to unpack it.
|
||||||
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
|
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
|
||||||
let value = number.into_u64();
|
let value = number.into_u64();
|
||||||
let mut encode_seq = |ty: u8, bytes: &[u64]| {
|
let mut encode_seq = |ty: u8, bytes: &[u64]| {
|
||||||
|
@ -4,18 +4,18 @@ use std::string::FromUtf8Error;
|
|||||||
use crate::bipack_source::BipackError::NoDataError;
|
use crate::bipack_source::BipackError::NoDataError;
|
||||||
|
|
||||||
/// Result of error-aware bipack function
|
/// Result of error-aware bipack function
|
||||||
pub(crate) type Result<T> = std::result::Result<T, BipackError>;
|
pub type Result<T> = std::result::Result<T, BipackError>;
|
||||||
|
|
||||||
/// There is not enough data to fulfill the request
|
/// There is not enough data to fulfill the request
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum BipackError {
|
pub enum BipackError {
|
||||||
NoDataError,
|
NoDataError,
|
||||||
BadEncoding(FromUtf8Error)
|
BadEncoding(FromUtf8Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for BipackError {
|
impl Display for BipackError {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f,"{:?}", self)
|
write!(f, "{:?}", self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,11 +24,11 @@ impl Error for BipackError {}
|
|||||||
|
|
||||||
/// Data source compatible with mp_bintools serialization. It supports
|
/// Data source compatible with mp_bintools serialization. It supports
|
||||||
/// fixed-size integers in right order and varint and smartint encodings
|
/// fixed-size integers in right order and varint and smartint encodings
|
||||||
/// separately. There is out of the box implementation for [Vec<u8>], and
|
/// separately. There is out of the box implementation for [`Vec<u8>`], and
|
||||||
/// it is easy to implement your own.
|
/// it is easy to implement your own.
|
||||||
///
|
///
|
||||||
/// To implement source for other type, implement just [u8()] or mayve also
|
/// To implement source for other type, implement just [BipackSource::get_u8] or maybe also
|
||||||
/// [fixed_bytes] for effectiveness.
|
/// [BipackSource::get_fixed_bytes] for effectiveness.
|
||||||
///
|
///
|
||||||
/// Unlike the [crate::bipack_sink::BipackSink] the source is returning errors. This is because
|
/// Unlike the [crate::bipack_sink::BipackSink] the source is returning errors. This is because
|
||||||
/// it often appears when reading data do not correspond to the packed one, and this is an often
|
/// it often appears when reading data do not correspond to the packed one, and this is an often
|
||||||
@ -71,7 +71,7 @@ pub trait BipackSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// read 8-bytes varint-packed unsigned value from the source. We don't recommend
|
/// read 8-bytes varint-packed unsigned value from the source. We don't recommend
|
||||||
/// using it directly; use [get_unsigned] instead.
|
/// using it directly; use [BipackSource::get_unsigned] instead.
|
||||||
fn get_varint_unsigned(self: &mut Self) -> Result<u64> {
|
fn get_varint_unsigned(self: &mut Self) -> Result<u64> {
|
||||||
let mut result = 0u64;
|
let mut result = 0u64;
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
@ -83,15 +83,15 @@ pub trait BipackSource {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
|
/// read 2-bytes unsigned value from the source as smartint-encoded, same as
|
||||||
/// as u16
|
/// [BipackSource::get_unsigned] as u16
|
||||||
fn get_packed_u16(self: &mut Self) -> Result<u16> {
|
fn get_packed_u16(self: &mut Self) -> Result<u16> {
|
||||||
Ok(self.get_unsigned()? as u16)
|
Ok(self.get_unsigned()? as u16)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// read 4-bytes unsigned value from the source
|
/// read 4-bytes unsigned value from the source
|
||||||
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
|
/// read 2-bytes unsigned value from the source as smartint-encoded, same as
|
||||||
/// as u32
|
/// [BipackSource::get_unsigned] as u32.
|
||||||
fn get_packed_u32(self: &mut Self) -> Result<u32> { Ok(self.get_unsigned()? as u32) }
|
fn get_packed_u32(self: &mut Self) -> Result<u32> { Ok(self.get_unsigned()? as u32) }
|
||||||
|
|
||||||
/// read exact number of bytes from the source as a vec.
|
/// read exact number of bytes from the source as a vec.
|
||||||
@ -102,8 +102,9 @@ pub trait BipackSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Read variable-length byte array from the source (with packed size), created
|
/// Read variable-length byte array from the source (with packed size), created
|
||||||
/// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded
|
/// by [crate::bipack_sink::BipackSink::put_var_bytes] or
|
||||||
/// the same way as does [BipackSink::put_unsigned] and can be manually read by
|
/// [crate::bipack_sink::BipackSink::put_str]. The size is encoded the same way as does
|
||||||
|
/// [crate::bipack_sink::BipackSink::put_unsigned] and can be manually read by
|
||||||
/// [BipackSource::get_unsigned].
|
/// [BipackSource::get_unsigned].
|
||||||
fn var_bytes(self: &mut Self) -> Result<Vec<u8>> {
|
fn var_bytes(self: &mut Self) -> Result<Vec<u8>> {
|
||||||
let size = self.get_unsigned()? as usize;
|
let size = self.get_unsigned()? as usize;
|
||||||
@ -111,7 +112,7 @@ pub trait BipackSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Read a variable length string from a source packed with
|
/// Read a variable length string from a source packed with
|
||||||
/// [BipavkSink::put_string]. It is a variable sized array fo utf8 encoded
|
/// [crate::bipack_sink::BipackSink::put_str]. It is a variable sized array of utf8 encoded
|
||||||
/// characters.
|
/// characters.
|
||||||
fn str(self: &mut Self) -> Result<String> {
|
fn str(self: &mut Self) -> Result<String> {
|
||||||
String::from_utf8(
|
String::from_utf8(
|
||||||
@ -121,7 +122,7 @@ pub trait BipackSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The bipack source capable of extracting data from a slice.
|
/// The bipack source capable of extracting data from a slice.
|
||||||
/// use [SliceSource::from()] or [bipack_source()] to create one.
|
/// use [SliceSource::from()] to create one.
|
||||||
pub struct SliceSource<'a> {
|
pub struct SliceSource<'a> {
|
||||||
data: &'a [u8],
|
data: &'a [u8],
|
||||||
position: usize,
|
position: usize,
|
||||||
|
97
src/lib.rs
97
src/lib.rs
@ -1,4 +1,4 @@
|
|||||||
//! # pipack codec
|
//! # Bipack codec
|
||||||
//!
|
//!
|
||||||
//! The set of tools to effectively encode and decode bipack values. It is intentionally
|
//! The set of tools to effectively encode and decode bipack values. It is intentionally
|
||||||
//! minimalistic to be used with Divan smart-contracts where number of instructions could
|
//! minimalistic to be used with Divan smart-contracts where number of instructions could
|
||||||
@ -7,22 +7,107 @@
|
|||||||
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
|
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
|
||||||
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
|
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
|
||||||
//! method for the implementation.
|
//! method for the implementation.
|
||||||
//! - [bipack_sink::bipack_source]
|
//!
|
||||||
|
//! - [bipack_sink::BipackSink] trait that is also implemented for [`Vec<u8>`] allows to encode values
|
||||||
|
//! into the bipack format. It is just as simple to implement it for any other binary data
|
||||||
|
//! source.
|
||||||
|
//!
|
||||||
|
//! ## Utilities
|
||||||
|
//!
|
||||||
|
//! - to simplify encoding of unsigned ints the [bipack_sink::IntoU64] trait is used with
|
||||||
|
//! implementation for usual u* types.
|
||||||
|
//!
|
||||||
|
//! - [tools::to_dump] utility function converts binary data into human-readable dump as in old good
|
||||||
|
//! times (address, bytes, ASCII characters).
|
||||||
|
//!
|
||||||
|
//! ## About Bipack format
|
||||||
|
//!
|
||||||
|
//! This is a binary format created around the idea of bit-effectiveness and not disclosing
|
||||||
|
//! inner data structure. Unlike many known binary and text formats, like JSON, BSON, BOSS, and
|
||||||
|
//! many others, it does not include field names into packed binaries.
|
||||||
|
//!
|
||||||
|
//! It also uses rationally packed variable length format very effective for unsigned integers of
|
||||||
|
//! various sizes. This implementation supports sizes for u8, u16, u32 and u64. It is capable of
|
||||||
|
//! holding longer values too but for big numbers the fixed size encoding is mostly more effective.
|
||||||
|
//! This rational encoding format is called `smartint` and is internally used everywhere when one
|
||||||
|
//! needs to pack an unsigned number, unless the fixed size is important.
|
||||||
|
//!
|
||||||
|
//! ### Varint encoding
|
||||||
|
//!
|
||||||
|
//! Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers,
|
||||||
|
//! so it is very useful when encoding big numbers or at least very big long values. In other cases
|
||||||
|
//! [bipack_sink::BipackSink::put_unsigned] works faster, and the extra bits it uses do not play a significant role.
|
||||||
|
//!
|
||||||
|
//! | Bytes sz | varint bits | smartint bits |
|
||||||
|
//! |:-----:|:------:|:---------:|
|
||||||
|
//! | 1 | 7 | 6 |
|
||||||
|
//! | 2 | 14 | 14 |
|
||||||
|
//! | 3 | 21 | 22 |
|
||||||
|
//! | 4 | 28 | 29 |
|
||||||
|
//! | 5 | 35 | 36 |
|
||||||
|
//! | 6+ | 7*N | 7*N+1 |
|
||||||
|
//! | 9 | 63 | 64 |
|
||||||
|
//! | 10 | 64 | --- |
|
||||||
|
//!
|
||||||
|
//! In other words, except for very small numbers smartint
|
||||||
|
//! gives 1 data bit gain for the same packed byte size. For example,
|
||||||
|
//! full size 64 bits number with smartint takes one byte less (9 bytes vs. 10 in Varint).
|
||||||
|
//!
|
||||||
|
//! So, except for values in range 32..63 it gives same or better byte size effectiveness
|
||||||
|
//! than `Varint`. In particular:
|
||||||
|
//!
|
||||||
|
//! The effect of it could be interpreted as:
|
||||||
|
//!
|
||||||
|
//! | number values | size |
|
||||||
|
//! |:--------------|:------:|
|
||||||
|
//! | 0..31 | same |
|
||||||
|
//! | 32..63 | worse 1 byte |
|
||||||
|
//! | 64..1048575 | same |
|
||||||
|
//! | 1048576..2097151 | 1 byte better |
|
||||||
|
//! | 2097152..134217727 | same |
|
||||||
|
//! | 134217728..268435456 | 1 byte better |
|
||||||
|
//!
|
||||||
|
//! etc.
|
||||||
|
//!
|
||||||
|
//! ## Encoding format
|
||||||
|
//!
|
||||||
|
//! Encoded data could be 1 or more bytes in length. Data are
|
||||||
|
//! packed as follows:
|
||||||
|
//!
|
||||||
|
//! | byte offset | bits range | field |
|
||||||
|
//! |-------------|------------|-------|
|
||||||
|
//! | 0 | 0..1 | type |
|
||||||
|
//! | 0 | 2..7 | v0 |
|
||||||
|
//! | 1 | 0..7 | v1 (when used) |
|
||||||
|
//! | 2 | 0..7 | v2 (when used) |
|
||||||
|
//!
|
||||||
|
//! Then depending on the `type` field:
|
||||||
|
//!
|
||||||
|
//! | type | encoded |
|
||||||
|
//! |------|---------|
|
||||||
|
//! | 0 | v0 is the result 0..64 (or -32..32) |
|
||||||
|
//! | 1 | v0 ## v1 are the result, 14 bits |
|
||||||
|
//! | 2 | v0 ## v1 ## v2 are the result, 22 bits |
|
||||||
|
//! | 3 | v0 ## v1 ## v2 ## (varint encoded rest) |
|
||||||
|
//!
|
||||||
|
//! Where `##` means bits concatenation. The bits are interpreted as BIG ENDIAN,
|
||||||
|
//! for example `24573` will be encoded to `EA FF 02`
|
||||||
//!
|
//!
|
||||||
//!
|
//!
|
||||||
|
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
#![allow(unused_variables)]
|
#![allow(unused_variables)]
|
||||||
|
|
||||||
mod bipack_source;
|
pub mod bipack_source;
|
||||||
mod bipack_sink;
|
pub mod bipack_sink;
|
||||||
mod to_dump;
|
pub mod tools;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
use crate::bipack_sink::{BipackSink};
|
use crate::bipack_sink::{BipackSink};
|
||||||
use crate::bipack_source::{BipackSource, Result, SliceSource};
|
use crate::bipack_source::{BipackSource, Result, SliceSource};
|
||||||
use crate::to_dump::to_dump;
|
use crate::tools::to_dump;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn fixed_unpack() -> Result<()> {
|
fn fixed_unpack() -> Result<()> {
|
||||||
|
@ -1,5 +1,14 @@
|
|||||||
|
// use string_builder::Builder;
|
||||||
|
|
||||||
use string_builder::Builder;
|
use string_builder::Builder;
|
||||||
|
|
||||||
|
/// Convert binary data into text dump, human readable, like:
|
||||||
|
/// ```text
|
||||||
|
/// 0000 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................|
|
||||||
|
/// 0010 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................|
|
||||||
|
/// 0020 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f | !"#$%&'()*+,-./|
|
||||||
|
/// 0030 30 31 |01 |
|
||||||
|
///```
|
||||||
pub fn to_dump(data: &[u8]) -> String {
|
pub fn to_dump(data: &[u8]) -> String {
|
||||||
let mut offset = 0usize;
|
let mut offset = 0usize;
|
||||||
let mut counter = 0;
|
let mut counter = 0;
|
||||||
@ -35,3 +44,4 @@ pub fn to_dump(data: &[u8]) -> String {
|
|||||||
if counter != 0 { ascii_dump(&mut result, counter, data, offset); }
|
if counter != 0 { ascii_dump(&mut result, counter, data, offset); }
|
||||||
result.string().unwrap()
|
result.string().unwrap()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user