From 962fcc1ac8532d72db605452d6ef8d5a46ef0120 Mon Sep 17 00:00:00 2001 From: sergeych Date: Mon, 9 Oct 2023 23:26:08 +0100 Subject: [PATCH] added ans fixed LOT of docs --- Cargo.toml | 2 +- src/bipack_sink.rs | 4 +- src/bipack_source.rs | 31 ++++++------ src/lib.rs | 97 +++++++++++++++++++++++++++++++++--- src/{to_dump.rs => tools.rs} | 10 ++++ 5 files changed, 120 insertions(+), 24 deletions(-) rename src/{to_dump.rs => tools.rs} (72%) diff --git a/Cargo.toml b/Cargo.toml index 9e55a95..4794b1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ description = "binary size-effective format used in Divan smart contracts, wasm # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = "1.0" +#anyhow = "1.0" lazy_static = "1.4.0" string-builder = "0.2.0" diff --git a/src/bipack_sink.rs b/src/bipack_sink.rs index 31a2edb..d968356 100644 --- a/src/bipack_sink.rs +++ b/src/bipack_sink.rs @@ -26,7 +26,7 @@ into_u64!(u8, u16, u32, usize, u64); /// Data sink to encode bipack binary format. /// -/// To implement just override [put_u8] and optionally [put_fixed_bytes]. +/// To implement just override [BipackSink::put_u8] and optionally [BipackSink::put_fixed_bytes]. /// /// Note that the sink is not returning errors, unlike [crate::bipack_source::BipackSource]. /// It is supposed that the sink has unlimited @@ -77,7 +77,7 @@ pub trait BipackSink { /// Put unsigned value to compressed variable-length format, `Smartint` in the bipack /// terms. This format is used to store size of variable-length binaries and strings. - /// Use [BipackSource::unsigned()] to unpack it. + /// Use [crate::bipack_source::BipackSource::get_unsigned] to unpack it. 
fn put_unsigned(self: &mut Self, number: T) { let value = number.into_u64(); let mut encode_seq = |ty: u8, bytes: &[u64]| { diff --git a/src/bipack_source.rs b/src/bipack_source.rs index 35c8582..6bfc981 100644 --- a/src/bipack_source.rs +++ b/src/bipack_source.rs @@ -4,18 +4,18 @@ use std::string::FromUtf8Error; use crate::bipack_source::BipackError::NoDataError; /// Result of error-aware bipack function -pub(crate) type Result = std::result::Result; +pub type Result = std::result::Result; /// There is not enought data to fulfill the request #[derive(Debug, Clone)] pub enum BipackError { NoDataError, - BadEncoding(FromUtf8Error) + BadEncoding(FromUtf8Error), } impl Display for BipackError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f,"{:?}", self) + write!(f, "{:?}", self) } } @@ -24,11 +24,11 @@ impl Error for BipackError {} /// Data source compatible with mp_bintools serialization. It supports /// fixed-size integers in right order and varint ans smartint encodings -/// separately. There is out of the box implementation for [Vec], and +/// separately. There is out of the box implementation for [`Vec`], and /// it is easy to implements your own. /// -/// To implement source for other type, implement just [u8()] or mayve also -/// [fixed_bytes] for effectiveness. +/// To implement source for other type, implement just [BipackSource::get_u8] or maybe also +/// [BipackSource::get_fixed_bytes] for effectiveness. /// /// Unlike the [crate::bipack_sink::BipackSink] the source is returning errors. This is because /// it often appears when reading data do not correspond to the packed one, and this is an often @@ -71,7 +71,7 @@ pub trait BipackSource { } /// read 8-bytes varint-packed unsigned value from the source. We dont' recommend - /// using it directly; use [get_unsigned] instead. + /// using it directly; use [BipackSource::get_unsigned] instead. 
fn get_varint_unsigned(self: &mut Self) -> Result { let mut result = 0u64; let mut count = 0; @@ -83,15 +83,15 @@ pub trait BipackSource { } } - /// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned] - /// as u16 + /// read 2-bytes unsigned value from the source as smartint-encoded, same as + /// [BipackSource::get_unsigned] as u16 fn get_packed_u16(self: &mut Self) -> Result { Ok(self.get_unsigned()? as u16) } /// read 4-bytes unsigned value from the source - /// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned] - /// as u32 + /// read 4-bytes unsigned value from the source as smartint-encoded, same as + /// [BipackSource::get_unsigned] as u32. fn get_packed_u32(self: &mut Self) -> Result { Ok(self.get_unsigned()? as u32) } /// read exact number of bytes from the source as a vec. @@ -102,8 +102,9 @@ pub trait BipackSource { } /// Read variable-length byte array from the source (with packed size), created - /// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded - /// the same way as does [BipackSink::put_unsigned] and can be manually read by + /// by [crate::bipack_sink::BipackSink::put_var_bytes] or + /// [crate::bipack_sink::BipackSink::put_str]. The size is encoded the same way as does + /// [crate::bipack_sink::BipackSink::put_unsigned] and can be manually read by /// [BipackSource::get_unsigned]. fn var_bytes(self: &mut Self) -> Result> { let size = self.get_unsigned()? as usize; @@ -111,7 +112,7 @@ pub trait BipackSource { } /// REad a variable length string from a source packed with - /// [BipavkSink::put_string]. It is a variable sized array fo utf8 encoded + /// [crate::bipack_sink::BipackSink::put_str]. It is a variable sized array of utf8 encoded /// characters. fn str(self: &mut Self) -> Result { String::from_utf8( @@ -121,7 +122,7 @@ pub trait BipackSource { } /// The bipack source capable of extracting data from a slice. 
-/// use [SliceSource::from()] or [bipack_source()] to create one. +/// use [SliceSource::from()] to create one. pub struct SliceSource<'a> { data: &'a [u8], position: usize, diff --git a/src/lib.rs b/src/lib.rs index 6ddf52a..1c8a8d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -//! # pipack codec +//! # Bipack codec //! //! The set of tools to effectively encode and decode bipack values. It is internationally //! minimalistic to be used wit Divan smart-contracts where number of instructions could @@ -7,22 +7,107 @@ //! - [bipack_source::BipackSource] is used to decode values, there is implementation //! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read //! method for the implementation. -//! - [bipack_sink::bipack_source] +//! +//! - [bipack_sink::BipackSink] trait that is also implemented for [`Vec`] allows to encode values +//! into the bipack format. It is just as simple to implement it for any other binary data +//! source. +//! +//! ## Utilities +//! +//! - to simplify encoding of unsigned ints the [bipack_sink::IntoU64] trait is used with +//! implementation for usual u* types. +//! +//! - [tools::to_dump] utility function converts binary data into human-readable dump as in good old +//! times (address, bytes, ASCII characters). +//! +//! ## About Bipack format +//! +//! This is a binary format created around the idea of bit-effectiveness and not disclosing +//! inner data structure. Unlike many known binary and text formats, like JSON, BSON, BOSS, and +//! many others, it does not include field names into packed binaries. +//! +//! It also uses rationally packed variable length format very effective for unsigned integers of +//! various sizes. This implementation supports sizes for u8, u16, u32 and u64. It is capable of +//! holding longer values too but for big numbers the fixed size encoding is mostly more effective. +//! 
This rational encoding format is called `smartint` and is internally used everywhere when one +//! needs to pack unsigned number, unless the fixed size is important. +//! +//! ### Varint encoding +//! +//! Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers, +//! so it is very useful when encoding big numbers or at least very big long values. In other cases +//! [bipack_sink::BipackSink::put_unsigned] works faster, and extra bits it uses do not play a role. +//! +//! | Bytes sz | varint bits | smartint bits | +//! |:-----:|:------:|:---------:| +//! | 1 | 7 | 6 | +//! | 2 | 14 | 14 | +//! | 3 | 21 | 22 | +//! | 4 | 28 | 29 | +//! | 5 | 35 | 36 | +//! | 6+ | 7*N | 7*N+1 | +//! | 9 | 63 | 64 | +//! | 10 | 64 | --- | +//! +//! In other words, except for very small numbers smartint +//! gives 1 data bit gain for the same packed byte size. For example, +//! full size 64 bits number with smartint takes one byte less (9 bytes vs. 10 in Varint). +//! +//! So, except for values in range 32..63 it gives same or better byte size effectiveness +//! than `Varint`. In particular: +//! +//! The effect of it could be interpreted as: +//! +//! | number values | size | +//! |:--------------|:------:| +//! | 0..31 | same | +//! | 32..63 | worse 1 byte | +//! | 64..1048575 | same | +//! | 1048576..2097151 | 1 byte better | +//! | 2097152..134217727 | same | +//! | 134217728..268435455 | 1 byte better | +//! +//! etc. +//! +//! ## Encoding format +//! +//! Encoded data could be 1 or more bytes in length. Data are +//! packed as follows: +//! +//! | byte offset | bits range | field | +//! |-------------|------------|-------| +//! | 0 | 0..1 | type | +//! | 0 | 2..7 | v0 | +//! | 1 | 0..7 | v1 (when used) | +//! | 2 | 0..7 | v2 (when used) | +//! +//! Then depending on the `type` field: +//! +//! | type | encoded | +//! |------|---------| +//! | 0 | v0 is the result 0..64 (or -32..32) | +//! | 1 | v0 ## v1 are the result, 14 bits | +//! 
| 2 | v0 ## v1 ## v2 are the result, 22 bits | +//! | 3 | v0 ## v1 ## v2 ## (varint encoded rest) | +//! +//! Where `##` means bits concatenation. The bits are interpreted as BIG ENDIAN, +//! for example `24573` will be encoded to `EA FF 02` //! //! + #![allow(dead_code)] #![allow(unused_variables)] -mod bipack_source; -mod bipack_sink; -mod to_dump; +pub mod bipack_source; +pub mod bipack_sink; +pub mod tools; #[cfg(test)] mod tests { use base64::Engine; use crate::bipack_sink::{BipackSink}; use crate::bipack_source::{BipackSource, Result, SliceSource}; - use crate::to_dump::to_dump; + use crate::tools::to_dump; #[test] fn fixed_unpack() -> Result<()> { diff --git a/src/to_dump.rs b/src/tools.rs similarity index 72% rename from src/to_dump.rs rename to src/tools.rs index 897daca..d5d9f93 100644 --- a/src/to_dump.rs +++ b/src/tools.rs @@ -1,5 +1,14 @@ +// use string_builder::Builder; + use string_builder::Builder; +/// Convert binary data into text dump, human readable, like: +/// ```text +/// 0000 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................| +/// 0010 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................| +/// 0020 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f | !"#$%&'()*+,-./| +/// 0030 30 31 |01 | +///``` pub fn to_dump(data: &[u8]) -> String { let mut offset = 0usize; let mut counter = 0; @@ -35,3 +44,4 @@ pub fn to_dump(data: &[u8]) -> String { if counter != 0 { ascii_dump(&mut result, counter, data, offset); } result.string().unwrap() } +