added and fixed a LOT of docs

This commit is contained in:
Sergey Chernov 2023-10-09 23:26:08 +01:00
parent e06d553ed7
commit 962fcc1ac8
5 changed files with 120 additions and 24 deletions

View File

@ -7,7 +7,7 @@ description = "binary size-effective format used in Divan smart contracts, wasm
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
#anyhow = "1.0"
lazy_static = "1.4.0"
string-builder = "0.2.0"

View File

@ -26,7 +26,7 @@ into_u64!(u8, u16, u32, usize, u64);
/// Data sink to encode bipack binary format.
///
/// To implement just override [put_u8] and optionally [put_fixed_bytes].
/// To implement just override [BipackSink::put_u8] and optionally [BipackSink::put_fixed_bytes].
///
/// Note that the sink is not returning errors, unlike [crate::bipack_source::BipackSource].
/// It is supposed that the sink has unlimited
@ -77,7 +77,7 @@ pub trait BipackSink {
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
/// terms. This format is used to store size of variable-length binaries and strings.
/// Use [BipackSource::unsigned()] to unpack it.
/// Use [crate::bipack_source::BipackSource::get_unsigned] to unpack it.
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
let value = number.into_u64();
let mut encode_seq = |ty: u8, bytes: &[u64]| {

View File

@ -4,13 +4,13 @@ use std::string::FromUtf8Error;
use crate::bipack_source::BipackError::NoDataError;
/// Result of error-aware bipack function
pub(crate) type Result<T> = std::result::Result<T, BipackError>;
pub type Result<T> = std::result::Result<T, BipackError>;
/// There is not enough data to fulfill the request
#[derive(Debug, Clone)]
pub enum BipackError {
NoDataError,
BadEncoding(FromUtf8Error)
BadEncoding(FromUtf8Error),
}
impl Display for BipackError {
@ -24,11 +24,11 @@ impl Error for BipackError {}
/// Data source compatible with mp_bintools serialization. It supports
/// fixed-size integers in right order and varint and smartint encodings
/// separately. There is out of the box implementation for [Vec<u8>], and
/// separately. There is out of the box implementation for [`Vec<u8>`], and
/// it is easy to implement your own.
///
/// To implement source for other type, implement just [u8()] or mayve also
/// [fixed_bytes] for effectiveness.
/// To implement source for another type, implement just [BipackSource::get_u8] or maybe also
/// [BipackSource::get_fixed_bytes] for effectiveness.
///
/// Unlike the [crate::bipack_sink::BipackSink] the source is returning errors. This is because
/// it often appears when reading data do not correspond to the packed one, and this is an often
@ -71,7 +71,7 @@ pub trait BipackSource {
}
/// read 8-bytes varint-packed unsigned value from the source. We don't recommend
/// using it directly; use [get_unsigned] instead.
/// using it directly; use [BipackSource::get_unsigned] instead.
fn get_varint_unsigned(self: &mut Self) -> Result<u64> {
let mut result = 0u64;
let mut count = 0;
@ -83,15 +83,15 @@ pub trait BipackSource {
}
}
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
/// as u16
/// read 2-bytes unsigned value from the source as smartint-encoded, same as
/// [BipackSource::get_unsigned] as u16
fn get_packed_u16(self: &mut Self) -> Result<u16> {
    // Decode the smartint first; `?` hands any read error straight back to the caller.
    let decoded = self.get_unsigned()?;
    // Narrowing `as` cast: if the decoded value is wider than 16 bits,
    // only its low 16 bits are kept (no range check is performed).
    Ok(decoded as u16)
}
/// read 4-bytes unsigned value from the source
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
/// as u32
/// read 4-bytes unsigned value from the source as smartint-encoded, same as
/// [BipackSource::get_unsigned] as u32.
fn get_packed_u32(self: &mut Self) -> Result<u32> {
    // Decode the smartint first; `?` hands any read error straight back to the caller.
    let decoded = self.get_unsigned()?;
    // Narrowing `as` cast: if the decoded value is wider than 32 bits,
    // only its low 32 bits are kept (no range check is performed).
    Ok(decoded as u32)
}
/// read exact number of bytes from the source as a vec.
@ -102,8 +102,9 @@ pub trait BipackSource {
}
/// Read variable-length byte array from the source (with packed size), created
/// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded
/// the same way as does [BipackSink::put_unsigned] and can be manually read by
/// by [crate::bipack_sink::BipackSink::put_var_bytes] or
/// [crate::bipack_sink::BipackSink::put_str]. The size is encoded the same way as does
/// [crate::bipack_sink::BipackSink::put_unsigned] and can be manually read by
/// [BipackSource::get_unsigned].
fn var_bytes(self: &mut Self) -> Result<Vec<u8>> {
let size = self.get_unsigned()? as usize;
@ -111,7 +112,7 @@ pub trait BipackSource {
}
/// Read a variable length string from a source packed with
/// [BipavkSink::put_string]. It is a variable sized array fo utf8 encoded
/// [crate::bipack_sink::BipackSink::put_str]. It is a variable sized array of utf8 encoded
/// characters.
fn str(self: &mut Self) -> Result<String> {
String::from_utf8(
@ -121,7 +122,7 @@ pub trait BipackSource {
}
/// The bipack source capable of extracting data from a slice.
/// use [SliceSource::from()] or [bipack_source()] to create one.
/// use [SliceSource::from()] to create one.
pub struct SliceSource<'a> {
data: &'a [u8],
position: usize,

View File

@ -1,4 +1,4 @@
//! # pipack codec
//! # Bipack codec
//!
//! The set of tools to effectively encode and decode bipack values. It is intentionally
//! minimalistic to be used with Divan smart-contracts where number of instructions could
@ -7,22 +7,107 @@
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
//! method for the implementation.
//! - [bipack_sink::bipack_source]
//!
//! - [bipack_sink::BipackSink] trait that is also implemented for [`Vec<u8>`] allows encoding values
//! into the bipack format. It is just as simple to implement it for any other binary data
//! sink.
//!
//! ## Utilities
//!
//! - to simplify encoding of unsigned ints the [bipack_sink::IntoU64] trait is used with
//! implementations for the usual `u*` types.
//!
//! - [tools::to_dump] utility function converts binary data into human-readable dump as in good old
//! times (address, bytes, ASCII characters).
//!
//! ## About Bipack format
//!
//! This is a binary format created around the idea of bit-effectiveness and not disclosing
//! inner data structure. Unlike many known binary and text formats, like JSON, BSON, BOSS, and
//! many others, it does not include field names into packed binaries.
//!
//! It also uses a rationally packed variable length format very effective for unsigned integers of
//! various sizes. This implementation supports sizes for u8, u16, u32 and u64. It is capable of
//! holding longer values too but for big numbers the fixed size encoding is mostly more effective.
//! This rational encoding format is called `smartint` and is internally used everywhere when one
//! needs to pack an unsigned number, unless the fixed size is important.
//!
//! ### Varint encoding
//!
//! Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers,
//! so it is very useful when encoding big numbers or at least very big long values. In other cases
//! [bipack_sink::BipackSink::put_unsigned] works faster, and the extra bits it uses do not play
//! a significant role.
//!
//! | Bytes sz | varint bits | smartint bits |
//! |:-----:|:------:|:---------:|
//! | 1 | 7 | 6 |
//! | 2 | 14 | 14 |
//! | 3 | 21 | 22 |
//! | 4 | 28 | 29 |
//! | 5 | 35 | 36 |
//! | 6+ | 7*N | 7*N+1 |
//! | 9 | 63 | 64 |
//! | 10 | 64 | --- |
//!
//! In other words, except for very small numbers smartint
//! gives 1 data bit gain for the same packed byte size. For example,
//! full size 64 bits number with smartint takes one byte less (9 bytes vs. 10 in Varint).
//!
//! So, except for values in range 32..63 it gives same or better byte size effectiveness
//! than `Varint`. In particular:
//!
//! The effect of it could be interpreted as:
//!
//! | number values | size |
//! |:--------------|:------:|
//! | 0..31 | same |
//! | 32..63 | worse 1 byte |
//! | 64..1048573 | same |
//! | 1048576..2097151 | 1 byte better |
//! | 2097152..134217727 | same |
//! | 134217728..268435456 | 1 byte better |
//!
//! etc.
//!
//! ## Encoding format
//!
//! Encoded data could be 1 or more bytes in length. Data are
//! packed as follows:
//!
//! | byte offset | bits range | field |
//! |-------------|------------|-------|
//! | 0 | 0..1 | type |
//! | 0 | 2..7 | v0 |
//! | 1 | 0..7 | v1 (when used) |
//! | 2 | 0..7 | v2 (when used) |
//!
//! Then depending on the `type` field:
//!
//! | type | encoded |
//! |------|---------|
//! | 0 | v0 is the result 0..64 (or -32..32) |
//! | 1 | v0 ## v1 are the result, 14 bits |
//! | 2 | v0 ## v1 ## v2 are the result, 22 bits |
//! | 3 | v0 ## v1 ## v2 ## (varint encoded rest) |
//!
//! Where `##` means bits concatenation. The bits are interpreted as BIG ENDIAN,
//! for example `24573` will be encoded to `EA FF 02`
//!
//!
#![allow(dead_code)]
#![allow(unused_variables)]
mod bipack_source;
mod bipack_sink;
mod to_dump;
pub mod bipack_source;
pub mod bipack_sink;
pub mod tools;
#[cfg(test)]
mod tests {
use base64::Engine;
use crate::bipack_sink::{BipackSink};
use crate::bipack_source::{BipackSource, Result, SliceSource};
use crate::to_dump::to_dump;
use crate::tools::to_dump;
#[test]
fn fixed_unpack() -> Result<()> {

View File

@ -1,5 +1,14 @@
// use string_builder::Builder;
use string_builder::Builder;
/// Convert binary data into text dump, human readable, like:
/// ```text
/// 0000 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................|
/// 0010 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................|
/// 0020 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f | !"#$%&'()*+,-./|
/// 0030 30 31 |01 |
///```
pub fn to_dump(data: &[u8]) -> String {
let mut offset = 0usize;
let mut counter = 0;
@ -35,3 +44,4 @@ pub fn to_dump(data: &[u8]) -> String {
if counter != 0 { ascii_dump(&mut result, counter, data, offset); }
result.string().unwrap()
}