added ans fixed LOT of docs
This commit is contained in:
parent
e06d553ed7
commit
962fcc1ac8
@ -7,7 +7,7 @@ description = "binary size-effective format used in Divan smart contracts, wasm
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
#anyhow = "1.0"
|
||||
lazy_static = "1.4.0"
|
||||
string-builder = "0.2.0"
|
||||
|
||||
|
@ -26,7 +26,7 @@ into_u64!(u8, u16, u32, usize, u64);
|
||||
|
||||
/// Data sink to encode bipack binary format.
|
||||
///
|
||||
/// To implement just override [put_u8] and optionally [put_fixed_bytes].
|
||||
/// To implement just override [BipackSink::put_u8] and optionally [BipackSink::put_fixed_bytes].
|
||||
///
|
||||
/// Note that the sink is not returning errors, unlike [crate::bipack_source::BipackSource].
|
||||
/// It is supposed that the sink has unlimited
|
||||
@ -77,7 +77,7 @@ pub trait BipackSink {
|
||||
|
||||
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
|
||||
/// terms. This format is used to store size of variable-length binaries and strings.
|
||||
/// Use [BipackSource::unsigned()] to unpack it.
|
||||
/// Use [crate::bipack_source::BipackSource::get_unsigned] to unpack it.
|
||||
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
|
||||
let value = number.into_u64();
|
||||
let mut encode_seq = |ty: u8, bytes: &[u64]| {
|
||||
|
@ -4,13 +4,13 @@ use std::string::FromUtf8Error;
|
||||
use crate::bipack_source::BipackError::NoDataError;
|
||||
|
||||
/// Result of error-aware bipack function
|
||||
pub(crate) type Result<T> = std::result::Result<T, BipackError>;
|
||||
pub type Result<T> = std::result::Result<T, BipackError>;
|
||||
|
||||
/// There is not enough data to fulfill the request
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum BipackError {
|
||||
NoDataError,
|
||||
BadEncoding(FromUtf8Error)
|
||||
BadEncoding(FromUtf8Error),
|
||||
}
|
||||
|
||||
impl Display for BipackError {
|
||||
@ -24,11 +24,11 @@ impl Error for BipackError {}
|
||||
|
||||
/// Data source compatible with mp_bintools serialization. It supports
|
||||
/// fixed-size integers in right order and varint and smartint encodings
|
||||
/// separately. There is out of the box implementation for [Vec<u8>], and
|
||||
/// separately. There is out of the box implementation for [`Vec<u8>`], and
|
||||
/// it is easy to implement your own.
|
||||
///
|
||||
/// To implement source for other type, implement just [u8()] or mayve also
|
||||
/// [fixed_bytes] for effectiveness.
|
||||
/// To implement source for other type, implement just [BipackSource::get_u8] or maybe also
|
||||
/// [BipackSource::get_fixed_bytes] for effectiveness.
|
||||
///
|
||||
/// Unlike the [crate::bipack_sink::BipackSink] the source is returning errors. This is because
|
||||
/// it often appears when reading data do not correspond to the packed one, and this is an often
|
||||
@ -71,7 +71,7 @@ pub trait BipackSource {
|
||||
}
|
||||
|
||||
/// read 8-bytes varint-packed unsigned value from the source. We don't recommend
|
||||
/// using it directly; use [get_unsigned] instead.
|
||||
/// using it directly; use [BipackSource::get_unsigned] instead.
|
||||
fn get_varint_unsigned(self: &mut Self) -> Result<u64> {
|
||||
let mut result = 0u64;
|
||||
let mut count = 0;
|
||||
@ -83,15 +83,15 @@ pub trait BipackSource {
|
||||
}
|
||||
}
|
||||
|
||||
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
|
||||
/// as u16
|
||||
/// read 2-bytes unsigned value from the source as smartint-encoded, same as
|
||||
/// [BipackSource::get_unsigned] as u16
|
||||
fn get_packed_u16(self: &mut Self) -> Result<u16> {
|
||||
Ok(self.get_unsigned()? as u16)
|
||||
}
|
||||
|
||||
/// read 4-bytes unsigned value from the source
|
||||
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
|
||||
/// as u32
|
||||
/// read 4-bytes unsigned value from the source as smartint-encoded, same as
|
||||
/// [BipackSource::get_unsigned] as u32.
|
||||
fn get_packed_u32(self: &mut Self) -> Result<u32> { Ok(self.get_unsigned()? as u32) }
|
||||
|
||||
/// read exact number of bytes from the source as a vec.
|
||||
@ -102,8 +102,9 @@ pub trait BipackSource {
|
||||
}
|
||||
|
||||
/// Read variable-length byte array from the source (with packed size), created
|
||||
/// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded
|
||||
/// the same way as does [BipackSink::put_unsigned] and can be manually read by
|
||||
/// by [crate::bipack_sink::BipackSink::put_var_bytes] or
|
||||
/// [crate::bipack_sink::BipackSink::put_str]. The size is encoded the same way as does
|
||||
/// [crate::bipack_sink::BipackSink::put_unsigned] and can be manually read by
|
||||
/// [BipackSource::get_unsigned].
|
||||
fn var_bytes(self: &mut Self) -> Result<Vec<u8>> {
|
||||
let size = self.get_unsigned()? as usize;
|
||||
@ -111,7 +112,7 @@ pub trait BipackSource {
|
||||
}
|
||||
|
||||
/// Read a variable length string from a source packed with
|
||||
/// [BipavkSink::put_string]. It is a variable sized array fo utf8 encoded
|
||||
/// [crate::bipack_sink::BipackSink::put_str]. It is a variable sized array of utf8 encoded
|
||||
/// characters.
|
||||
fn str(self: &mut Self) -> Result<String> {
|
||||
String::from_utf8(
|
||||
@ -121,7 +122,7 @@ pub trait BipackSource {
|
||||
}
|
||||
|
||||
/// The bipack source capable of extracting data from a slice.
|
||||
/// use [SliceSource::from()] or [bipack_source()] to create one.
|
||||
/// use [SliceSource::from()] to create one.
|
||||
pub struct SliceSource<'a> {
|
||||
data: &'a [u8],
|
||||
position: usize,
|
||||
|
97
src/lib.rs
97
src/lib.rs
@ -1,4 +1,4 @@
|
||||
//! # pipack codec
|
||||
//! # Bipack codec
|
||||
//!
|
||||
//! The set of tools to effectively encode and decode bipack values. It is intentionally
|
||||
//! minimalistic to be used with Divan smart-contracts where number of instructions could
|
||||
@ -7,22 +7,107 @@
|
||||
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
|
||||
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
|
||||
//! method for the implementation.
|
||||
//! - [bipack_sink::bipack_source]
|
||||
//!
|
||||
//! - [bipack_sink::BipackSink] trait that is also implemented for [`Vec<u8>`] allows to encode values
|
||||
//! into the bipack format. It is equally simple to implement it for any other binary data
|
||||
//! source.
|
||||
//!
|
||||
//! ## Utilities
|
||||
//!
|
||||
//! - to simplify encoding of unsigned ints the [bipack_sink::IntoU64] trait is used with
|
||||
//! implementations for usual u* types.
|
||||
//!
|
||||
//! - [tools::to_dump] utility function converts binary data into human-readable dump as in good old
|
||||
//! times (address, bytes, ASCII characters).
|
||||
//!
|
||||
//! ## About Bipack format
|
||||
//!
|
||||
//! This is a binary format created around the idea of bit-effectiveness and not disclosing
|
||||
//! inner data structure. Unlike many known binary and text formats, like JSON, BSON, BOSS, and
|
||||
//! many others, it does not include field names into packed binaries.
|
||||
//!
|
||||
//! It also uses rationally packed variable length format very effective for unsigned integers of
|
||||
//! various sizes. This implementation supports sizes for u8, u16, u32 and u64. It is capable of
|
||||
//! holding longer values too but for big numbers the fixed size encoding is mostly more effective.
|
||||
//! This rational encoding format is called `smartint` and is internally used everywhere when one
|
||||
//! needs to pack an unsigned number, unless the fixed size is important.
|
||||
//!
|
||||
//! ### Varint encoding
|
||||
//!
|
||||
//! Smart variable-length long encoding tools, async. It gives byte-size gain from 64 bits numbers,
|
||||
//! so it is very useful when encoding big numbers or at least very big long values. In other cases
|
||||
//! [bipack_sink::BipackSink::put_unsigned] works faster, and the extra bits it uses do not matter.
|
||||
//!
|
||||
//! | Bytes sz | varint bits | smartint bits |
|
||||
//! |:-----:|:------:|:---------:|
|
||||
//! | 1 | 7 | 6 |
|
||||
//! | 2 | 14 | 14 |
|
||||
//! | 3 | 21 | 22 |
|
||||
//! | 4 | 28 | 29 |
|
||||
//! | 5 | 35 | 36 |
|
||||
//! | 6+ | 7*N | 7*N+1 |
|
||||
//! | 9 | 63 | 64 |
|
||||
//! | 10 | 64 | --- |
|
||||
//!
|
||||
//! In other words, except for very small numbers smartint
|
||||
//! gives 1 data bit gain for the same packed byte size. For example,
|
||||
//! full size 64 bits number with smartint takes one byte less (9 bytes vs. 10 in Varint).
|
||||
//!
|
||||
//! So, except for values in range 32..63 it gives same or better byte size effectiveness
|
||||
//! than `Varint`. In particular:
|
||||
//!
|
||||
//! The effect of it could be interpreted as:
|
||||
//!
|
||||
//! | number values | size |
|
||||
//! |:--------------|:------:|
|
||||
//! | 0..31 | same |
|
||||
//! | 32..63 | worse 1 byte |
|
||||
//! | 64..1048573 | same |
|
||||
//! | 1048576..2097151 | 1 byte better |
|
||||
//! | 2097152..134217727 | same |
|
||||
//! | 134217728..268435456 | 1 byte better |
|
||||
//!
|
||||
//! etc.
|
||||
//!
|
||||
//! ## Encoding format
|
||||
//!
|
||||
//! Encoded data could be 1 or more bytes in length. Data are
|
||||
//! packed as follows:
|
||||
//!
|
||||
//! | byte offset | bits range | field |
|
||||
//! |-------------|------------|-------|
|
||||
//! | 0 | 0..1 | type |
|
||||
//! | 0 | 2..7 | v0 |
|
||||
//! | 1 | 0..7 | v1 (when used) |
|
||||
//! | 2 | 0..7 | v2 (when used) |
|
||||
//!
|
||||
//! Then depending on the `type` field:
|
||||
//!
|
||||
//! | type | encoded |
|
||||
//! |------|---------|
|
||||
//! | 0 | v0 is the result 0..64 (or -32..32) |
|
||||
//! | 1 | v0 ## v1 are the result, 14 bits |
|
||||
//! | 2 | v0 ## v1 ## v2 are the result, 22 bits |
|
||||
//! | 3 | v0 ## v1 ## v2 ## (varint encoded rest) |
|
||||
//!
|
||||
//! Where `##` means bits concatenation. The bits are interpreted as BIG ENDIAN,
|
||||
//! for example `24573` will be encoded to `EA FF 02`
|
||||
//!
|
||||
//!
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_variables)]
|
||||
|
||||
mod bipack_source;
|
||||
mod bipack_sink;
|
||||
mod to_dump;
|
||||
pub mod bipack_source;
|
||||
pub mod bipack_sink;
|
||||
pub mod tools;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use base64::Engine;
|
||||
use crate::bipack_sink::{BipackSink};
|
||||
use crate::bipack_source::{BipackSource, Result, SliceSource};
|
||||
use crate::to_dump::to_dump;
|
||||
use crate::tools::to_dump;
|
||||
|
||||
#[test]
|
||||
fn fixed_unpack() -> Result<()> {
|
||||
|
@ -1,5 +1,14 @@
|
||||
// use string_builder::Builder;
|
||||
|
||||
use string_builder::Builder;
|
||||
|
||||
/// Convert binary data into text dump, human readable, like:
|
||||
/// ```text
|
||||
/// 0000 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................|
|
||||
/// 0010 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................|
|
||||
/// 0020 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f | !"#$%&'()*+,-./|
|
||||
/// 0030 30 31 |01 |
|
||||
///```
|
||||
pub fn to_dump(data: &[u8]) -> String {
|
||||
let mut offset = 0usize;
|
||||
let mut counter = 0;
|
||||
@ -35,3 +44,4 @@ pub fn to_dump(data: &[u8]) -> String {
|
||||
if counter != 0 { ascii_dump(&mut result, counter, data, offset); }
|
||||
result.string().unwrap()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user