From 131859ffba9824c5fa60a43aea7082f81eefd394 Mon Sep 17 00:00:00 2001 From: sergeych Date: Mon, 9 Oct 2023 22:36:02 +0100 Subject: [PATCH] refined source to report errors. We might need to fix the sink too, but now we think it could not be overflown. --- Cargo.toml | 4 +- src/bipack_sink.rs | 10 +++- src/bipack_source.rs | 131 ++++++++++++++++++++++++++++++------------- src/lib.rs | 54 +++++++++++------- 4 files changed, 136 insertions(+), 63 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1d947c0..9e55a95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ name = "bipack_ru" version = "0.1.0" edition = "2021" - +license = "Apache-2.0" +description = "binary size-effective format used in Divan smart contracts, wasm bindings, network protocols, etc." # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] @@ -13,3 +14,4 @@ string-builder = "0.2.0" [dev-dependencies] base64 = "0.21.4" hex = "0.4.3" +derive_more = "0.99.17" \ No newline at end of file diff --git a/src/bipack_sink.rs b/src/bipack_sink.rs index de7a753..5731295 100644 --- a/src/bipack_sink.rs +++ b/src/bipack_sink.rs @@ -5,6 +5,9 @@ const V0LIMIT: u64 = 1u64 << 6; const V1LIMIT: u64 = 1u64 << 14; const V2LIMIT: u64 = 1u64 << 22; +/// Numeric value convertible to Unsigned 64 bit to be used +/// with [BipackSink::put_unsigned] compressed format. It is implemented for usize +/// and u* types already. pub trait IntoU64 { fn into_u64(self) -> u64; } @@ -21,7 +24,9 @@ macro_rules! into_u64 { into_u64!(u8, u16, u32, usize, u64); - +/// Data sink to encode bipack binary format. +/// +/// To implement just override [put_u8] and optionally [put_fixed_bytes] pub trait BipackSink { fn put_u8(self: &mut Self, data: u8); @@ -64,6 +69,9 @@ pub trait BipackSink { self.put_fixed_bytes(&result); } + /// Put unsigned value to compressed variable-length format, `Smartint` in the bipack + /// terms. 
This format is used to store size of variable-length binaries and strings. + /// Use [BipackSource::get_unsigned()] to unpack it. fn put_unsigned(self: &mut Self, number: T) { let value = number.into_u64(); let mut encode_seq = |ty: u8, bytes: &[u64]| { diff --git a/src/bipack_source.rs b/src/bipack_source.rs index ee0e97f..a516740 100644 --- a/src/bipack_source.rs +++ b/src/bipack_source.rs @@ -1,93 +1,144 @@ +use std::error::Error; +use std::fmt::{Display, Formatter}; use std::string::FromUtf8Error; +use crate::bipack_source::BipackError::NoDataError; + +/// Result of error-aware bipack function +pub(crate) type Res = Result; + +/// Bipack decoding error: there is not enough data to fulfill the request, or a string is not valid UTF-8 +#[derive(Debug, Clone)] +pub enum BipackError { + NoDataError, + BadEncoding(FromUtf8Error) +} + +impl Display for BipackError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f,"{:?}", self) + } +} + +impl Error for BipackError {} + /// Data source compatible with mp_bintools serialization. It supports -/// fixed-size integers in rihgt order and varint ans smartint encodings -/// separately. +/// fixed-size integers in right order and varint and smartint encodings +/// separately. There is an out-of-the-box implementation for [Vec], and +/// it is easy to implement your own. +/// +/// To implement source for another type, implement just [get_u8()] or maybe also +/// [get_fixed_bytes] for effectiveness. pub trait BipackSource { - fn u8(self: &mut Self) -> u8; + fn get_u8(self: &mut Self) -> Res; - fn u16(self: &mut Self) -> u16 { - ((self.u8() as u16) << 8) + (self.u8() as u16) + fn get_u16(self: &mut Self) -> Res { + Ok(((self.get_u8()? as u16) << 8) + (self.get_u8()? as u16)) } - fn u32(self: &mut Self) -> u32 { - ((self.u16() as u32) << 16) + (self.u16() as u32) + fn get_u32(self: &mut Self) -> Res { + Ok(((self.get_u16()? as u32) << 16) + (self.get_u16()? 
as u32)) } - fn u64(self: &mut Self) -> u64 { - ((self.u32() as u64) << 32) | (self.u32() as u64) + fn get_u64(self: &mut Self) -> Res { + Ok(((self.get_u32()? as u64) << 32) | (self.get_u32()? as u64)) } - fn smart_u64(self: &mut Self) -> u64 { - let mut get = || -> u64 { self.u8() as u64 }; - let first = get(); + /// Unpack variable-length packed unsigned value, used also internally to store size + /// of arrays, binary data, strings, etc. To pack use + /// [crate::bipack_sink::BipackSink::put_unsigned()]. + fn get_unsigned(self: &mut Self) -> Res { + let mut get = || -> Res { Ok(self.get_u8()? as u64) }; + let first = get()?; let mut ty = first & 3; let mut result = first >> 2; - if ty == 0 { return result; } + if ty == 0 { return Ok(result); } ty -= 1; - result = result + (get() << 6); - if ty == 0 { return result; } + result = result + (get()? << 6); + if ty == 0 { return Ok(result); } ty -= 1; - result = result + (get() << 14); - if ty == 0 { return result; } + result = result + (get()? << 14); + if ty == 0 { return Ok(result); } - result | (self.var_u64() << 22) + Ok(result | (self.get_varint_unsigned()? << 22)) } - fn var_u64(self: &mut Self) -> u64 { + /// read 8-bytes varint-packed unsigned value from the source. We don't recommend + /// using it directly; use [get_unsigned] instead. + fn get_varint_unsigned(self: &mut Self) -> Res { let mut result = 0u64; let mut count = 0; loop { - let x = self.u8() as u64; + let x = self.get_u8()? as u64; result = result | ((x & 0x7F) << count); - if (x & 0x80) == 0 { return result; } + if (x & 0x80) == 0 { return Ok(result); } count += 7 } } - fn smart_u16(self: &mut Self) -> u16 { - self.smart_u64() as u16 + /// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned] + /// as u16 + fn get_packed_u16(self: &mut Self) -> Res { + Ok(self.get_unsigned()? 
as u16) } - fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 } - fn fixed_bytes(self: &mut Self,size: usize) -> Vec { + /// read 4-bytes unsigned value from the source + /// as smartint-encoded, same as [get_unsigned] + /// as u32 + fn get_packed_u32(self: &mut Self) -> Res { Ok(self.get_unsigned()? as u32) } + + /// read exact number of bytes from the source as a vec. + fn get_fixed_bytes(self: &mut Self, size: usize) -> Res> { let mut result = Vec::with_capacity(size); - for i in 0..size { result.push(self.u8()); } - result + for i in 0..size { result.push(self.get_u8()?); } + Ok(result) } - fn var_bytes(self: &mut Self) -> Vec { - let size = self.smart_u64() as usize; - self.fixed_bytes(size) + /// Read variable-length byte array from the source (with packed size), created + /// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded + /// the same way as [BipackSink::put_unsigned] does and can be manually read by + /// [BipackSource::get_unsigned]. + fn var_bytes(self: &mut Self) -> Res> { + let size = self.get_unsigned()? as usize; + self.get_fixed_bytes(size) } - fn str(self: &mut Self) -> Result { - String::from_utf8(self.var_bytes()) + + /// Read a variable length string from a source packed with + /// [BipackSink::put_string]. It is a variable sized array of utf8 encoded + /// characters. + fn str(self: &mut Self) -> Res { + String::from_utf8( + self.var_bytes()? + ).or_else(|e| Err(BipackError::BadEncoding(e))) } } +/// The bipack source capable of extracting data from a slice. +/// Use [SliceSource::from()] to create one. 
pub struct SliceSource<'a> { data: &'a [u8], position: usize, } impl<'a> SliceSource<'a> { - pub fn new(src: &'a [u8]) -> SliceSource { + pub fn from(src: &'a [u8]) -> SliceSource { SliceSource { data: src, position: 0 } } } impl<'x> BipackSource for SliceSource<'x> { - fn u8(self: &mut Self) -> u8 { - let result = self.data[self.position]; - self.position += 1; - result + fn get_u8(self: &mut Self) -> Res { + if self.position >= self.data.len() { + Err(NoDataError) + } else { + let result = self.data[self.position]; + self.position += 1; + Ok(result) + } } } -pub fn bipack_source<'b>(v: &'b [u8]) -> SliceSource<'b> { - SliceSource::new(v) -} diff --git a/src/lib.rs b/src/lib.rs index 1fbf26a..ee4e8a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,15 @@ +//! # bipack codec +//! +//! The set of tools to effectively encode and decode bipack values. It is intentionally +//! minimalistic to be used with Divan smart-contracts where number of instructions could +//! be important. +//! +//! - [bipack_source::BipackSource] is used to decode values, there is implementation +//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read +//! method for the implementation. +//! - [bipack_sink::BipackSink] is used to encode values. +//! +//! 
#![allow(dead_code)] #![allow(unused_variables)] @@ -5,43 +17,43 @@ mod bipack_source; mod bipack_sink; mod to_dump; -pub fn add(left: usize, right: usize) -> usize { - left + right -} - #[cfg(test)] mod tests { + use std::error::Error; use base64::Engine; use crate::bipack_sink::{BipackSink}; - use crate::bipack_source::{bipack_source, BipackSource, SliceSource}; + use crate::bipack_source::{BipackSource, Res, SliceSource}; use crate::to_dump::to_dump; #[test] - fn fixed_unpack() { + fn fixed_unpack() -> Result<(),Box> { let mut src = Vec::new(); base64::engine::general_purpose::STANDARD_NO_PAD .decode_vec("B/oAAAEB0AAAANjLgKAv", &mut src) .expect("decoded vector"); println!(": {}", hex::encode(&src)); - let mut ss = SliceSource::new(&src); - assert_eq!(7, ss.u8()); - assert_eq!(64000, ss.u16()); - assert_eq!(66000, ss.u32()); - assert_eq!(931127140399, ss.u64()); + let mut ss = SliceSource::from(&src); + let d7 = ss.get_u8()?; + assert_eq!(7, ss.get_u8()?); + assert_eq!(64000, ss.get_u16()?); + assert_eq!(66000, ss.get_u32()?); + assert_eq!(931127140399, ss.get_u64()?); + Ok(()) } #[test] - fn smartint_unpack() { + fn smartint_unpack() -> Res<()> { let mut src = Vec::new(); base64::engine::general_purpose::STANDARD_NO_PAD .decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src) .expect("decoded vector"); // println!("{}", hex::encode(&src)); - let mut ss = bipack_source(&src); - assert_eq!(7, ss.u8()); - assert_eq!(64000, ss.smart_u16()); - assert_eq!(66000, ss.smart_u32()); - assert_eq!(931127140399, ss.smart_u64()); + let mut ss = SliceSource::from(&src); + assert_eq!(7, ss.get_u8()?); + assert_eq!(64000, ss.get_packed_u16()?); + assert_eq!(66000, ss.get_packed_u32()?); + assert_eq!(931127140399, ss.get_unsigned()?); + Ok(()) } #[test] @@ -69,19 +81,19 @@ mod tests { fn pack_varbinaries_and_string() { let mut data = Vec::::new(); data.put_str("Hello, rupack!"); - println!("{}",to_dump(&data)); - let mut src = bipack_source(&data); + println!("size ${}\n{}",data.len(), 
to_dump(&data)); + let mut src = SliceSource::from(&data); assert_eq!("Hello, rupack!", src.str().unwrap()); } #[test] fn test_dump() { - for l in 1..64 { + for l in 0..64 { let mut d2 = Vec::new(); for u in 0..l { d2.push(u as u8); } - println!("{}", to_dump(&d2)); + println!("size {}\n{}", d2.len(), to_dump(&d2)); } } } \ No newline at end of file