Refined the source to report errors. We might need to fix the sink too, but for now we think it cannot overflow.

This commit is contained in:
Sergey Chernov 2023-10-09 22:36:02 +01:00
parent 3433e3bd84
commit 131859ffba
4 changed files with 136 additions and 63 deletions

View File

@ -2,7 +2,8 @@
name = "bipack_ru" name = "bipack_ru"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
license = "Apache-2.0"
description = "binary size-effective format used in Divan smart contracts, wasm bindings, network protocols, etc."
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
@ -13,3 +14,4 @@ string-builder = "0.2.0"
[dev-dependencies] [dev-dependencies]
base64 = "0.21.4" base64 = "0.21.4"
hex = "0.4.3" hex = "0.4.3"
derive_more = "0.99.17"

View File

@ -5,6 +5,9 @@ const V0LIMIT: u64 = 1u64 << 6;
const V1LIMIT: u64 = 1u64 << 14; const V1LIMIT: u64 = 1u64 << 14;
const V2LIMIT: u64 = 1u64 << 22; const V2LIMIT: u64 = 1u64 << 22;
/// Numeric value convertible to an unsigned 64-bit integer, to be used
/// with the [BipackSink::put_unsigned] compressed format. It is already implemented for usize
/// and the u* types.
pub trait IntoU64 { pub trait IntoU64 {
fn into_u64(self) -> u64; fn into_u64(self) -> u64;
} }
@ -21,7 +24,9 @@ macro_rules! into_u64 {
into_u64!(u8, u16, u32, usize, u64); into_u64!(u8, u16, u32, usize, u64);
/// Data sink to encode bipack binary format.
///
/// To implement just override [put_u8] and optionally [put_fixed_bytes]
pub trait BipackSink { pub trait BipackSink {
fn put_u8(self: &mut Self, data: u8); fn put_u8(self: &mut Self, data: u8);
@ -64,6 +69,9 @@ pub trait BipackSink {
self.put_fixed_bytes(&result); self.put_fixed_bytes(&result);
} }
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
/// terms. This format is used to store size of variable-length binaries and strings.
/// Use [BipackSource::unsigned()] to unpack it.
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) { fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
let value = number.into_u64(); let value = number.into_u64();
let mut encode_seq = |ty: u8, bytes: &[u64]| { let mut encode_seq = |ty: u8, bytes: &[u64]| {

View File

@ -1,93 +1,144 @@
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::string::FromUtf8Error; use std::string::FromUtf8Error;
use crate::bipack_source::BipackError::NoDataError;
/// Result of error-aware bipack function
pub(crate) type Res<T> = Result<T, BipackError>;
/// Errors reported by a bipack source: not enough data to fulfill the request, or badly encoded string data.
#[derive(Debug, Clone)]
pub enum BipackError {
NoDataError,
BadEncoding(FromUtf8Error)
}
impl Display for BipackError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f,"{:?}", self)
}
}
impl Error for BipackError {}
/// Data source compatible with mp_bintools serialization. It supports /// Data source compatible with mp_bintools serialization. It supports
/// fixed-size integers in rihgt order and varint ans smartint encodings /// fixed-size integers in right order and varint and smartint encodings
/// separately. /// separately. There is out of the box implementation for [Vec<u8>], and
/// it is easy to implement your own.
///
/// To implement a source for another type, implement just [get_u8()] or maybe also
/// [get_fixed_bytes] for efficiency.
pub trait BipackSource { pub trait BipackSource {
fn u8(self: &mut Self) -> u8; fn get_u8(self: &mut Self) -> Res<u8>;
fn u16(self: &mut Self) -> u16 { fn get_u16(self: &mut Self) -> Res<u16> {
((self.u8() as u16) << 8) + (self.u8() as u16) Ok(((self.get_u8()? as u16) << 8) + (self.get_u8()? as u16))
} }
fn u32(self: &mut Self) -> u32 { fn get_u32(self: &mut Self) -> Res<u32> {
((self.u16() as u32) << 16) + (self.u16() as u32) Ok(((self.get_u16()? as u32) << 16) + (self.get_u16()? as u32))
} }
fn u64(self: &mut Self) -> u64 { fn get_u64(self: &mut Self) -> Res<u64> {
((self.u32() as u64) << 32) | (self.u32() as u64) Ok(((self.get_u32()? as u64) << 32) | (self.get_u32()? as u64))
} }
fn smart_u64(self: &mut Self) -> u64 { /// Unpack variable-length packed unsigned value, used also internally to store size
let mut get = || -> u64 { self.u8() as u64 }; /// of arrays, binary data, strings, etc. To pack use
let first = get(); /// [crate::bipack_sink::BipackSink::put_unsigned()].
fn get_unsigned(self: &mut Self) -> Res<u64> {
let mut get = || -> Res<u64> { Ok(self.get_u8()? as u64) };
let first = get()?;
let mut ty = first & 3; let mut ty = first & 3;
let mut result = first >> 2; let mut result = first >> 2;
if ty == 0 { return result; } if ty == 0 { return Ok(result); }
ty -= 1; ty -= 1;
result = result + (get() << 6); result = result + (get()? << 6);
if ty == 0 { return result; } if ty == 0 { return Ok(result); }
ty -= 1; ty -= 1;
result = result + (get() << 14); result = result + (get()? << 14);
if ty == 0 { return result; } if ty == 0 { return Ok(result); }
result | (self.var_u64() << 22) Ok(result | (self.get_varint_unsigned()? << 22))
} }
fn var_u64(self: &mut Self) -> u64 { /// read 8-bytes varint-packed unsigned value from the source. We don't recommend
/// using it directly; use [get_unsigned] instead.
fn get_varint_unsigned(self: &mut Self) -> Res<u64> {
let mut result = 0u64; let mut result = 0u64;
let mut count = 0; let mut count = 0;
loop { loop {
let x = self.u8() as u64; let x = self.get_u8()? as u64;
result = result | ((x & 0x7F) << count); result = result | ((x & 0x7F) << count);
if (x & 0x80) == 0 { return result; } if (x & 0x80) == 0 { return Ok(result); }
count += 7 count += 7
} }
} }
fn smart_u16(self: &mut Self) -> u16 { /// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
self.smart_u64() as u16 /// as u16
fn get_packed_u16(self: &mut Self) -> Res<u16> {
Ok(self.get_unsigned()? as u16)
} }
fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 }
fn fixed_bytes(self: &mut Self,size: usize) -> Vec<u8> { /// read 4-bytes unsigned value from the source
/// read 4-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
/// as u32
fn get_packed_u32(self: &mut Self) -> Res<u32> { Ok(self.get_unsigned()? as u32) }
/// read exact number of bytes from the source as a vec.
fn get_fixed_bytes(self: &mut Self, size: usize) -> Res<Vec<u8>> {
let mut result = Vec::with_capacity(size); let mut result = Vec::with_capacity(size);
for i in 0..size { result.push(self.u8()); } for i in 0..size { result.push(self.get_u8()?); }
result Ok(result)
} }
fn var_bytes(self: &mut Self) -> Vec<u8> { /// Read variable-length byte array from the source (with packed size), created
let size = self.smart_u64() as usize; /// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded
self.fixed_bytes(size) /// the same way as does [BipackSink::put_unsigned] and can be manually read by
/// [BipackSource::get_unsigned].
fn var_bytes(self: &mut Self) -> Res<Vec<u8>> {
let size = self.get_unsigned()? as usize;
self.get_fixed_bytes(size)
} }
fn str(self: &mut Self) -> Result<String, FromUtf8Error> {
String::from_utf8(self.var_bytes()) /// Read a variable length string from a source packed with
/// [BipackSink::put_string]. It is a variable sized array of utf8 encoded
/// characters.
fn str(self: &mut Self) -> Res<String> {
String::from_utf8(
self.var_bytes()?
).or_else(|e| Err(BipackError::BadEncoding(e)))
} }
} }
/// The bipack source capable of extracting data from a slice.
/// Use [SliceSource::from()] to create one.
pub struct SliceSource<'a> { pub struct SliceSource<'a> {
data: &'a [u8], data: &'a [u8],
position: usize, position: usize,
} }
impl<'a> SliceSource<'a> { impl<'a> SliceSource<'a> {
pub fn new(src: &'a [u8]) -> SliceSource { pub fn from(src: &'a [u8]) -> SliceSource {
SliceSource { data: src, position: 0 } SliceSource { data: src, position: 0 }
} }
} }
impl<'x> BipackSource for SliceSource<'x> { impl<'x> BipackSource for SliceSource<'x> {
fn u8(self: &mut Self) -> u8 { fn get_u8(self: &mut Self) -> Res<u8> {
let result = self.data[self.position]; if self.position >= self.data.len() {
self.position += 1; Err(NoDataError)
result } else {
let result = self.data[self.position];
self.position += 1;
Ok(result)
}
} }
} }
pub fn bipack_source<'b>(v: &'b [u8]) -> SliceSource<'b> {
SliceSource::new(v)
}

View File

@ -1,3 +1,15 @@
//! # bipack codec
//!
//! The set of tools to effectively encode and decode bipack values. It is intentionally
//! minimalistic to be used with Divan smart-contracts, where the number of instructions could
//! be important.
//!
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
//! method for the implementation.
//! - [bipack_sink::BipackSink] is used to encode values.
//!
//!
#![allow(dead_code)] #![allow(dead_code)]
#![allow(unused_variables)] #![allow(unused_variables)]
@ -5,43 +17,43 @@ mod bipack_source;
mod bipack_sink; mod bipack_sink;
mod to_dump; mod to_dump;
pub fn add(left: usize, right: usize) -> usize {
left + right
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::error::Error;
use base64::Engine; use base64::Engine;
use crate::bipack_sink::{BipackSink}; use crate::bipack_sink::{BipackSink};
use crate::bipack_source::{bipack_source, BipackSource, SliceSource}; use crate::bipack_source::{BipackSource, Res, SliceSource};
use crate::to_dump::to_dump; use crate::to_dump::to_dump;
#[test] #[test]
fn fixed_unpack() { fn fixed_unpack() -> Result<(),Box<dyn Error>> {
let mut src = Vec::new(); let mut src = Vec::new();
base64::engine::general_purpose::STANDARD_NO_PAD base64::engine::general_purpose::STANDARD_NO_PAD
.decode_vec("B/oAAAEB0AAAANjLgKAv", &mut src) .decode_vec("B/oAAAEB0AAAANjLgKAv", &mut src)
.expect("decoded vector"); .expect("decoded vector");
println!(": {}", hex::encode(&src)); println!(": {}", hex::encode(&src));
let mut ss = SliceSource::new(&src); let mut ss = SliceSource::from(&src);
assert_eq!(7, ss.u8()); let d7 = ss.get_u8()?;
assert_eq!(64000, ss.u16()); assert_eq!(7, ss.get_u8()?);
assert_eq!(66000, ss.u32()); assert_eq!(64000, ss.get_u16()?);
assert_eq!(931127140399, ss.u64()); assert_eq!(66000, ss.get_u32()?);
assert_eq!(931127140399, ss.get_u64()?);
Ok(())
} }
#[test] #[test]
fn smartint_unpack() { fn smartint_unpack() -> Res<()> {
let mut src = Vec::new(); let mut src = Vec::new();
base64::engine::general_purpose::STANDARD_NO_PAD base64::engine::general_purpose::STANDARD_NO_PAD
.decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src) .decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src)
.expect("decoded vector"); .expect("decoded vector");
// println!("{}", hex::encode(&src)); // println!("{}", hex::encode(&src));
let mut ss = bipack_source(&src); let mut ss = SliceSource::from(&src);
assert_eq!(7, ss.u8()); assert_eq!(7, ss.get_u8()?);
assert_eq!(64000, ss.smart_u16()); assert_eq!(64000, ss.get_packed_u16()?);
assert_eq!(66000, ss.smart_u32()); assert_eq!(66000, ss.get_packed_u32()?);
assert_eq!(931127140399, ss.smart_u64()); assert_eq!(931127140399, ss.get_unsigned()?);
Ok(())
} }
#[test] #[test]
@ -69,19 +81,19 @@ mod tests {
fn pack_varbinaries_and_string() { fn pack_varbinaries_and_string() {
let mut data = Vec::<u8>::new(); let mut data = Vec::<u8>::new();
data.put_str("Hello, rupack!"); data.put_str("Hello, rupack!");
println!("{}",to_dump(&data)); println!("size ${}\n{}",data.len(), to_dump(&data));
let mut src = bipack_source(&data); let mut src = SliceSource::from(&data);
assert_eq!("Hello, rupack!", src.str().unwrap()); assert_eq!("Hello, rupack!", src.str().unwrap());
} }
#[test] #[test]
fn test_dump() { fn test_dump() {
for l in 1..64 { for l in 0..64 {
let mut d2 = Vec::new(); let mut d2 = Vec::new();
for u in 0..l { for u in 0..l {
d2.push(u as u8); d2.push(u as u8);
} }
println!("{}", to_dump(&d2)); println!("size {}\n{}", d2.len(), to_dump(&d2));
} }
} }
} }