refined source to report errors. We might need to fix the sink too, but now we think it could not be overflown.

This commit is contained in:
Sergey Chernov 2023-10-09 22:36:02 +01:00
parent 3433e3bd84
commit 131859ffba
4 changed files with 136 additions and 63 deletions

View File

@ -2,7 +2,8 @@
name = "bipack_ru"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
description = "binary size-effective format used in Divan smart contracts, wasm bindings, network protocols, etc."
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
@ -13,3 +14,4 @@ string-builder = "0.2.0"
[dev-dependencies]
base64 = "0.21.4"
hex = "0.4.3"
derive_more = "0.99.17"

View File

@ -5,6 +5,9 @@ const V0LIMIT: u64 = 1u64 << 6;
const V1LIMIT: u64 = 1u64 << 14;
const V2LIMIT: u64 = 1u64 << 22;
/// Numeric value convertible to an unsigned 64-bit integer, to be used
/// with the [BipackSink#put_unsigned] compressed format. It is implemented for usize
/// and u* types already.
pub trait IntoU64 {
/// Consumes the value and returns it as a `u64`.
fn into_u64(self) -> u64;
}
@ -21,7 +24,9 @@ macro_rules! into_u64 {
into_u64!(u8, u16, u32, usize, u64);
/// Data sink to encode bipack binary format.
///
/// To implement just override [put_u8] and optionally [put_fixed_bytes]
pub trait BipackSink {
fn put_u8(self: &mut Self, data: u8);
@ -64,6 +69,9 @@ pub trait BipackSink {
self.put_fixed_bytes(&result);
}
/// Put unsigned value to compressed variable-length format, `Smartint` in the bipack
/// terms. This format is used to store size of variable-length binaries and strings.
/// Use [BipackSource::unsigned()] to unpack it.
fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
let value = number.into_u64();
let mut encode_seq = |ty: u8, bytes: &[u64]| {

View File

@ -1,93 +1,144 @@
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::string::FromUtf8Error;
use crate::bipack_source::BipackError::NoDataError;
/// Result of an error-aware bipack function: `Ok(T)` on success, otherwise a [BipackError].
pub(crate) type Res<T> = Result<T, BipackError>;
/// Errors reported while reading bipack data from a source.
#[derive(Debug, Clone)]
pub enum BipackError {
/// There is not enough data to fulfill the request.
NoDataError,
/// The bytes read for a string are not valid UTF-8; wraps the original [FromUtf8Error].
BadEncoding(FromUtf8Error)
}
/// Human-readable rendering of [BipackError]; it reuses the derived `Debug`
/// output, so the text is the variant name (plus its payload, if any).
impl Display for BipackError {
    fn fmt(&self, out: &mut Formatter<'_>) -> std::fmt::Result {
        out.write_fmt(format_args!("{:?}", self))
    }
}
/// Marks [BipackError] as a standard error type (so it can be boxed as `dyn Error`);
/// all [Error] methods keep their default implementations.
impl Error for BipackError {}
/// Data source compatible with mp_bintools serialization. It supports
/// fixed-size integers in rihgt order and varint ans smartint encodings
/// separately.
/// fixed-size integers in right order and varint and smartint encodings
/// separately. There is an out-of-the-box implementation for [Vec<u8>], and
/// it is easy to implement your own.
///
/// To implement a source for another type, implement just [get_u8()] or maybe also
/// [get_fixed_bytes] for efficiency.
pub trait BipackSource {
fn u8(self: &mut Self) -> u8;
fn get_u8(self: &mut Self) -> Res<u8>;
fn u16(self: &mut Self) -> u16 {
((self.u8() as u16) << 8) + (self.u8() as u16)
fn get_u16(self: &mut Self) -> Res<u16> {
Ok(((self.get_u8()? as u16) << 8) + (self.get_u8()? as u16))
}
fn u32(self: &mut Self) -> u32 {
((self.u16() as u32) << 16) + (self.u16() as u32)
fn get_u32(self: &mut Self) -> Res<u32> {
Ok(((self.get_u16()? as u32) << 16) + (self.get_u16()? as u32))
}
fn u64(self: &mut Self) -> u64 {
((self.u32() as u64) << 32) | (self.u32() as u64)
fn get_u64(self: &mut Self) -> Res<u64> {
Ok(((self.get_u32()? as u64) << 32) | (self.get_u32()? as u64))
}
fn smart_u64(self: &mut Self) -> u64 {
let mut get = || -> u64 { self.u8() as u64 };
let first = get();
/// Unpack a variable-length packed unsigned value, also used internally to store the size
/// of arrays, binary data, strings, etc. To pack use
/// [crate::bipack_sink::BipackSink::put_unsigned()].
fn get_unsigned(self: &mut Self) -> Res<u64> {
let mut get = || -> Res<u64> { Ok(self.get_u8()? as u64) };
let first = get()?;
let mut ty = first & 3;
let mut result = first >> 2;
if ty == 0 { return result; }
if ty == 0 { return Ok(result); }
ty -= 1;
result = result + (get() << 6);
if ty == 0 { return result; }
result = result + (get()? << 6);
if ty == 0 { return Ok(result); }
ty -= 1;
result = result + (get() << 14);
if ty == 0 { return result; }
result = result + (get()? << 14);
if ty == 0 { return Ok(result); }
result | (self.var_u64() << 22)
Ok(result | (self.get_varint_unsigned()? << 22))
}
fn var_u64(self: &mut Self) -> u64 {
/// Read an 8-byte varint-packed unsigned value from the source. We don't recommend
/// using it directly; use [get_unsigned] instead.
fn get_varint_unsigned(self: &mut Self) -> Res<u64> {
let mut result = 0u64;
let mut count = 0;
loop {
let x = self.u8() as u64;
let x = self.get_u8()? as u64;
result = result | ((x & 0x7F) << count);
if (x & 0x80) == 0 { return result; }
if (x & 0x80) == 0 { return Ok(result); }
count += 7
}
}
fn smart_u16(self: &mut Self) -> u16 {
self.smart_u64() as u16
/// read 2-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
/// as u16
fn get_packed_u16(self: &mut Self) -> Res<u16> {
Ok(self.get_unsigned()? as u16)
}
fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 }
fn fixed_bytes(self: &mut Self,size: usize) -> Vec<u8> {
/// read 4-bytes unsigned value from the source as smartint-encoded, same as [get_unsigned]
/// as u32
fn get_packed_u32(self: &mut Self) -> Res<u32> { Ok(self.get_unsigned()? as u32) }
/// read exact number of bytes from the source as a vec.
fn get_fixed_bytes(self: &mut Self, size: usize) -> Res<Vec<u8>> {
let mut result = Vec::with_capacity(size);
for i in 0..size { result.push(self.u8()); }
result
for i in 0..size { result.push(self.get_u8()?); }
Ok(result)
}
fn var_bytes(self: &mut Self) -> Vec<u8> {
let size = self.smart_u64() as usize;
self.fixed_bytes(size)
/// Read variable-length byte array from the source (with packed size), created
/// by [BipackSink::put_var_bytes] or [BipackSink::put_string]. The size is encoded
/// the same way as does [BipackSink::put_unsigned] and can be manually read by
/// [BipackSource::get_unsigned].
fn var_bytes(self: &mut Self) -> Res<Vec<u8>> {
let size = self.get_unsigned()? as usize;
self.get_fixed_bytes(size)
}
fn str(self: &mut Self) -> Result<String, FromUtf8Error> {
String::from_utf8(self.var_bytes())
/// Read a variable length string from a source packed with
/// [BipackSink::put_string]. It is a variable sized array of utf8 encoded
/// characters.
fn str(self: &mut Self) -> Res<String> {
String::from_utf8(
self.var_bytes()?
).or_else(|e| Err(BipackError::BadEncoding(e)))
}
}
/// The bipack source capable of extracting data from a slice.
/// Use [SliceSource::from()] or [bipack_source()] to create one.
pub struct SliceSource<'a> {
/// Borrowed bytes being decoded.
data: &'a [u8],
/// Index into `data` of the next byte to be read.
position: usize,
}
impl<'a> SliceSource<'a> {
pub fn new(src: &'a [u8]) -> SliceSource {
pub fn from(src: &'a [u8]) -> SliceSource {
SliceSource { data: src, position: 0 }
}
}
impl<'x> BipackSource for SliceSource<'x> {
fn u8(self: &mut Self) -> u8 {
let result = self.data[self.position];
self.position += 1;
result
fn get_u8(self: &mut Self) -> Res<u8> {
if self.position >= self.data.len() {
Err(NoDataError)
} else {
let result = self.data[self.position];
self.position += 1;
Ok(result)
}
}
}
pub fn bipack_source<'b>(v: &'b [u8]) -> SliceSource<'b> {
SliceSource::new(v)
}

View File

@ -1,3 +1,15 @@
//! # bipack codec
//!
//! The set of tools to effectively encode and decode bipack values. It is intentionally
//! minimalistic to be used with Divan smart-contracts where number of instructions could
//! be important.
//!
//! - [bipack_source::BipackSource] is used to decode values, there is implementation
//! [bipack_source::SliceSource] that parses binary slice. The trait only needs byte-read
//! method for the implementation.
//! - [bipack_sink::bipack_source]
//!
//!
#![allow(dead_code)]
#![allow(unused_variables)]
@ -5,43 +17,43 @@ mod bipack_source;
mod bipack_sink;
mod to_dump;
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
left + right
}
#[cfg(test)]
mod tests {
use std::error::Error;
use base64::Engine;
use crate::bipack_sink::{BipackSink};
use crate::bipack_source::{bipack_source, BipackSource, SliceSource};
use crate::bipack_source::{BipackSource, Res, SliceSource};
use crate::to_dump::to_dump;
#[test]
fn fixed_unpack() {
fn fixed_unpack() -> Result<(),Box<dyn Error>> {
let mut src = Vec::new();
base64::engine::general_purpose::STANDARD_NO_PAD
.decode_vec("B/oAAAEB0AAAANjLgKAv", &mut src)
.expect("decoded vector");
println!(": {}", hex::encode(&src));
let mut ss = SliceSource::new(&src);
assert_eq!(7, ss.u8());
assert_eq!(64000, ss.u16());
assert_eq!(66000, ss.u32());
assert_eq!(931127140399, ss.u64());
let mut ss = SliceSource::from(&src);
// Each value is read exactly once, in the order it was packed: the fixture is
// 15 bytes (1 + 2 + 4 + 8), so any extra read before these assertions would
// shift every following value and exhaust the source early.
assert_eq!(7, ss.get_u8()?);
assert_eq!(64000, ss.get_u16()?);
assert_eq!(66000, ss.get_u32()?);
assert_eq!(931127140399, ss.get_u64()?);
Ok(())
}
#[test]
fn smartint_unpack() {
fn smartint_unpack() -> Res<()> {
let mut src = Vec::new();
base64::engine::general_purpose::STANDARD_NO_PAD
.decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src)
.expect("decoded vector");
// println!("{}", hex::encode(&src));
let mut ss = bipack_source(&src);
assert_eq!(7, ss.u8());
assert_eq!(64000, ss.smart_u16());
assert_eq!(66000, ss.smart_u32());
assert_eq!(931127140399, ss.smart_u64());
let mut ss = SliceSource::from(&src);
assert_eq!(7, ss.get_u8()?);
assert_eq!(64000, ss.get_packed_u16()?);
assert_eq!(66000, ss.get_packed_u32()?);
assert_eq!(931127140399, ss.get_unsigned()?);
Ok(())
}
#[test]
@ -69,19 +81,19 @@ mod tests {
fn pack_varbinaries_and_string() {
let mut data = Vec::<u8>::new();
data.put_str("Hello, rupack!");
println!("{}",to_dump(&data));
let mut src = bipack_source(&data);
println!("size ${}\n{}",data.len(), to_dump(&data));
let mut src = SliceSource::from(&data);
assert_eq!("Hello, rupack!", src.str().unwrap());
}
#[test]
fn test_dump() {
for l in 1..64 {
for l in 0..64 {
let mut d2 = Vec::new();
for u in 0..l {
d2.push(u as u8);
}
println!("{}", to_dump(&d2));
println!("size {}\n{}", d2.len(), to_dump(&d2));
}
}
}