From 3433e3bd84f7d8258034c43305a1aa0dbe7a9fc4 Mon Sep 17 00:00:00 2001
From: Sergey Chernov
Date: Sat, 7 Oct 2023 09:27:16 +0000
Subject: [PATCH] support for byte buffers, strings and binary dump

---
Review notes (free-form area between `---` and the diffstat; by convention
this text is not part of the commit message).

What the patch does
  * Cargo.toml: adds the `string-builder` dependency used by to_dump().
  * src/bipack_sink.rs: BipackSink methods no longer return `&Self` and the
    debug println! calls are dropped.  New methods:
      - put_var_bytes(): length (via put_unsigned) followed by the raw bytes;
      - put_str(): put_var_bytes() of the string's UTF-8 bytes;
      - put_unsigned(): size-adaptive encoding.  The low 2 bits of the first
        byte carry a type tag 0..3, the upper 6 bits carry the low 6 bits of
        the value.  Values below 2^6 / 2^14 / 2^22 take 1 / 2 / 3 bytes;
        anything larger takes 3 bytes plus put_var_unsigned(value >> 22);
      - put_var_unsigned(): 7 bits per byte, least significant group first,
        0x80 set on every byte except the last.
    IntoU64 (generated by the into_u64! macro for u8, u16, u32, usize, u64)
    lets put_unsigned() accept any of those integer types.
  * src/bipack_source.rs: adds fixed_bytes(), var_bytes() and str(), the
    reading counterparts of the methods above.
  * src/to_dump.rs: new to_dump() helper producing a 16-bytes-per-row hex
    dump with an ASCII gutter.

Changes made while reviewing
  * to_dump(): the printable test was `b >= 32 && b <= 127`, which let 0x7f
    (DEL, a control character) through into the gutter; it is now `b < 127`.
  * to_dump(): the filler for missing bytes in a short last row is three
    columns wide, matching the three columns each "{:02x} " cell occupies,
    so the `|` gutter lines up on every row.
  * Because of the two edits above, the blob id on the to_dump.rs `index`
    line no longer corresponds to the file this patch creates.

Reconstruction caveats (TODO confirm against the repository)
  * This patch was rebuilt from a flattened copy in which newlines and
    `<...>` spans had been lost.  Generic parameters (Vec<u8>,
    <T: IntoU64>, Result<String, FromUtf8Error>, Vec::<u8>::new()) were
    re-inserted from context; the author's e-mail address could not be
    recovered and is left absent.
  * Line counts match every hunk header and the diffstat, but the exact
    position of two blank context lines (the second blank line after
    V2LIMIT in bipack_sink.rs, and the first line of the first lib.rs hunk)
    is inferred, as is the `Vec<_>` in the removed `collect` comment line.
  * Indentation and runs of spaces inside string literals are assumed.

 Cargo.toml           |   1 +
 src/bipack_sink.rs   | 148 +++++++++++++++++++++++++++++++------------
 src/bipack_source.rs |  16 +++++
 src/lib.rs           |  38 ++++++++++-
 src/to_dump.rs       |  37 +++++++++++
 5 files changed, 195 insertions(+), 45 deletions(-)
 create mode 100644 src/to_dump.rs

diff --git a/Cargo.toml b/Cargo.toml
index 3b3e004..1d947c0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 lazy_static = "1.4.0"
+string-builder = "0.2.0"
 
 [dev-dependencies]
 base64 = "0.21.4"
diff --git a/src/bipack_sink.rs b/src/bipack_sink.rs
index a048637..de7a753 100644
--- a/src/bipack_sink.rs
+++ b/src/bipack_sink.rs
@@ -1,51 +1,115 @@
 use std::iter::Iterator;
-
-pub trait BipackSink {
-    fn put_u8(self: &mut Self, data: u8) -> &Self;
-
-    fn put_fixed_bytes(self: &mut Self, data: &[u8]) -> &Self {
-        for b in data { self.put_u8(*b); }
-        return self
-    }
-
-    fn put_u16(self: &mut Self, mut value: u16) -> &Self {
-        let mut result = [0u8; 2];
-        for i in (0..result.len()).rev() {
-            result[i] = value as u8;
-            println!(":: {} / {}", value, value as u8);
-            value = value >> 8;
-        }
-        self.put_fixed_bytes(&result)
-    }
-
-    fn put_u32(self: &mut Self, mut value: u32) -> &Self {
-        let mut result = [0u8; 4];
-        for i in (0..result.len()).rev() {
-            result[i] = value as u8;
-            println!(":: {} / {}", value, value as u8);
-            value = value >> 8;
-        }
-        self.put_fixed_bytes(&result)
-    }
-    fn put_u64(self: &mut Self, mut value: u64) -> &Self {
-        let mut result = [0u8; 8];
-        for i in (0..result.len()).rev() {
-            result[i] = value as u8;
-            println!(":: {} / {}", value, value as u8);
-            value = value >> 8;
-        }
-        self.put_fixed_bytes(&result)
-    }
-}
+use std::usize;
 
 const V0LIMIT: u64 = 1u64 << 6;
 const V1LIMIT: u64 = 1u64 << 14;
 const V2LIMIT: u64 = 1u64 << 22;
 
 
-impl BipackSink for Vec<u8> {
-    fn put_u8(self: &mut Self, data: u8) -> &Self {
-        self.push(data);
-        self
+pub trait IntoU64 {
+    fn into_u64(self) -> u64;
+}
+
+macro_rules! into_u64 {
+    ($($type:ident),*) => {
+        $(impl IntoU64 for $type {
+            fn into_u64(self) -> u64 {
+                self as u64
+            }
+        })*
+    };
+}
+
+into_u64!(u8, u16, u32, usize, u64);
+
+
+pub trait BipackSink {
+    fn put_u8(self: &mut Self, data: u8);
+
+    fn put_fixed_bytes(self: &mut Self, data: &[u8]) {
+        for b in data { self.put_u8(*b); }
+    }
+
+    fn put_var_bytes(self: &mut Self,data: &[u8]) {
+        self.put_unsigned(data.len());
+        self.put_fixed_bytes(data);
+    }
+
+    fn put_str(self: &mut Self,str: &str) {
+        self.put_var_bytes(str.as_bytes());
+    }
+
+    fn put_u16(self: &mut Self, mut value: u16) {
+        let mut result = [0u8; 2];
+        for i in (0..result.len()).rev() {
+            result[i] = value as u8;
+            value = value >> 8;
+        }
+        self.put_fixed_bytes(&result);
+    }
+
+    fn put_u32(self: &mut Self, mut value: u32) {
+        let mut result = [0u8; 4];
+        for i in (0..result.len()).rev() {
+            result[i] = value as u8;
+            value = value >> 8;
+        }
+        self.put_fixed_bytes(&result);
+    }
+    fn put_u64(self: &mut Self, mut value: u64) {
+        let mut result = [0u8; 8];
+        for i in (0..result.len()).rev() {
+            result[i] = value as u8;
+            value = value >> 8;
+        }
+        self.put_fixed_bytes(&result);
+    }
+
+    fn put_unsigned<T: IntoU64>(self: &mut Self, number: T) {
+        let value = number.into_u64();
+        let mut encode_seq = |ty: u8, bytes: &[u64]| {
+            if bytes.len() == 0 { self.put_u8(0); } else {
+                if bytes[0] as u64 > V0LIMIT { panic!("first byte is too big (internal error)"); }
+                self.put_u8((ty & 0x03) | ((bytes[0] as u8) << 2));
+                for i in 1..bytes.len() {
+                    self.put_u8(bytes[i] as u8);
+                }
+            }
+        };
+
+        if value < V0LIMIT {
+            encode_seq(0, &[value]);
+        }
+        else if value < V1LIMIT {
+            encode_seq( 1, &[value & 0x3F, value >> 6]);
+        }
+        else if value < V2LIMIT {
+            encode_seq( 2, &[value & 0x3f, value >> 6, value >> 14]);
+        }
+        else {
+            encode_seq(3, &[value & 0x3f, value >> 6, value >> 14]);
+            self.put_var_unsigned(value >> 22);
+        }
+    }
+
+    fn put_var_unsigned(self: &mut Self, value: u64) {
+        let mut rest = value;
+        loop {
+            let x = rest & 127;
+            rest = rest >> 7;
+            if rest > 0 {
+                self.put_u8((x | 0x80) as u8);
+            } else {
+                self.put_u8(x as u8)
+            }
+            if rest == 0 { break; }
+        }
+    }
+}
+
+
+impl BipackSink for Vec<u8> {
+    fn put_u8(self: &mut Self, data: u8) {
+        self.push(data);
     }
 }
diff --git a/src/bipack_source.rs b/src/bipack_source.rs
index ad2d431..ee0e97f 100644
--- a/src/bipack_source.rs
+++ b/src/bipack_source.rs
@@ -1,3 +1,5 @@
+use std::string::FromUtf8Error;
+
 /// Data source compatible with mp_bintools serialization. It supports
 /// fixed-size integers in rihgt order and varint ans smartint encodings
 /// separately.
@@ -50,6 +52,20 @@ pub trait BipackSource {
         self.smart_u64() as u16
     }
     fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 }
+
+    fn fixed_bytes(self: &mut Self,size: usize) -> Vec<u8> {
+        let mut result = Vec::with_capacity(size);
+        for i in 0..size { result.push(self.u8()); }
+        result
+    }
+
+    fn var_bytes(self: &mut Self) -> Vec<u8> {
+        let size = self.smart_u64() as usize;
+        self.fixed_bytes(size)
+    }
+    fn str(self: &mut Self) -> Result<String, FromUtf8Error> {
+        String::from_utf8(self.var_bytes())
+    }
 }
 
 pub struct SliceSource<'a> {
diff --git a/src/lib.rs b/src/lib.rs
index 2f29f16..1fbf26a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@
 
 mod bipack_source;
 mod bipack_sink;
+mod to_dump;
 
 pub fn add(left: usize, right: usize) -> usize {
     left + right
@@ -13,6 +14,7 @@ mod tests {
     use base64::Engine;
     use crate::bipack_sink::{BipackSink};
     use crate::bipack_source::{bipack_source, BipackSource, SliceSource};
+    use crate::to_dump::to_dump;
 
     #[test]
     fn fixed_unpack() {
@@ -34,6 +36,7 @@ mod tests {
         base64::engine::general_purpose::STANDARD_NO_PAD
             .decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src)
             .expect("decoded vector");
+        // println!("{}", hex::encode(&src));
         let mut ss = bipack_source(&src);
         assert_eq!(7, ss.u8());
         assert_eq!(64000, ss.smart_u16());
@@ -48,8 +51,37 @@ mod tests {
         data.put_u16(64000);
         data.put_u32(66000);
         data.put_u64(931127140399);
-        // println!("-- {:?}", data.iter().map(|x| format!("{:0x}", x)).collect::<Vec<_>>());
-        assert_eq!("07fa00000101d0000000d8cb80a02f", hex::encode(&data).as_str());
-        // println!("data = {}", to_hex(&data));
+        assert_eq!("07fa00000101d0000000d8cb80a02f", hex::encode(&data));
+    }
+
+    #[test]
+    fn smart_pack() {
+        let mut data: Vec<u8> = Vec::new();
+        data.put_u8(7);
+        data.put_unsigned(64000u16);
+        data.put_unsigned(66000u32);
+        data.put_unsigned(931127140399u64);
+        // println!("?? {}", hex::encode(&data));
+        assert_eq!("0702e803420704bf8002aec60d", hex::encode(&data));
+    }
+
+    #[test]
+    fn pack_varbinaries_and_string() {
+        let mut data = Vec::<u8>::new();
+        data.put_str("Hello, rupack!");
+        println!("{}",to_dump(&data));
+        let mut src = bipack_source(&data);
+        assert_eq!("Hello, rupack!", src.str().unwrap());
+    }
+
+    #[test]
+    fn test_dump() {
+        for l in 1..64 {
+            let mut d2 = Vec::new();
+            for u in 0..l {
+                d2.push(u as u8);
+            }
+            println!("{}", to_dump(&d2));
+        }
     }
 }
\ No newline at end of file
diff --git a/src/to_dump.rs b/src/to_dump.rs
new file mode 100644
index 0000000..897daca
--- /dev/null
+++ b/src/to_dump.rs
@@ -0,0 +1,37 @@
+use string_builder::Builder;
+
+pub fn to_dump(data: &[u8]) -> String {
+    let mut offset = 0usize;
+    let mut counter = 0;
+    let mut result = Builder::default();
+
+    fn ascii_dump(result: &mut Builder, counter: usize, data: &[u8], offset: usize) {
+        for i in counter..16 { result.append("   "); }
+        result.append("|");
+        for i in 0..counter {
+            let b = data[offset - counter + i];
+            if b >= 32 && b < 127 {
+                result.append(b as char)
+            } else {
+                result.append('.');
+            }
+        }
+        for i in counter..16 { result.append(' '); }
+        result.append("|\n");
+    }
+
+    while offset < data.len() {
+        if counter == 0 {
+            result.append(format!("{:04X} ", offset))
+        }
+        counter += 1;
+        result.append(format!("{:02x} ", data[offset]));
+        offset += 1;
+        if counter == 16 {
+            ascii_dump(&mut result, counter, data, offset);
+            counter = 0;
+        }
+    }
+    if counter != 0 { ascii_dump(&mut result, counter, data, offset); }
+    result.string().unwrap()
+}