support for byte buffers, strings and binary dump

This commit is contained in:
Sergey Chernov 2023-10-07 09:27:16 +00:00 committed by sergeych
parent cf63de0dbb
commit 3433e3bd84
5 changed files with 195 additions and 45 deletions

View File

@ -8,6 +8,7 @@ edition = "2021"
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
string-builder = "0.2.0"
[dev-dependencies] [dev-dependencies]
base64 = "0.21.4" base64 = "0.21.4"

View File

@ -1,51 +1,115 @@
use std::iter::Iterator; use std::iter::Iterator;
use std::usize;
/// Byte-oriented output sink for fixed-size unsigned integers.
///
/// Implementors provide only `put_u8`; the remaining methods are default
/// implementations built on top of it. Every method hands back a shared
/// reference to the sink it was called on.
pub trait BipackSink {
    /// Append a single byte to the sink.
    fn put_u8(self: &mut Self, data: u8) -> &Self;

    /// Append every byte of `data`, in order, with no length prefix.
    fn put_fixed_bytes(self: &mut Self, data: &[u8]) -> &Self {
        for &byte in data {
            self.put_u8(byte);
        }
        self
    }

    /// Append `value` as 2 bytes, most significant byte first.
    fn put_u16(self: &mut Self, value: u16) -> &Self {
        // `to_be_bytes` yields exactly the byte order the manual shift loop
        // used to build, without the debug printing it carried along.
        self.put_fixed_bytes(&value.to_be_bytes())
    }

    /// Append `value` as 4 bytes, most significant byte first.
    fn put_u32(self: &mut Self, value: u32) -> &Self {
        self.put_fixed_bytes(&value.to_be_bytes())
    }

    /// Append `value` as 8 bytes, most significant byte first.
    fn put_u64(self: &mut Self, value: u64) -> &Self {
        self.put_fixed_bytes(&value.to_be_bytes())
    }
}
const V0LIMIT: u64 = 1u64 << 6; const V0LIMIT: u64 = 1u64 << 6;
const V1LIMIT: u64 = 1u64 << 14; const V1LIMIT: u64 = 1u64 << 14;
const V2LIMIT: u64 = 1u64 << 22; const V2LIMIT: u64 = 1u64 << 22;
impl BipackSink for Vec<u8> { pub trait IntoU64 {
fn put_u8(self: &mut Self, data: u8) -> &Self { fn into_u64(self) -> u64;
self.push(data); }
self
// Stamps out an `IntoU64` implementation for every primitive integer type
// named in the comma-separated argument list. Each generated impl converts
// with a plain `as u64` cast.
macro_rules! into_u64 {
    ($($int:ident),*) => {
        $(
            impl IntoU64 for $int {
                fn into_u64(self) -> u64 {
                    self as u64
                }
            }
        )*
    };
}

// Unsigned types accepted wherever an `IntoU64` value is expected.
into_u64!(u8, u16, u32, usize, u64);
/// Byte-oriented output sink.
///
/// Implementors provide only [`BipackSink::put_u8`]; fixed-size integers,
/// length-prefixed byte strings and the two variable-length unsigned
/// encodings are default methods built on top of it.
pub trait BipackSink {
    /// Append a single byte to the sink.
    fn put_u8(&mut self, data: u8);

    /// Append every byte of `data`, in order, with no length prefix.
    fn put_fixed_bytes(&mut self, data: &[u8]) {
        for &byte in data {
            self.put_u8(byte);
        }
    }

    /// Append `data` preceded by its length, the length being written with
    /// [`BipackSink::put_unsigned`].
    fn put_var_bytes(&mut self, data: &[u8]) {
        self.put_unsigned(data.len());
        self.put_fixed_bytes(data);
    }

    /// Append the UTF-8 bytes of `str` as a length-prefixed byte string.
    fn put_str(&mut self, str: &str) {
        self.put_var_bytes(str.as_bytes());
    }

    /// Append `value` as 2 bytes, most significant byte first.
    fn put_u16(&mut self, value: u16) {
        self.put_fixed_bytes(&value.to_be_bytes());
    }

    /// Append `value` as 4 bytes, most significant byte first.
    fn put_u32(&mut self, value: u32) {
        self.put_fixed_bytes(&value.to_be_bytes());
    }

    /// Append `value` as 8 bytes, most significant byte first.
    fn put_u64(&mut self, value: u64) {
        self.put_fixed_bytes(&value.to_be_bytes());
    }

    /// Append `number` in the compact tagged encoding.
    ///
    /// The first byte holds a 2-bit tag in its low bits and the low 6 bits
    /// of the value in its high bits. The tag selects what follows:
    ///
    /// | tag | condition           | following bytes                                  |
    /// |-----|---------------------|--------------------------------------------------|
    /// | 0   | value < `V0LIMIT`   | none                                             |
    /// | 1   | value < `V1LIMIT`   | bits 6..14                                       |
    /// | 2   | value < `V2LIMIT`   | bits 6..14, bits 14..22                          |
    /// | 3   | anything larger     | bits 6..14, bits 14..22, then bits 22.. written  |
    /// |     |                     | by [`BipackSink::put_var_unsigned`]              |
    fn put_unsigned<T: IntoU64>(&mut self, number: T) {
        let value = number.into_u64();

        // Tag plus the number of whole bytes that follow the head byte.
        let (tag, tail_len): (u8, u32) = if value < V0LIMIT {
            (0, 0)
        } else if value < V1LIMIT {
            (1, 1)
        } else if value < V2LIMIT {
            (2, 2)
        } else {
            (3, 2)
        };

        // Masking to 6 bits keeps the shifted value inside a byte, so no
        // runtime range check is needed on the head byte.
        self.put_u8(tag | (((value & 0x3f) as u8) << 2));

        // Tail bytes carry consecutive 8-bit groups starting at bit 6; the
        // `as u8` cast deliberately keeps only the low 8 bits of each shift.
        for index in 0..tail_len {
            self.put_u8((value >> (6 + 8 * index)) as u8);
        }

        if tag == 3 {
            self.put_var_unsigned(value >> 22);
        }
    }

    /// Append `value` in 7-bit groups, least significant group first.
    ///
    /// Every byte except the last has its top bit (`0x80`) set; zero is
    /// written as a single `0` byte.
    fn put_var_unsigned(&mut self, value: u64) {
        let mut rest = value;
        loop {
            let group = (rest & 0x7f) as u8;
            rest >>= 7;
            if rest == 0 {
                self.put_u8(group);
                break;
            }
            self.put_u8(group | 0x80);
        }
    }
}
/// In-memory sink: a growable byte vector collects everything written to it.
impl BipackSink for Vec<u8> {
// Each byte written is pushed onto the end of the vector.
fn put_u8(self: &mut Self, data: u8) {
self.push(data);
} }
} }

View File

@ -1,3 +1,5 @@
use std::string::FromUtf8Error;
/// Data source compatible with mp_bintools serialization. It supports /// Data source compatible with mp_bintools serialization. It supports
/// fixed-size integers in right order and varint and smartint encodings /// fixed-size integers in right order and varint and smartint encodings
/// separately. /// separately.
@ -50,6 +52,20 @@ pub trait BipackSource {
self.smart_u64() as u16 self.smart_u64() as u16
} }
fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 } fn smart_u32(self: &mut Self) -> u32 { self.smart_u64() as u32 }
/// Read `size` bytes from the source and return them as a vector.
///
/// The bytes are obtained by calling `self.u8()` exactly `size` times, in
/// order; a `size` of zero yields an empty vector.
fn fixed_bytes(self: &mut Self, size: usize) -> Vec<u8> {
    // The range has an exact length, so `collect` sizes the vector up front
    // just as an explicit `with_capacity` would.
    (0..size).map(|_| self.u8()).collect()
}
/// Read a length-prefixed byte string.
///
/// The length is taken from `self.smart_u64()` (cast to `usize`), then that
/// many bytes are read with `fixed_bytes`.
fn var_bytes(self: &mut Self) -> Vec<u8> {
    // The length must be read before the payload, so it is bound first.
    let length = self.smart_u64() as usize;
    self.fixed_bytes(length)
}
/// Read a length-prefixed byte string with `var_bytes` and decode it as UTF-8.
///
/// # Errors
/// Returns `FromUtf8Error` when the bytes read are not valid UTF-8.
fn str(self: &mut Self) -> Result<String, FromUtf8Error> {
    let raw = self.var_bytes();
    String::from_utf8(raw)
}
} }
pub struct SliceSource<'a> { pub struct SliceSource<'a> {

View File

@ -3,6 +3,7 @@
mod bipack_source; mod bipack_source;
mod bipack_sink; mod bipack_sink;
mod to_dump;
pub fn add(left: usize, right: usize) -> usize { pub fn add(left: usize, right: usize) -> usize {
left + right left + right
@ -13,6 +14,7 @@ mod tests {
use base64::Engine; use base64::Engine;
use crate::bipack_sink::{BipackSink}; use crate::bipack_sink::{BipackSink};
use crate::bipack_source::{bipack_source, BipackSource, SliceSource}; use crate::bipack_source::{bipack_source, BipackSource, SliceSource};
use crate::to_dump::to_dump;
#[test] #[test]
fn fixed_unpack() { fn fixed_unpack() {
@ -34,6 +36,7 @@ mod tests {
base64::engine::general_purpose::STANDARD_NO_PAD base64::engine::general_purpose::STANDARD_NO_PAD
.decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src) .decode_vec("BwLoA0IHBL+AAq7GDQ", &mut src)
.expect("decoded vector"); .expect("decoded vector");
// println!("{}", hex::encode(&src));
let mut ss = bipack_source(&src); let mut ss = bipack_source(&src);
assert_eq!(7, ss.u8()); assert_eq!(7, ss.u8());
assert_eq!(64000, ss.smart_u16()); assert_eq!(64000, ss.smart_u16());
@ -48,8 +51,37 @@ mod tests {
data.put_u16(64000); data.put_u16(64000);
data.put_u32(66000); data.put_u32(66000);
data.put_u64(931127140399); data.put_u64(931127140399);
// println!("-- {:?}", data.iter().map(|x| format!("{:0x}", x)).collect::<Vec<_>>()); assert_eq!("07fa00000101d0000000d8cb80a02f", hex::encode(&data));
assert_eq!("07fa00000101d0000000d8cb80a02f", hex::encode(&data).as_str()); }
// println!("data = {}", to_hex(&data));
// Packs one raw byte followed by three values through `put_unsigned` and
// compares the hex form of the buffer with a fixed expected string.
#[test]
fn smart_pack() {
    let mut packed = Vec::<u8>::new();
    packed.put_u8(7);
    packed.put_unsigned(64000u16);
    packed.put_unsigned(66000u32);
    packed.put_unsigned(931127140399u64);
    let encoded = hex::encode(&packed);
    assert_eq!("0702e803420704bf8002aec60d", encoded);
}
// Round trip: a string written with `put_str` must come back unchanged from
// a source reading the same buffer. The dump is printed for inspection only.
#[test]
fn pack_varbinaries_and_string() {
    let mut packed: Vec<u8> = Vec::new();
    packed.put_str("Hello, rupack!");
    println!("{}", to_dump(&packed));
    let mut reader = bipack_source(&packed);
    let decoded = reader.str().unwrap();
    assert_eq!("Hello, rupack!", decoded);
}
// Smoke test for `to_dump`: prints the dump of buffers of every length from
// 1 to 63, each filled with the byte values 0, 1, 2, ... There are no
// assertions; the output is meant to be inspected by eye.
#[test]
fn test_dump() {
for l in 1..64 {
let mut d2 = Vec::new();
// Fill the buffer with `l` consecutive byte values starting at 0.
for u in 0..l {
d2.push(u as u8);
}
println!("{}", to_dump(&d2));
}
} }
} }

37
src/to_dump.rs Normal file
View File

@ -0,0 +1,37 @@
use string_builder::Builder;
/// Render `data` as a classic hex dump, 16 bytes per row.
///
/// Each row consists of:
/// * the offset of its first byte as 4 upper-case hex digits and a space,
/// * one `xx ` cell (lower-case hex) per byte,
/// * the same bytes as text between `|` bars, where anything outside
///   printable ASCII (`0x20..=0x7e`) is shown as `.`,
/// * a trailing newline.
///
/// A final row shorter than 16 bytes is padded in both columns so that the
/// bars line up with those of full rows. Empty input yields an empty string.
pub fn to_dump(data: &[u8]) -> String {
    const ROW: usize = 16;
    // "oooo " + 16 * "xx " + "|" + 16 chars + "|\n"
    const LINE_WIDTH: usize = 5 + 3 * ROW + 1 + ROW + 2;

    let rows = (data.len() + ROW - 1) / ROW;
    let mut out = String::with_capacity(rows * LINE_WIDTH);

    for (row, chunk) in data.chunks(ROW).enumerate() {
        let missing = ROW - chunk.len();

        out.push_str(&format!("{:04X} ", row * ROW));
        for byte in chunk {
            out.push_str(&format!("{:02x} ", byte));
        }
        // A hex cell is three characters wide, so each absent byte needs
        // three spaces to keep the text column aligned.
        for _ in 0..missing {
            out.push_str("   ");
        }

        out.push('|');
        for &byte in chunk {
            // 0x7f (DEL) is a control character and must not be emitted raw.
            let shown = if (0x20..0x7f).contains(&byte) { byte as char } else { '.' };
            out.push(shown);
        }
        for _ in 0..missing {
            out.push(' ');
        }
        out.push_str("|\n");
    }

    out
}