refs #35 Lynon builtin compression
This commit is contained in:
parent
34bc7297bd
commit
f3d766d1b1
@ -1,36 +1,9 @@
|
|||||||
package net.sergeych.lynon
|
package net.sergeych.lynon
|
||||||
|
|
||||||
abstract class BitInput {
|
interface BitInput {
|
||||||
|
|
||||||
data class DataByte(val data: Int,val bits: Int)
|
|
||||||
|
|
||||||
/**
|
fun getBitOrNull(): Int?
|
||||||
* Return next byte, int in 0..255 range, or -1 if end of stream reached
|
|
||||||
*/
|
|
||||||
abstract fun getByte(): DataByte
|
|
||||||
|
|
||||||
private var accumulator = 0
|
|
||||||
|
|
||||||
var isEndOfStream: Boolean = false
|
|
||||||
private set
|
|
||||||
|
|
||||||
private var mask = 0
|
|
||||||
|
|
||||||
fun getBitOrNull(): Int? {
|
|
||||||
if (isEndOfStream) return null
|
|
||||||
if (mask == 0) {
|
|
||||||
val ab = getByte()
|
|
||||||
accumulator = ab.data
|
|
||||||
if (accumulator == -1) {
|
|
||||||
isEndOfStream = true
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
mask = 1 shl (ab.bits - 1)
|
|
||||||
}
|
|
||||||
val result = if (0 == accumulator and mask) 0 else 1
|
|
||||||
mask = mask shr 1
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
fun getBitsOrNull(count: Int): ULong? {
|
fun getBitsOrNull(count: Int): ULong? {
|
||||||
var result = 0UL
|
var result = 0UL
|
||||||
@ -54,8 +27,11 @@ abstract class BitInput {
|
|||||||
return getBitOrNull() ?: throw IllegalStateException("Unexpected end of stream")
|
return getBitOrNull() ?: throw IllegalStateException("Unexpected end of stream")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun unpackUnsigned(): ULong {
|
fun unpackUnsigned(): ULong =
|
||||||
val tetrades = getBits(4).toInt()
|
unpackUnsignedOrNull() ?: throw IllegalStateException("Unexpected end of stream")
|
||||||
|
|
||||||
|
fun unpackUnsignedOrNull(): ULong? {
|
||||||
|
val tetrades = getBitsOrNull(4)?.toInt() ?: return null
|
||||||
var result = 0UL
|
var result = 0UL
|
||||||
var shift = 0
|
var shift = 0
|
||||||
for (i in 0.. tetrades) {
|
for (i in 0.. tetrades) {
|
||||||
@ -84,5 +60,27 @@ abstract class BitInput {
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Same as [decompressOrNull], but throws [DecompressionException] when that call returns `null`. */
fun decompress(): ByteArray = decompressOrNull() ?: throw DecompressionException("Unexpected end of stream")
|
||||||
|
|
||||||
|
/**
 * Read a record: a packed unsigned size, a 1-bit "compressed" flag and, when the flag
 * is set, a 2-bit method id (only `0`, LZW, is accepted) followed by the payload.
 *
 * @return the original bytes, or `null` if the size header could not be read
 * @throws DecompressionException if the declared size does not fit into [Int], the
 *      compression method is unknown, or the uncompressed payload is truncated
 */
fun decompressOrNull(): ByteArray? {
    val declaredSize = unpackUnsignedOrNull() ?: return null
    // A blind toInt() would wrap oversized headers into negative or bogus lengths.
    if (declaredSize > Int.MAX_VALUE.toULong())
        throw DecompressionException("Declared size is too big: $declaredSize")
    val originalSize = declaredSize.toInt()

    return if (getBit() == 1) {
        // data is compressed
        // val expectedCRC = getBits(32).toUInt()
        val method = getBits(2).toInt()
        if (method != 0) throw DecompressionException("Unknown compression method")
        LZW.decompress(this, originalSize).asByteArray()
    } else {
        getBytes(originalSize)
            ?: throw DecompressionException("Unexpected end of stream in uncompressed data")
    }
}
|
||||||
|
|
||||||
|
@Suppress("unused")
|
||||||
|
/** Decode the bytes returned by [decompressOrNull] as a string; `null` when that call returns `null`. */
fun decompressStringOrNull(): String? = decompressOrNull()?.decodeToString()
|
||||||
|
|
||||||
|
/** Decode the bytes returned by [decompress] as a string. */
fun decompressString(): String = decompress().decodeToString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
34
lynglib/src/commonMain/kotlin/net/sergeych/lynon/BitList.kt
Normal file
34
lynglib/src/commonMain/kotlin/net/sergeych/lynon/BitList.kt
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
package net.sergeych.lynon
|
||||||
|
|
||||||
|
/**
 * Random-access list of bits addressed by a [Long] index.
 */
@Suppress("unused")
interface BitList {
    /** Read the bit stored at [bitIndex]. */
    operator fun get(bitIndex: Long): Int

    /** Store [value] at [bitIndex]. */
    operator fun set(bitIndex: Long, value: Int)

    /** Number of bits in the list. */
    val size: Long

    /** Indices accepted by [get] and [set]. */
    val indices: LongRange

    /**
     * Create a [BitInput] that yields the bits of this list in index order,
     * then returns `null` once [size] bits have been read.
     */
    fun toInput(): BitInput = object : BitInput {
        private var index = 0L

        override fun getBitOrNull(): Int? {
            if (index >= size) return null
            return this@BitList[index++]
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Build a [BitList] holding [bits] in order: a [TinyBits] for up to 64 bits,
 * a [BitArray] for anything longer.
 */
fun bitListOf(vararg bits: Int): BitList =
    when {
        bits.size > 64 -> BitArray.ofBits(*bits)
        else -> TinyBits.of(*bits)
    }
|
||||||
|
|
||||||
|
/**
 * Create a zero-filled [BitList] of exactly [sizeInBits] bits: a [TinyBits] for
 * up to 64 bits, a [BitArray] for anything longer.
 */
@Suppress("unused")
fun bitListOfSize(sizeInBits: Long): BitList =
    if (sizeInBits > 64)
        BitArray.withBitSize(sizeInBits)
    else
        // Pass the size explicitly: the no-arg TinyBits() is an empty (0-bit) list.
        TinyBits(0UL, sizeInBits)
|
@ -1,27 +1,6 @@
|
|||||||
package net.sergeych.lynon
|
package net.sergeych.lynon
|
||||||
|
|
||||||
abstract class BitOutput {
|
interface BitOutput {
|
||||||
|
|
||||||
abstract fun outputByte(byte: UByte)
|
|
||||||
|
|
||||||
private var accumulator = 0
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Number of bits in accumulator. After output is closed by [close] this value is
|
|
||||||
* not changed and represents the number of bits in the last byte; this should
|
|
||||||
* be used to properly calculate end of the bit stream
|
|
||||||
*/
|
|
||||||
private var accumulatorBits = 0
|
|
||||||
private set
|
|
||||||
|
|
||||||
/**
|
|
||||||
* When [close] is called, represents the number of used bits in the last byte;
|
|
||||||
* bits after this number are the garbage and should be ignored
|
|
||||||
*/
|
|
||||||
val lastByteBits: Int get() {
|
|
||||||
if( !isClosed ) throw IllegalStateException("BitOutput is not closed")
|
|
||||||
return accumulatorBits
|
|
||||||
}
|
|
||||||
|
|
||||||
fun putBits(bits: ULong, count: Int) {
|
fun putBits(bits: ULong, count: Int) {
|
||||||
require(count <= 64)
|
require(count <= 64)
|
||||||
@ -41,13 +20,11 @@ abstract class BitOutput {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun putBit(bit: Int) {
|
fun putBit(bit: Int)
|
||||||
accumulator = (accumulator shl 1) or bit
|
|
||||||
if (++accumulatorBits >= 8) {
|
fun putBits(bitList: BitList) {
|
||||||
outputByte(accumulator.toUByte())
|
for (i in bitList.indices)
|
||||||
accumulator = accumulator shr 0
|
putBit(bitList[i])
|
||||||
accumulatorBits = 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun packUnsigned(value: ULong) {
|
fun packUnsigned(value: ULong) {
|
||||||
@ -71,23 +48,57 @@ abstract class BitOutput {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var isClosed = false
|
|
||||||
private set
|
|
||||||
|
|
||||||
fun close(): BitOutput {
|
|
||||||
if (!isClosed) {
|
|
||||||
if (accumulatorBits > 0) {
|
|
||||||
outputByte(accumulator.toUByte())
|
|
||||||
} else accumulatorBits = 8
|
|
||||||
isClosed = true
|
|
||||||
}
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
fun putBytes(data: ByteArray) {
|
fun putBytes(data: ByteArray) {
|
||||||
for (b in data) {
|
for (b in data) {
|
||||||
putBits(b.toULong(), 8)
|
putBits(b.toULong(), 8)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Create compressed record with content and size check. Compression works with _bytes_.
 *
 * Structure:
 *
 * | size | meaning |
 * |------|--------------------------------------------------|
 * | packed unsigned | size of uncompressed content in bytes |
 * | 1 | 0 - not compressed, 1 - compressed |
 *
 * __If compressed__, then:
 *
 * | size | meaning |
 * |------|--------------------------------------|
 * | 2 | 00 - LZW, other combinations reserved|
 *
 * After this header compressed bits follow.
 *
 * __If not compressed,__ then source data follows as bit stream.
 *
 * Compressed block overhead is 3 bits, uncompressed 1.
 *
 * @param source bytes to store; written as is when LZW does not make them shorter
 */
fun compress(source: ByteArray) {
    // size of the uncompressed content
    packUnsigned(source.size.toULong())

    val compressed = LZW.compress(source.asUByteArray())

    // Use the compressed form only when it is shorter, counting the 2 method bits.
    if (compressed.size + 2 < source.size * 8L) {
        putBit(1)
        // method: LZW
        putBits(0, 2)
        // compressed data
        putBits(compressed)
    } else {
        putBit(0)
        putBytes(source)
    }
}
|
||||||
|
|
||||||
|
/** Encode [source] to bytes with [String.encodeToByteArray] and write it as a compressed record. */
fun compress(source: String) = compress(source.encodeToByteArray())
|
||||||
|
|
||||||
}
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
package net.sergeych.lynon
|
||||||
|
|
||||||
|
/** Thrown when packed or compressed data cannot be decoded. */
class DecompressionException(message: String) : IllegalArgumentException(message)
|
@ -28,10 +28,10 @@ open class LynonDecoder(val bin: BitInput,val settings: LynonSettings = LynonSet
|
|||||||
return decodeCached { type.deserialize(scope, this) }
|
return decodeCached { type.deserialize(scope, this) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fun unpackBinaryData(): ByteArray? {
|
fun unpackBinaryData(): ByteArray = bin.decompress()
|
||||||
val size = bin.unpackUnsigned()
|
|
||||||
return bin.getBytes(size.toInt())
|
@Suppress("unused")
|
||||||
}
|
fun unpackBinaryDataOrNull(): ByteArray? = bin.decompressOrNull()
|
||||||
|
|
||||||
fun unpackBoolean(): Boolean {
|
fun unpackBoolean(): Boolean {
|
||||||
return bin.getBit() == 1
|
return bin.getBit() == 1
|
||||||
|
@ -29,8 +29,7 @@ open class LynonEncoder(val bout: BitOutput,val settings: LynonSettings = LynonS
|
|||||||
}
|
}
|
||||||
|
|
||||||
fun encodeBinaryData(data: ByteArray) {
|
fun encodeBinaryData(data: ByteArray) {
|
||||||
bout.packUnsigned(data.size.toULong())
|
bout.compress(data)
|
||||||
bout.putBytes(data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun encodeSigned(value: Long) {
|
fun encodeSigned(value: Long) {
|
||||||
|
@ -1,20 +1,39 @@
|
|||||||
package net.sergeych.lynon
|
package net.sergeych.lynon
|
||||||
|
|
||||||
class MemoryBitInput(val packedBits: UByteArray,val lastByteBits: Int): BitInput() {
|
|
||||||
|
|
||||||
constructor(bout: MemoryBitOutput): this(bout.toUByteArray(), bout.lastByteBits)
|
class MemoryBitInput(val packedBits: UByteArray, val lastByteBits: Int) : BitInput {
|
||||||
|
|
||||||
|
constructor(ba: BitArray) : this(ba.bytes, ba.lastByteBits) {}
|
||||||
|
constructor(mba: MemoryBitOutput) : this(mba.toBitArray()) {}
|
||||||
|
|
||||||
private var index = 0
|
private var index = 0
|
||||||
|
|
||||||
override fun getByte(): DataByte {
|
/**
|
||||||
return if( index < packedBits.size ) {
|
* Return next byte, int in 0..255 range, or -1 if end of stream reached
|
||||||
DataByte(
|
*/
|
||||||
packedBits[index++].toInt(),
|
private var accumulator = 0
|
||||||
if( index == packedBits.size ) lastByteBits else 8
|
|
||||||
)
|
private var isEndOfStream: Boolean = false
|
||||||
} else {
|
private set
|
||||||
DataByte(-1,0)
|
|
||||||
|
private var mask = 0
|
||||||
|
|
||||||
|
override fun getBitOrNull(): Int? {
|
||||||
|
if (isEndOfStream) return null
|
||||||
|
if (mask == 0) {
|
||||||
|
if (index < packedBits.size) {
|
||||||
|
accumulator = packedBits[index++].toInt()
|
||||||
|
val n = if (index == packedBits.size) lastByteBits else 8
|
||||||
|
mask = 1 shl (n - 1)
|
||||||
|
} else {
|
||||||
|
isEndOfStream = true
|
||||||
|
return null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
val result = if (0 == accumulator and mask) 0 else 1
|
||||||
|
mask = mask shr 1
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@ -1,14 +1,135 @@
|
|||||||
package net.sergeych.lynon
|
package net.sergeych.lynon
|
||||||
|
|
||||||
class MemoryBitOutput: BitOutput() {
|
import kotlin.math.min
|
||||||
private val buffer = mutableListOf<UByte>()
|
|
||||||
|
|
||||||
fun toUByteArray(): UByteArray {
|
/**
|
||||||
close()
|
* BitList implementation as a fixed-size array of bits; indexing works exactly the same as if
|
||||||
return buffer.toTypedArray().toUByteArray()
|
* [MemoryBitInput] is used with [MemoryBitInput.getBit].
|
||||||
|
*/
|
||||||
|
class BitArray(val bytes: UByteArray, val lastByteBits: Int) : BitList {
|
||||||
|
|
||||||
|
val bytesSize: Int get() = bytes.size
|
||||||
|
override val size by lazy { bytes.size * 8L - (8 - lastByteBits) }
|
||||||
|
|
||||||
|
override val indices by lazy { 0..<size }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return [BitInput] that can be used to read from this array
|
||||||
|
*/
|
||||||
|
fun toBitInput(): BitInput = MemoryBitInput(bytes, lastByteBits)
|
||||||
|
|
||||||
|
private fun getIndexAndMask(bitIndex: Long): Pair<Int, Int> {
|
||||||
|
val byteIndex = (bitIndex / 8).toInt()
|
||||||
|
if (byteIndex !in bytes.indices)
|
||||||
|
throw IndexOutOfBoundsException("$bitIndex is out of bounds")
|
||||||
|
val i = (bitIndex % 8).toInt()
|
||||||
|
return byteIndex to (
|
||||||
|
if (byteIndex == bytes.lastIndex) {
|
||||||
|
if (i >= lastByteBits)
|
||||||
|
throw IndexOutOfBoundsException("$bitIndex is out of bounds (last)")
|
||||||
|
1 shl (lastByteBits - i - 1)
|
||||||
|
} else {
|
||||||
|
1 shl (7 - i)
|
||||||
|
}
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun outputByte(byte: UByte) {
|
/** Return the bit at [bitIndex] as 0 or 1; the index is validated by [getIndexAndMask]. */
override operator fun get(bitIndex: Long): Int {
    val (byteIndex, mask) = getIndexAndMask(bitIndex)
    val masked = bytes[byteIndex].toInt() and mask
    return if (masked == 0) 0 else 1
}
|
||||||
|
|
||||||
|
/**
 * Set the bit at [bitIndex] to [value], which must be 0 or 1.
 * The index is validated by [getIndexAndMask].
 */
override operator fun set(bitIndex: Long, value: Int) {
    require(value == 0 || value == 1)
    val (byteIndex, mask) = getIndexAndMask(bitIndex)
    val old = bytes[byteIndex]
    bytes[byteIndex] = when (value) {
        1 -> old or mask.toUByte()
        else -> old and mask.inv().toUByte()
    }
}
|
||||||
|
|
||||||
|
/** Render up to the first 64 bits as `0`/`1` characters, followed by "…" when the array is longer. */
override fun toString(): String {
    val total = size
    val shown = min(total, 64)
    return buildString {
        for (i in 0..<shown) append(this@BitArray[i])
        if (shown < total) append("…")
    }
}
|
||||||
|
|
||||||
|
companion object {

    /**
     * Create a zero-filled [BitArray] holding exactly [size] bits.
     *
     * The last byte is reported as holding 8 bits when [size] is a multiple of 8
     * (including 0), because the constructor derives the total size as
     * `bytes.size * 8 - (8 - lastByteBits)`; a plain `size % 8` would give 0 there
     * and make the array 8 bits short.
     */
    fun withBitSize(size: Long): BitArray {
        val byteSize = ((size + 7) / 8).toInt()
        val remainder = (size % 8).toInt()
        val lastByteBits = if (remainder == 0) 8 else remainder
        return BitArray(UByteArray(byteSize), lastByteBits)
    }

    /** Create a [BitArray] containing [bits] in order; every value must be 0 or 1. */
    fun ofBits(vararg bits: Int): BitArray {
        return withBitSize(bits.size.toLong()).apply {
            for (i in bits.indices) {
                this[i.toLong()] = bits[i]
            }
        }
    }
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class MemoryBitOutput : BitOutput {
|
||||||
|
private val buffer = mutableListOf<UByte>()
|
||||||
|
|
||||||
|
private var accumulator = 0
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of bits in accumulator. After output is closed by [close] this value is
|
||||||
|
* not changed and represents the number of bits in the last byte; this should
|
||||||
|
* be used to properly calculate end of the bit stream
|
||||||
|
*/
|
||||||
|
private var accumulatorBits = 0
|
||||||
|
private set
|
||||||
|
|
||||||
|
// /**
|
||||||
|
// * When [close] is called, represents the number of used bits in the last byte;
|
||||||
|
// * bits after this number are the garbage and should be ignored
|
||||||
|
// */
|
||||||
|
// val lastByteBits: Int
|
||||||
|
// get() {
|
||||||
|
// if (!isClosed) throw IllegalStateException("BitOutput is not closed")
|
||||||
|
// return accumulatorBits
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
 * Append [bit] as the lowest bit of the accumulator; once 8 bits are collected
 * the byte is moved to the buffer and the bit counter is restarted.
 */
override fun putBit(bit: Int) {
    accumulator = accumulator.shl(1) or bit
    accumulatorBits += 1
    if (accumulatorBits < 8) return

    outputByte(accumulator.toUByte())
    accumulator = accumulator shr 8
    accumulatorBits = 0
}
|
||||||
|
|
||||||
|
var isClosed = false
|
||||||
|
private set
|
||||||
|
|
||||||
|
/**
 * Finish the output. On the first call a partially filled accumulator is written as the
 * last byte; if the accumulator is empty, the bit counter is set to 8 instead. Later
 * calls change nothing.
 *
 * @return the written bits as a [BitArray]
 */
fun close(): BitArray {
    if (isClosed) return toBitArray()

    if (accumulatorBits > 0)
        outputByte(accumulator.toUByte())
    else
        accumulatorBits = 8
    isClosed = true

    return toBitArray()
}
|
||||||
|
|
||||||
|
/**
 * Close the output if it is still open and return a [BitArray] built from a copy of
 * the buffered bytes and the bit count of the last byte.
 */
fun toBitArray(): BitArray {
    if (!isClosed) close()
    val bytes = buffer.toUByteArray()
    return BitArray(bytes, accumulatorBits)
}
|
||||||
|
|
||||||
|
/** Return a [BitInput] over the written bits; goes through [toBitArray], which closes this output if it is still open. */
fun toBitInput(): BitInput = toBitArray().toBitInput()
|
||||||
|
|
||||||
|
private fun outputByte(byte: UByte) {
|
||||||
buffer.add(byte)
|
buffer.add(byte)
|
||||||
}
|
}
|
||||||
}
|
}
|
71
lynglib/src/commonMain/kotlin/net/sergeych/lynon/TinyBits.kt
Normal file
71
lynglib/src/commonMain/kotlin/net/sergeych/lynon/TinyBits.kt
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
package net.sergeych.lynon
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Bit size-aware code, short [BitList] implementation, up to 64 bits (efficiency tradeoff).
 * E.g. `TinyBits(0U, 3) != TinyBits(0U, 2)`. For longer sequences use [BitArray].
 *
 * Index 0 is the most significant of the [size] stored bits.
 *
 * Note that [bitListOf] creates [TinyBits] when possible.
 */
class TinyBits(initValue: ULong = 0U, override val size: Long = 0) : BitList {

    private var bits: ULong = initValue

    constructor(value: ULong, size: Int) : this(value, size.toLong())

    override val indices: LongRange by lazy { 0..<size }

    /**
     * Mask selecting the bit at [bitIndex].
     *
     * @throws IndexOutOfBoundsException if [bitIndex] is outside [indices]
     */
    private fun maskFor(bitIndex: Long): ULong {
        if (bitIndex !in indices) throw IndexOutOfBoundsException("index out of bounds: $bitIndex")
        return 1UL shl (size - bitIndex - 1).toInt()
    }

    override operator fun get(bitIndex: Long): Int =
        if (bits and maskFor(bitIndex) != 0UL) 1 else 0

    /**
     * Set the bit at [bitIndex] when [value] is 1, clear it otherwise.
     *
     * The index is validated like in [get]: without the check an out-of-range index
     * wraps the shift count and silently changes an unrelated bit.
     */
    override fun set(bitIndex: Long, value: Int) {
        val mask = maskFor(bitIndex)
        bits = if (value == 1) bits or mask else bits and mask.inv()
    }

    override fun toString(): String {
        val result = StringBuilder()
        for (i in 0..<size) result.append(this[i])
        return result.toString()
    }

    /** Raw value holding the bits; the last index is the least significant bit. */
    val value by ::bits

    /**
     * Append [bit] as the new last (least significant) bit, shifting the existing
     * bits to the left, and return a _new instance_ one bit longer.
     */
    fun insertBit(bit: Int): TinyBits {
        return TinyBits((bits shl 1) or bit.toULong(), size + 1)
    }

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other == null || this::class != other::class) return false

        other as TinyBits

        if (size != other.size) return false
        if (bits != other.bits) return false

        return true
    }

    override fun hashCode(): Int {
        var result = size.hashCode()
        result = 31 * result + bits.hashCode()
        return result
    }

    companion object {
        /** Create an instance holding [bits] in order; a value of 1 sets the bit, anything else clears it. */
        fun of(vararg bits: Int): TinyBits {
            return TinyBits(0UL, bits.size).apply { bits.forEachIndexed { i, v -> this[i.toLong()] = v } }
        }
    }
}
|
244
lynglib/src/commonMain/kotlin/net/sergeych/lynon/huffman.kt
Normal file
244
lynglib/src/commonMain/kotlin/net/sergeych/lynon/huffman.kt
Normal file
@ -0,0 +1,244 @@
|
|||||||
|
package net.sergeych.lynon
|
||||||
|
|
||||||
|
import net.sergeych.collections.SortedList
|
||||||
|
|
||||||
|
/**
 * Experimental, reference implementation of Huffman trees and encoding.
 *
 * This is a reference Huffman encoding implementation that is not yet ready;
 * it was used to experiment alongside LZW. At the moment LZW won the competition
 * for the compressed module format for its speed and sufficiently small size.
 *
 * This is a byte-based compressor, which makes it not too interesting.
 *
 * TODO: convert to use various source dictionaries
 *
 * reason: a version that compresses bytes is not too interesting; particular alphabets
 * are often longer than byte bits and are often sparse, which requires another
 * code serialization implementation
 *
 * NOTE(review): empty input and input with a single distinct byte value do not look
 * supported by the current tree/code construction - confirm before relying on them.
 */
object Huffman {

    /** Huffman tree node ordered by frequency. */
    sealed class Node(val freq: Int) : Comparable<Node> {
        override fun compareTo(other: Node): Int {
            return freq.compareTo(other.freq)
        }

        /** Walk the tree reading bits from [bin]; returns the symbol, or `null` at end of stream. */
        abstract fun decode(bin: BitInput): Int?

        class Leaf(val value: Int, freq: Int) : Node(freq) {
            override fun toString(): String {
                return "[$value:$freq]"
            }

            override fun decode(bin: BitInput): Int {
                return value
            }
        }

        class Internal(val left: Node, val right: Node) : Node(left.freq + right.freq) {
            override fun toString(): String {
                return "[${left.freq}<- :<$freq>: ->${right.freq}]"
            }

            // bit 1 selects the left child, bit 0 the right one
            override fun decode(bin: BitInput): Int? {
                return when (bin.getBitOrNull()) {
                    1 -> left.decode(bin)
                    0 -> right.decode(bin)
                    else -> null
                }
            }
        }
    }

    /** Bit code assigned to a byte [symbol]. */
    data class Code(val symbol: Int, val bits: TinyBits) {

        val size by bits::size

        override fun toString(): String {
            return "[${Char(symbol)}:$size:$bits]"
        }
    }

    /**
     * Collect the code of every leaf of [tree] into a 256-slot table indexed by symbol
     * and convert the table to canonical form.
     */
    private fun generateCanonicCodes(tree: Node): List<Code?> {
        val codes = MutableList<Code?>(256) { null }

        fun traverse(node: Node, code: TinyBits) {
            when (node) {
                is Node.Leaf ->
                    codes[node.value] = (Code(node.value, code))

                is Node.Internal -> {
                    traverse(node.left, code.insertBit(1))
                    traverse(node.right, code.insertBit(0))
                }
            }
        }
        traverse(tree, TinyBits())

        return makeCanonical(codes)
    }

    /**
     * Reassign code bits canonically: codes are ordered by length, then by symbol; the
     * first one is all zeroes and each next one is the previous value plus one, padded
     * with zero bits on the right up to its own length. Only the lengths of [source]
     * codes are used.
     */
    private fun makeCanonical(source: List<Code?>): List<Code?> {
        val sorted = source.filterNotNull().sortedWith(canonicComparator)

        val canonical = MutableList<Code?>(256) { null }

        val first = sorted[0]
        var prev = TinyBits(0UL, first.bits.size)
        canonical[first.symbol] = first.copy(bits = prev)

        for (i in 1..<sorted.size) {
            val code = sorted[i]
            var bits = TinyBits(prev.value + 1U, prev.size)
            while (code.bits.size > bits.size) {
                bits = bits.insertBit(0)
            }
            canonical[code.symbol] = code.copy(bits = bits)
            prev = bits
        }
        return canonical
    }

    /** Canonical order: shorter codes first, ties broken by symbol value. */
    private val canonicComparator: Comparator<Code> = compareBy<Code>({ it.bits.size }, { it.symbol })

    /** Count byte frequencies in [data] and build the tree by merging the two rarest nodes repeatedly. */
    private fun buildTree(data: UByteArray): Node {
        val frequencies = IntArray(256)
        data.forEach { frequencies[it.toInt()]++ }

        val list = SortedList<Node>(*frequencies.mapIndexed { index, i -> Node.Leaf(index, i) }.filter { it.freq > 0 }
            .toTypedArray())

        // build the tree
        while (list.size > 1) {
            val left = list.removeAt(0)
            val right = list.removeAt(0)
            list.add(Node.Internal(left, right))
        }
        return list[0]
    }

    /**
     * Decode symbols from [bin] until it reports end of stream, matching the
     * accumulated bits against [codes] after every bit read.
     */
    fun decompressUsingCodes(bin: BitInput, codes: List<Code?>): UByteArray {
        val result = mutableListOf<UByte>()
        val table = codes.filterNotNull().associateBy { it.bits }

        outer@ while (true) {
            var input = TinyBits()
            while (true) {
                val bit = bin.getBitOrNull() ?: break@outer
                input = input.insertBit(bit)
                val data = table[input]
                if (data != null) {
                    result.add(data.symbol.toUByte())
                    break
                }
            }
        }
        return result.toUByteArray()
    }

    /**
     * Write the code table: packed minimal code length, packed width of a length field,
     * then one field per symbol: `length - minLength + 1`, or 0 for an unused symbol.
     *
     * The length range is computed over all symbols; symbol 0 must be included,
     * otherwise its length could fall outside of the range encoded here.
     */
    private fun serializeCanonicCodes(bout: BitOutput, codes: List<Code?>) {
        val lengths = codes.mapNotNull { it?.size?.toInt() }
        val minSize = lengths.minOrNull() ?: throw IllegalArgumentException("no codes to serialize")
        val maxSize = lengths.max()

        val size = maxSize - minSize + 1
        val fieldBits = sizeInBits(size)
        bout.packUnsigned(minSize.toULong())
        bout.packUnsigned(fieldBits.toULong())
        for (c in codes) {
            if (c != null)
                bout.putBits(c.bits.size.toInt() - minSize + 1, fieldBits)
            else
                bout.putBits(0, fieldBits)
        }
    }

    /**
     * Read the table written by [serializeCanonicCodes] and rebuild the canonical codes
     * from the stored lengths with the same assignment the encoder uses.
     */
    fun deserializeCanonicCodes(bin: BitInput): List<Code?> {
        val minSize = bin.unpackUnsigned().toInt()
        val fieldBits = bin.unpackUnsigned().toInt()

        // only the lengths matter here, the bits are assigned by makeCanonical
        val lengths = MutableList<Code?>(256) { null }
        for (symbol in 0..<256) {
            val s = bin.getBits(fieldBits).toInt()
            if (s > 0) {
                lengths[symbol] = Code(symbol, TinyBits(0U, s - 1 + minSize))
            }
        }
        return makeCanonical(lengths)
    }

    /** Compress [data]: the serialized code table followed by the code of every byte. */
    fun compress(data: UByteArray): BitArray {
        val root = buildTree(data)
        val codes = generateCanonicCodes(root)

        val bout = MemoryBitOutput()
        serializeCanonicCodes(bout, codes)
        for (i in data) {
            // every byte of data has a code: the table was built from this very data
            val code = codes[i.toInt()]!!
            bout.putBits(code.bits)
        }
        return bout.toBitArray()
    }

    /** Read the code table from [bin], then decode symbols until the end of the stream. */
    fun decompress(bin: BitInput): UByteArray {
        val codes = deserializeCanonicCodes(bin)
        return decompressUsingCodes(bin, codes)
    }
}
|
128
lynglib/src/commonMain/kotlin/net/sergeych/lynon/lzw.kt
Normal file
128
lynglib/src/commonMain/kotlin/net/sergeych/lynon/lzw.kt
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
package net.sergeych.lynon
|
||||||
|
|
||||||
|
import net.sergeych.bintools.ByteChunk
|
||||||
|
import kotlin.math.roundToInt
|
||||||
|
|
||||||
|
/**
 * LZW lightweight pure kotlin compression.
 *
 * Codes have variable width that grows with the dictionary. When the dictionary
 * reaches [MAX_DICT_SIZE] entries the encoder emits [STOP_CODE] and both sides
 * start over with the initial 256 single-byte entries.
 */
object LZW {

    val MAX_CODE_SIZE = 17
    val STOP_CODE = (1 shl MAX_CODE_SIZE) - 1
    val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()

    /** Same as the [UByteArray] variant; [input] is reinterpreted without copying. */
    fun compress(input: ByteArray, bitOutput: BitOutput)
            = compress(input.asUByteArray(), bitOutput)

    /**
     * Compresses the input bytes using the LZW algorithm.
     *
     * @param input The bytes to compress
     * @param bitOutput Receives the compressed codes
     */
    fun compress(input: UByteArray, bitOutput: BitOutput) {
        val dictionary = mutableMapOf<ByteChunk, Int>()

        // (re)fill the dictionary with all possible single bytes
        fun resetDictionary() {
            dictionary.clear()
            for (i in 0..255) {
                dictionary[ByteChunk(ubyteArrayOf(i.toUByte()))] = i
            }
        }
        resetDictionary()

        var nextCode = 256
        var current = ByteChunk(ubyteArrayOf())

        for (char in input) {
            val combined = current + char
            if (dictionary.containsKey(combined)) {
                current = combined
            } else {
                val size = sizeInBits(dictionary.size)
                // current is always in the dictionary: it is either a single byte
                // or a sequence that was found there on a previous step
                bitOutput.putBits(dictionary.getValue(current), size)
                if (dictionary.size >= MAX_DICT_SIZE) {
                    bitOutput.putBits(STOP_CODE, size)
                    resetDictionary()
                    nextCode = 256
                } else
                    dictionary[combined] = nextCode++
                current = ByteChunk(ubyteArrayOf(char))
            }
        }

        if (current.size > 0) {
            val size = sizeInBits(dictionary.size)
            bitOutput.putBits(dictionary.getValue(current), size)
        }
    }

    /** Compress [input] into a fresh in-memory [BitArray]. */
    fun compress(input: UByteArray): BitArray {
        return MemoryBitOutput().apply {
            compress(input, this)
        }.toBitArray()
    }

    /**
     * Decompresses LZW codes back to the original bytes. Note that knowing the result
     * size in advance is crucial: it lets us repel explosion-style attacks.
     *
     * @param compressed The source of compressed codes
     * @param resultSize The expected size of the decompressed data
     *
     * @throws DecompressionException if something goes wrong
     * @return The decompressed bytes
     */
    fun decompress(compressed: BitInput, resultSize: Int): UByteArray {
        val dictionary = mutableMapOf<Int, UByteArray>()

        // (re)fill the dictionary with all possible single bytes
        fun resetDictionary() {
            dictionary.clear()
            for (i in 0..255) {
                dictionary[i] = ubyteArrayOf(i.toUByte())
            }
        }

        // the first code of a fresh dictionary is always 9 bits wide and must be a literal
        fun readFirstCode(): UByteArray {
            val firstCode = compressed.getBits(9).toInt()
            return dictionary[firstCode]
                ?: throw DecompressionException("Invalid first compressed code: $firstCode")
        }

        resetDictionary()
        var nextCode = 256
        var previous = readFirstCode()
        val result = mutableListOf<UByte>()
        result += previous

        while (result.size < resultSize) {
            val codeSize = sizeInBits(nextCode + 1)
            val code = compressed.getBitsOrNull(codeSize)?.toInt() ?: break

            if (code == STOP_CODE) {
                nextCode = 256
                resetDictionary()
                previous = readFirstCode()
                // The encoder restarts with this literal as regular data, so it
                // belongs to the output just like the very first code does.
                result += previous
            } else {
                val current = if (code in dictionary) {
                    dictionary[code]!!
                } else if (code == nextCode) {
                    // Special case for pattern like cScSc
                    previous + previous[0]
                } else {
                    throw DecompressionException("Invalid compressed code: $code")
                }

                result += current
                dictionary[nextCode++] = previous + current[0]
                previous = current
            }
        }

        if (result.size != resultSize)
            throw DecompressionException("Decompressed size is not equal to expected: real/expected = ${result.size}/$resultSize")
        return result.toUByteArray()
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns a new [ByteChunk] built from this chunk's `data` with [byte] added at the end.
 */
private operator fun ByteChunk.plus(byte: UByte): ByteChunk = ByteChunk(data + byte)
|
@ -1,114 +0,0 @@
|
|||||||
package net.sergeych.lynon
|
|
||||||
|
|
||||||
import net.sergeych.bintools.ByteChunk
|
|
||||||
import kotlin.math.roundToInt
|
|
||||||
|
|
||||||
/**
 * LZW compression algorithm: work in progress.
 *
 * Uses Lyng bit input/output. Uses automatic code size.
 */
class LZW {
    companion object {

        /** Width, in bits, of the widest code used by this implementation. */
        val MAX_CODE_SIZE = 17

        /** Reserved code (all ones at [MAX_CODE_SIZE] bits) telling the decoder to reset its dictionary. */
        val STOP_CODE = (1 shl MAX_CODE_SIZE) - 1

        /** Dictionary size at which the encoder emits [STOP_CODE] and starts over (92% of [STOP_CODE]). */
        val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()

        /**
         * Compresses [input] using the LZW algorithm and writes the codes to [bitOutput].
         *
         * Nothing is written for empty input.
         *
         * @param input the bytes to compress
         * @param bitOutput the sink receiving the variable-width codes
         */
        fun compress(input: UByteArray, bitOutput: BitOutput) {
            val dictionary = mutableMapOf<ByteChunk, Int>()

            // (Re)fills the dictionary with all possible single-byte sequences.
            fun resetDictionary() {
                dictionary.clear()
                for (i in 0..255) {
                    dictionary[ByteChunk(ubyteArrayOf(i.toUByte()))] = i
                }
            }

            resetDictionary()
            var nextCode = 256
            var current = ByteChunk(ubyteArrayOf())

            for (char in input) {
                val combined = current + char
                if (combined in dictionary) {
                    current = combined
                } else {
                    // `current` is always a known sequence here: it is either a single byte
                    // or a sequence that was found in the dictionary on a previous step.
                    val size = sizeInBits(dictionary.size)
                    bitOutput.putBits(dictionary.getValue(current), size)
                    if (dictionary.size >= MAX_DICT_SIZE) {
                        // Dictionary is full: tell the decoder to start over.
                        bitOutput.putBits(STOP_CODE, size)
                        resetDictionary()
                        nextCode = 256
                    } else {
                        dictionary[combined] = nextCode++
                    }
                    current = ByteChunk(ubyteArrayOf(char))
                }
            }

            // Flush the sequence that was still being matched when the input ended.
            if (current.size > 0) {
                val size = sizeInBits(dictionary.size)
                bitOutput.putBits(dictionary.getValue(current), size)
            }
        }

        /**
         * Decompresses an LZW bit stream back into the original bytes, reading codes until
         * [compressed] reports the end of the stream.
         *
         * @param compressed the bit source holding the compressed codes
         * @return the decompressed bytes
         * @throws IllegalArgumentException if the stream contains an invalid code
         */
        fun decompress(compressed: BitInput): UByteArray {
            val dictionary = mutableMapOf<Int, UByteArray>()

            // (Re)fills the dictionary with all possible single-byte sequences.
            fun resetDictionary() {
                dictionary.clear()
                for (i in 0..255) dictionary[i] = ubyteArrayOf(i.toUByte())
            }

            // Reads the 9-bit code that starts decoding with a fresh dictionary and resolves it.
            fun readStartingChunk(): UByteArray {
                val code = compressed.getBits(9).toInt()
                return dictionary[code]
                    ?: throw IllegalArgumentException("Invalid compressed code: $code")
            }

            resetDictionary()
            var nextCode = 256
            var previous = readStartingChunk()
            val result = mutableListOf<UByte>()
            // The first decoded chunk is part of the data and must be emitted as well.
            result.addAll(previous)

            while (!compressed.isEndOfStream) {
                val codeSize = sizeInBits(nextCode + 1)
                val code = compressed.getBitsOrNull(codeSize)?.toInt() ?: break

                if (code == STOP_CODE) {
                    // The encoder dropped its dictionary; the chunk that follows is data too.
                    resetDictionary()
                    nextCode = 256
                    previous = readStartingChunk()
                    result.addAll(previous)
                } else {
                    val current = dictionary[code]
                        ?: if (code == nextCode) {
                            // Special case for pattern like cScSc
                            previous + previous[0]
                        } else {
                            throw IllegalArgumentException("Invalid compressed code: $code")
                        }

                    result.addAll(current)
                    dictionary[nextCode++] = previous + current[0]
                    previous = current
                }
            }

            return result.toUByteArray()
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Returns a new [ByteChunk] built from this chunk's `data` with [byte] added at the end.
 */
private operator fun ByteChunk.plus(byte: UByte): ByteChunk = ByteChunk(data + byte)
|
|
@ -11,5 +11,5 @@ class LynonPacker(bout: MemoryBitOutput = MemoryBitOutput(), settings: LynonSett
|
|||||||
* Variant of [LynonDecoder] that reads from a given `source` using [MemoryBitInput]
|
* Variant of [LynonDecoder] that reads from a given `source` using [MemoryBitInput]
|
||||||
*/
|
*/
|
||||||
class LynonUnpacker(source: BitInput) : LynonDecoder(source) {
|
class LynonUnpacker(source: BitInput) : LynonDecoder(source) {
|
||||||
constructor(packer: LynonPacker) : this(MemoryBitInput(packer.bout as MemoryBitOutput))
|
constructor(packer: LynonPacker) : this((packer.bout as MemoryBitOutput).toBitInput())
|
||||||
}
|
}
|
@ -6,7 +6,7 @@ import net.sergeych.lynon.*
|
|||||||
import java.nio.file.Files
|
import java.nio.file.Files
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
|
import kotlin.test.assertContentEquals
|
||||||
class LynonTests {
|
class LynonTests {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -210,11 +210,13 @@ class LynonTests {
|
|||||||
assertEquals(null, bin.getBitsOrNull(3))
|
assertEquals(null, bin.getBitsOrNull(3))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
val original = Files.readString(Path.of("../sample_texts/dikkens_hard_times.txt"))
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testLzw() {
|
fun testLzw() {
|
||||||
// Example usage
|
// Example usage
|
||||||
// val original = "TOBEORNOTTOBEORTOBEORNOT"
|
// val original = "TOBEORNOTTOBEORTOBEORNOT"
|
||||||
val original = Files.readString(Path.of("../sample_texts/dikkens_hard_times.txt"))
|
|
||||||
// println("Original: $original")
|
// println("Original: $original")
|
||||||
println("Length: ${original.length}")
|
println("Length: ${original.length}")
|
||||||
|
|
||||||
@ -222,14 +224,112 @@ class LynonTests {
|
|||||||
val out = MemoryBitOutput()
|
val out = MemoryBitOutput()
|
||||||
LZW.compress(original.encodeToByteArray().toUByteArray(), out)
|
LZW.compress(original.encodeToByteArray().toUByteArray(), out)
|
||||||
// println("\nCompressed codes: ${out.toUByteArray().toDump()}")
|
// println("\nCompressed codes: ${out.toUByteArray().toDump()}")
|
||||||
println("Number of codes: ${out.toUByteArray().size}")
|
println("Number of codes: ${out.toBitArray().bytesSize}")
|
||||||
|
println("Copression rate: ${out.toBitArray().bytesSize.toDouble() / original.length.toDouble()}")
|
||||||
// // Decompress
|
// // Decompress
|
||||||
val decompressed = LZW.decompress(MemoryBitInput(out)).toByteArray().decodeToString()
|
val decompressed = LZW.decompress(MemoryBitInput(out), original.length).toByteArray().decodeToString()
|
||||||
// println("\nDecompressed: $decompressed")
|
// println("\nDecompressed: $decompressed")
|
||||||
println("Length: ${decompressed.length}")
|
println("Length: ${decompressed.length}")
|
||||||
|
|
||||||
// Verification
|
// Verification
|
||||||
println("\nOriginal and decompressed match: ${original == decompressed}")
|
println("\nOriginal and decompressed match: ${original == decompressed}")
|
||||||
|
assertEquals(original, decompressed)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@Test
fun testTinyBits() {
    val empty = TinyBits()
    assertEquals(empty, empty)

    // Build the pattern 011101 one bit at a time.
    val pattern = listOf(0, 1, 1, 1, 0, 1).fold(empty) { acc, bit -> acc.insertBit(bit) }
    assertEquals("011101", pattern.toString())

    // Round trip: write the bits to memory and read them back one by one.
    val source = MemoryBitInput(MemoryBitOutput().apply { putBits(pattern) })
    var restored = TinyBits()
    for (position in pattern.indices) {
        restored = restored.insertBit(source.getBit())
    }
    assertEquals(pattern, restored)
}
|
||||||
|
|
||||||
|
@Test
fun testHuffman() {
    val source = original.encodeToByteArray().toUByteArray()
    println("Original : ${source.size}")

    // Size report for each codec and for their combination.
    val lzwBytes = LZW.compress(source).bytes
    println("LZW : ${lzwBytes.size}")

    val huffmanBytes = Huffman.compress(source).bytes
    println("Huffman : ${huffmanBytes.size}")

    val combinedBytes = Huffman.compress(lzwBytes).bytes
    println("LZW+HUFF : ${combinedBytes.size}")

    // Huffman round trip must reproduce the source bytes exactly.
    val packed = Huffman.compress(source)
    val unpacked = Huffman.decompress(packed.toBitInput())
    assertContentEquals(source, unpacked)
}
|
||||||
|
|
||||||
|
@Test
fun testBitListSmall() {
    val bits = listOf(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1)
        .fold(TinyBits()) { acc, bit -> acc.insertBit(bit) }

    // Indexed reads see the bits in insertion order.
    assertEquals(1, bits[0])
    assertEquals(1, bits[1])
    assertEquals(0, bits[2])
    assertEquals("1101000111101", bits.toString())

    // Overwrite the first three bits.
    bits[0] = 0
    bits[1] = 0
    bits[2] = 1
    assertEquals("0011000111101", bits.toString())

    // Overwrite the last two bits.
    bits[12] = 0
    bits[11] = 1
    assertEquals("0011000111110", bits.toString())
}
|
||||||
|
|
||||||
|
@Test
fun testBitListSerialization() {
    // this also tests bitArray with first and last bytes
    assertEquals("1101", bitListOf(1, 1, 0, 1).toString())

    val serialized = MemoryBitOutput().apply {
        putBits(bitListOf(1, 1, 0, 1))
        putBits(bitListOf(0, 0))
        putBits(bitListOf(0, 1, 1, 1, 1, 0, 1))
    }.toBitArray()

    assertEquals("1101000111101", serialized.toString())
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
fun testCompressionWithOffsets() {
    val text = "to be or not to be or not to be or not to be or not to be"
    val leadingMarker = 1571UL
    val trailingMarker = 157108UL

    // Surround the compressed payload with packed numbers so it does not start or end
    // at the edges of the stream.
    val packed = MemoryBitOutput().apply {
        packUnsigned(leadingMarker)
        LZW.compress(text.encodeToByteArray(), this)
        packUnsigned(trailingMarker)
    }

    val input = packed.toBitInput()
    assertEquals(leadingMarker, input.unpackUnsigned())
    val restored = LZW.decompress(input, text.length).asByteArray().decodeToString()
    assertEquals(text, restored)
    assertEquals(trailingMarker, input.unpackUnsigned())
}
|
||||||
|
|
||||||
|
@Test
fun testCompressionRecord() {
    val sink = MemoryBitOutput()
    val samples = listOf(
        "to be or not to be or not to be or not to be or not to be",
        "to be or not to be",
        "ababababab",
    )

    // Write all records back to back, then read them in the same order.
    for (sample in samples) sink.compress(sample)

    val input = sink.toBitInput()
    for (sample in samples) assertEquals(sample, input.decompressString())
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user