refs #35 bit granularity for bitstreams; LZW done

This commit is contained in:
Sergey Chernov 2025-07-13 00:36:58 +03:00
parent 23dafff453
commit 34bc7297bd
6 changed files with 117 additions and 56 deletions

View File

@ -2,10 +2,12 @@ package net.sergeych.lynon
abstract class BitInput { abstract class BitInput {
data class DataByte(val data: Int,val bits: Int)
/** /**
* Return next byte, int in 0..255 range, or -1 if end of stream reached * Return next byte, int in 0..255 range, or -1 if end of stream reached
*/ */
abstract fun getByte(): Int abstract fun getByte(): DataByte
private var accumulator = 0 private var accumulator = 0
@ -17,12 +19,13 @@ abstract class BitInput {
fun getBitOrNull(): Int? { fun getBitOrNull(): Int? {
if (isEndOfStream) return null if (isEndOfStream) return null
if (mask == 0) { if (mask == 0) {
accumulator = getByte() val ab = getByte()
accumulator = ab.data
if (accumulator == -1) { if (accumulator == -1) {
isEndOfStream = true isEndOfStream = true
return null return null
} }
mask = 0x80 mask = 1 shl (ab.bits - 1)
} }
val result = if (0 == accumulator and mask) 0 else 1 val result = if (0 == accumulator and mask) 0 else 1
mask = mask shr 1 mask = mask shr 1
@ -31,14 +34,14 @@ abstract class BitInput {
fun getBitsOrNull(count: Int): ULong? { fun getBitsOrNull(count: Int): ULong? {
var result = 0UL var result = 0UL
var mask = 1UL var resultMask = 1UL
for( i in 0 ..< count) { for( i in 0 ..< count) {
when(getBitOrNull()) { when(getBitOrNull()) {
null -> return null null -> return null
1 -> result = result or mask 1 -> result = result or resultMask
0 -> {} 0 -> {}
} }
mask = mask shl 1 resultMask = resultMask shl 1
} }
return result return result
} }
@ -68,6 +71,7 @@ abstract class BitInput {
return if( isNegative == 1) -value else value return if( isNegative == 1) -value else value
} }
@Suppress("unused")
fun getBool(): Boolean { fun getBool(): Boolean {
return getBit() == 1 return getBit() == 1
} }

View File

@ -5,29 +5,45 @@ abstract class BitOutput {
abstract fun outputByte(byte: UByte) abstract fun outputByte(byte: UByte)
private var accumulator = 0 private var accumulator = 0
/**
* Number of bits in accumulator. After output is closed by [close] this value is
* not changed and represents the number of bits in the last byte; this should
* be used to properly calculate end of the bit stream
*/
private var accumulatorBits = 0 private var accumulatorBits = 0
private set
/**
 * Number of used bits in the last output byte.
 *
 * Only available after [close] has been called; reading it earlier throws
 * [IllegalStateException]. Bits of the last byte beyond this count are garbage
 * and should be ignored.
 */
val lastByteBits: Int
    get() {
        check(isClosed) { "BitOutput is not closed" }
        return accumulatorBits
    }
fun putBits(bits: ULong, count: Int) { fun putBits(bits: ULong, count: Int) {
require( count <= 64 ) require(count <= 64)
var x = bits var x = bits
for( i in 0 ..< count ) { for (i in 0..<count) {
putBit( (x and 1u).toInt() ) putBit((x and 1u).toInt())
x = x shr 1 x = x shr 1
} }
} }
fun putBits(bits: Int, count: Int) { fun putBits(bits: Int, count: Int) {
require( count <= 32 ) require(count <= 32)
var x = bits var x = bits
for( i in 0 ..< count ) { for (i in 0..<count) {
putBit( (x and 1) ) putBit((x and 1))
x = x shr 1 x = x shr 1
} }
} }
fun putBit(bit: Int) { fun putBit(bit: Int) {
accumulator = (accumulator shl 1) or bit accumulator = (accumulator shl 1) or bit
if( ++accumulatorBits >= 8 ) { if (++accumulatorBits >= 8) {
outputByte(accumulator.toUByte()) outputByte(accumulator.toUByte())
accumulator = accumulator shr 0 accumulator = accumulator shr 0
accumulatorBits = 0 accumulatorBits = 0
@ -38,19 +54,18 @@ abstract class BitOutput {
val tetrades = sizeInTetrades(value) val tetrades = sizeInTetrades(value)
putBits(tetrades - 1, 4) putBits(tetrades - 1, 4)
var rest = value var rest = value
for( i in 0..<tetrades ) { for (i in 0..<tetrades) {
putBits( rest and 0xFu, 4 ) putBits(rest and 0xFu, 4)
rest = rest shr 4 rest = rest shr 4
} }
} }
@Suppress("unused") @Suppress("unused")
fun packSigned(value: Long) { fun packSigned(value: Long) {
if( value < 0 ) { if (value < 0) {
putBit(1) putBit(1)
packUnsigned((-value).toULong()) packUnsigned((-value).toULong())
} } else {
else {
putBit(0) putBit(0)
packUnsigned(value.toULong()) packUnsigned(value.toULong())
} }
@ -59,17 +74,18 @@ abstract class BitOutput {
var isClosed = false var isClosed = false
private set private set
fun close() { fun close(): BitOutput {
if( !isClosed ) { if (!isClosed) {
if (accumulatorBits > 0) { if (accumulatorBits > 0) {
while (accumulatorBits != 0) putBit(0) outputByte(accumulator.toUByte())
} } else accumulatorBits = 8
isClosed = true isClosed = true
} }
return this
} }
fun putBytes(data: ByteArray) { fun putBytes(data: ByteArray) {
for( b in data ) { for (b in data) {
putBits(b.toULong(), 8) putBits(b.toULong(), 8)
} }
} }

View File

@ -1,15 +1,19 @@
package net.sergeych.lynon package net.sergeych.lynon
class MemoryBitInput(val packedBits: UByteArray): BitInput() { class MemoryBitInput(val packedBits: UByteArray,val lastByteBits: Int): BitInput() {
constructor(bout: MemoryBitOutput): this(bout.toUByteArray())
constructor(bout: MemoryBitOutput): this(bout.toUByteArray(), bout.lastByteBits)
private var index = 0 private var index = 0
override fun getByte(): Int { override fun getByte(): DataByte {
if( index < packedBits.size ) { return if( index < packedBits.size ) {
return packedBits[index++].toInt() DataByte(
packedBits[index++].toInt(),
if( index == packedBits.size ) lastByteBits else 8
)
} else { } else {
return -1 DataByte(-1,0)
} }
} }

View File

@ -7,13 +7,11 @@ import kotlin.math.roundToInt
* LZW compression algorithm: work in progress. * LZW compression algorithm: work in progress.
* *
* Uses Lyng bit input/output. Uses automatic code size. * Uses Lyng bit input/output. Uses automatic code size.
*
* TODO: - reset dictionary
*/ */
class LZW { class LZW {
companion object { companion object {
val MAX_CODE_SIZE = 12 val MAX_CODE_SIZE = 17
val STOP_CODE = (1 shl MAX_CODE_SIZE) - 1 val STOP_CODE = (1 shl MAX_CODE_SIZE) - 1
val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt() val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()
@ -42,7 +40,16 @@ class LZW {
} else { } else {
val size = sizeInBits(dictionary.size) val size = sizeInBits(dictionary.size)
bitOutput.putBits(dictionary[current]!!,size) bitOutput.putBits(dictionary[current]!!,size)
dictionary[combined] = nextCode++ if( dictionary.size >= MAX_DICT_SIZE ) {
bitOutput.putBits(STOP_CODE,size)
dictionary.clear()
nextCode = 256
for (i in 0..255) {
dictionary[ByteChunk(ubyteArrayOf(i.toUByte()))] = i
}
}
else
dictionary[combined] = nextCode++
current = ByteChunk(ubyteArrayOf(char)) current = ByteChunk(ubyteArrayOf(char))
} }
} }
@ -72,18 +79,29 @@ class LZW {
while( !compressed.isEndOfStream ) { while( !compressed.isEndOfStream ) {
val codeSize = sizeInBits(nextCode + 1) val codeSize = sizeInBits(nextCode + 1)
val code = compressed.getBitsOrNull(codeSize)?.toInt() ?: break val code = compressed.getBitsOrNull(codeSize)?.toInt() ?: break
val current = if ( code in dictionary) {
dictionary[code]!!
} else if (code == nextCode) {
// Special case for pattern like cScSc
previous + previous[0]
} else {
throw IllegalArgumentException("Invalid compressed code: $code")
}
result += current if( code == STOP_CODE ) {
dictionary[nextCode++] = previous + current[0] nextCode = 256
previous = current dictionary.clear()
for (i in 0..255)
dictionary[i] = ubyteArrayOf(i.toUByte())
previous = dictionary[compressed.getBits(9).toInt()]!!
}
else {
val current = if (code in dictionary) {
dictionary[code]!!
} else if (code == nextCode) {
// Special case for pattern like cScSc
previous + previous[0]
} else {
throw IllegalArgumentException("Invalid compressed code: $code")
}
result += current
dictionary[nextCode++] = previous + current[0]
previous = current
}
} }
return result.toTypedArray().toUByteArray() return result.toTypedArray().toUByteArray()

View File

@ -5,10 +5,11 @@ package net.sergeych.lynon
*/ */
class LynonPacker(bout: MemoryBitOutput = MemoryBitOutput(), settings: LynonSettings = LynonSettings.default) class LynonPacker(bout: MemoryBitOutput = MemoryBitOutput(), settings: LynonSettings = LynonSettings.default)
: LynonEncoder(bout, settings) { : LynonEncoder(bout, settings) {
fun toUByteArray(): UByteArray = (bout as MemoryBitOutput).toUByteArray()
} }
/** /**
* Variant of [LynonDecoder] that reads from a given `source` using [MemoryBitInput] * Variant of [LynonDecoder] that reads from a given `source` using [MemoryBitInput]
*/ */
class LynonUnpacker(source: UByteArray) : LynonDecoder(MemoryBitInput(source)) class LynonUnpacker(source: BitInput) : LynonDecoder(source) {
constructor(packer: LynonPacker) : this(MemoryBitInput(packer.bout as MemoryBitOutput))
}

View File

@ -40,7 +40,7 @@ class LynonTests {
bout.putBits(3, 4) bout.putBits(3, 4)
bout.close() bout.close()
val bin = MemoryBitInput(bout.toUByteArray()) val bin = MemoryBitInput(bout)
assertEquals(2UL, bin.getBits(3)) assertEquals(2UL, bin.getBits(3))
assertEquals(1UL, bin.getBits(7)) assertEquals(1UL, bin.getBits(7))
assertEquals(197UL, bin.getBits(8)) assertEquals(197UL, bin.getBits(8))
@ -52,7 +52,7 @@ class LynonTests {
val bout = MemoryBitOutput() val bout = MemoryBitOutput()
bout.packUnsigned(1471792UL) bout.packUnsigned(1471792UL)
bout.close() bout.close()
val bin = MemoryBitInput(bout.toUByteArray()) val bin = MemoryBitInput(bout)
assertEquals(1471792UL, bin.unpackUnsigned()) assertEquals(1471792UL, bin.unpackUnsigned())
} }
@ -61,7 +61,7 @@ class LynonTests {
val bout = MemoryBitOutput() val bout = MemoryBitOutput()
bout.packUnsigned(ULong.MAX_VALUE) bout.packUnsigned(ULong.MAX_VALUE)
bout.close() bout.close()
val bin = MemoryBitInput(bout.toUByteArray()) val bin = MemoryBitInput(bout)
assertEquals(ULong.MAX_VALUE, bin.unpackUnsigned()) assertEquals(ULong.MAX_VALUE, bin.unpackUnsigned())
} }
@ -70,7 +70,7 @@ class LynonTests {
val bout = MemoryBitOutput() val bout = MemoryBitOutput()
bout.packUnsigned(7UL) bout.packUnsigned(7UL)
bout.close() bout.close()
val bin = MemoryBitInput(bout.toUByteArray()) val bin = MemoryBitInput(bout)
assertEquals(7UL, bin.unpackUnsigned()) assertEquals(7UL, bin.unpackUnsigned())
} }
@ -81,7 +81,7 @@ class LynonTests {
bout.packSigned(1471792L) bout.packSigned(1471792L)
// bout.packSigned(147179L) // bout.packSigned(147179L)
bout.close() bout.close()
val bin = MemoryBitInput(bout.toUByteArray()) val bin = MemoryBitInput(bout)
assertEquals(-1471792L, bin.unpackSigned()) assertEquals(-1471792L, bin.unpackSigned())
assertEquals(1471792L, bin.unpackSigned()) assertEquals(1471792L, bin.unpackSigned())
} }
@ -126,7 +126,7 @@ class LynonTests {
for (s in source) { for (s in source) {
encoder.encodeObj(scope, s) encoder.encodeObj(scope, s)
} }
val decoder = LynonUnpacker(encoder.toUByteArray()) val decoder = LynonUnpacker(encoder)
val restored = mutableListOf<Obj>() val restored = mutableListOf<Obj>()
for (i in source.indices) { for (i in source.indices) {
restored.add(decoder.unpackObject(scope, ObjString.type)) restored.add(decoder.unpackObject(scope, ObjString.type))
@ -142,7 +142,7 @@ class LynonTests {
encodeObj(scope, ObjBool(false)) encodeObj(scope, ObjBool(false))
encodeObj(scope, ObjBool(true)) encodeObj(scope, ObjBool(true))
encodeObj(scope, ObjBool(true)) encodeObj(scope, ObjBool(true))
}.toUByteArray()) })
assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type)) assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type))
assertEquals(ObjFalse, decoder.unpackObject(scope, ObjBool.type)) assertEquals(ObjFalse, decoder.unpackObject(scope, ObjBool.type))
assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type)) assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type))
@ -162,7 +162,7 @@ class LynonTests {
encodeObj(scope, ObjReal(Double.POSITIVE_INFINITY)) encodeObj(scope, ObjReal(Double.POSITIVE_INFINITY))
encodeObj(scope, ObjReal(Double.MIN_VALUE)) encodeObj(scope, ObjReal(Double.MIN_VALUE))
encodeObj(scope, ObjReal(Double.MAX_VALUE)) encodeObj(scope, ObjReal(Double.MAX_VALUE))
}.toUByteArray()) })
assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type)) assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type))
assertEquals(ObjReal(Math.PI), decoder.unpackObject(scope, ObjReal.type)) assertEquals(ObjReal(Math.PI), decoder.unpackObject(scope, ObjReal.type))
assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type)) assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type))
@ -183,7 +183,7 @@ class LynonTests {
encodeObj(scope, ObjInt(Long.MIN_VALUE)) encodeObj(scope, ObjInt(Long.MIN_VALUE))
encodeObj(scope, ObjInt(Long.MAX_VALUE)) encodeObj(scope, ObjInt(Long.MAX_VALUE))
encodeObj(scope, ObjInt(Long.MAX_VALUE)) encodeObj(scope, ObjInt(Long.MAX_VALUE))
}.toUByteArray()) })
assertEquals(ObjInt(0), decoder.unpackObject(scope, ObjInt.type)) assertEquals(ObjInt(0), decoder.unpackObject(scope, ObjInt.type))
assertEquals(ObjInt(-1), decoder.unpackObject(scope, ObjInt.type)) assertEquals(ObjInt(-1), decoder.unpackObject(scope, ObjInt.type))
assertEquals(ObjInt(23), decoder.unpackObject(scope, ObjInt.type)) assertEquals(ObjInt(23), decoder.unpackObject(scope, ObjInt.type))
@ -192,6 +192,24 @@ class LynonTests {
assertEquals(ObjInt(Long.MAX_VALUE), decoder.unpackObject(scope, ObjInt.type)) assertEquals(ObjInt(Long.MAX_VALUE), decoder.unpackObject(scope, ObjInt.type))
} }
@Test
fun testLastvalue() {
    // Case 1: a single, partially filled byte. After the 3 written bits are
    // consumed, a further read must report end of stream.
    val partialInput = MemoryBitInput(
        MemoryBitOutput().apply { putBits(5, 3) }
    )
    assertEquals(5UL, partialInput.getBits(3))
    assertEquals(null, partialInput.getBitsOrNull(3))

    // Case 2: 3 + 11 + 2 = 16 bits, so the stream ends exactly on a byte
    // boundary. Again nothing may be readable past the written bits.
    val alignedInput = MemoryBitInput(
        MemoryBitOutput().apply {
            putBits(5, 3)
            putBits(1024, 11)
            putBits(2, 2)
        }
    )
    assertEquals(5UL, alignedInput.getBits(3))
    assertEquals(1024UL, alignedInput.getBits(11))
    assertEquals(2UL, alignedInput.getBits(2))
    assertEquals(null, alignedInput.getBitsOrNull(3))
}
@Test @Test
fun testLzw() { fun testLzw() {
// Example usage // Example usage
@ -207,7 +225,7 @@ class LynonTests {
println("Number of codes: ${out.toUByteArray().size}") println("Number of codes: ${out.toUByteArray().size}")
// // Decompress // // Decompress
val decompressed = LZW.decompress(MemoryBitInput(out.toUByteArray())).toByteArray().decodeToString() val decompressed = LZW.decompress(MemoryBitInput(out)).toByteArray().decodeToString()
// println("\nDecompressed: $decompressed") // println("\nDecompressed: $decompressed")
println("Length: ${decompressed.length}") println("Length: ${decompressed.length}")