refs #35 bit granularity for bitstreams; LZW done

This commit is contained in:
Sergey Chernov 2025-07-13 00:36:58 +03:00
parent 23dafff453
commit 34bc7297bd
6 changed files with 117 additions and 56 deletions

View File

@ -2,10 +2,12 @@ package net.sergeych.lynon
abstract class BitInput {
data class DataByte(val data: Int,val bits: Int)
/**
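* Return the next byte as a [DataByte]: `data` is an int in 0..255 range, or -1 if the end of the stream
* is reached; `bits` is the number of bits of `data` that should be read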
*/
abstract fun getByte(): Int
abstract fun getByte(): DataByte
private var accumulator = 0
@ -17,12 +19,13 @@ abstract class BitInput {
fun getBitOrNull(): Int? {
if (isEndOfStream) return null
if (mask == 0) {
accumulator = getByte()
val ab = getByte()
accumulator = ab.data
if (accumulator == -1) {
isEndOfStream = true
return null
}
mask = 0x80
mask = 1 shl (ab.bits - 1)
}
val result = if (0 == accumulator and mask) 0 else 1
mask = mask shr 1
@ -31,14 +34,14 @@ abstract class BitInput {
fun getBitsOrNull(count: Int): ULong? {
var result = 0UL
var mask = 1UL
var resultMask = 1UL
for( i in 0 ..< count) {
when(getBitOrNull()) {
null -> return null
1 -> result = result or mask
1 -> result = result or resultMask
0 -> {}
}
mask = mask shl 1
resultMask = resultMask shl 1
}
return result
}
@ -68,6 +71,7 @@ abstract class BitInput {
return if( isNegative == 1) -value else value
}
@Suppress("unused")
fun getBool(): Boolean {
return getBit() == 1
}

View File

@ -5,7 +5,23 @@ abstract class BitOutput {
abstract fun outputByte(byte: UByte)
private var accumulator = 0
/**
* Number of bits in the accumulator. After the output is closed by [close], this value
* no longer changes and represents the number of bits in the last byte; it should
* be used to properly calculate the end of the bit stream
*/
private var accumulatorBits = 0
private set
/**
* After [close] has been called, represents the number of used bits in the last byte;
* bits beyond this number are garbage and should be ignored
*/
val lastByteBits: Int get() {
if( !isClosed ) throw IllegalStateException("BitOutput is not closed")
return accumulatorBits
}
fun putBits(bits: ULong, count: Int) {
require(count <= 64)
@ -49,8 +65,7 @@ abstract class BitOutput {
if (value < 0) {
putBit(1)
packUnsigned((-value).toULong())
}
else {
} else {
putBit(0)
packUnsigned(value.toULong())
}
@ -59,13 +74,14 @@ abstract class BitOutput {
var isClosed = false
private set
fun close() {
fun close(): BitOutput {
if (!isClosed) {
if (accumulatorBits > 0) {
while (accumulatorBits != 0) putBit(0)
}
outputByte(accumulator.toUByte())
} else accumulatorBits = 8
isClosed = true
}
return this
}
fun putBytes(data: ByteArray) {

View File

@ -1,15 +1,19 @@
package net.sergeych.lynon
class MemoryBitInput(val packedBits: UByteArray): BitInput() {
constructor(bout: MemoryBitOutput): this(bout.toUByteArray())
class MemoryBitInput(val packedBits: UByteArray,val lastByteBits: Int): BitInput() {
constructor(bout: MemoryBitOutput): this(bout.toUByteArray(), bout.lastByteBits)
private var index = 0
override fun getByte(): Int {
if( index < packedBits.size ) {
return packedBits[index++].toInt()
override fun getByte(): DataByte {
return if( index < packedBits.size ) {
DataByte(
packedBits[index++].toInt(),
if( index == packedBits.size ) lastByteBits else 8
)
} else {
return -1
DataByte(-1,0)
}
}

View File

@ -7,13 +7,11 @@ import kotlin.math.roundToInt
* LZW compression algorithm: work in progress.
*
* Uses Lyng bit input/output. Uses automatic code size.
*
* TODO: - reset dictionary
*/
class LZW {
companion object {
val MAX_CODE_SIZE = 12
val MAX_CODE_SIZE = 17
val STOP_CODE = (1 shl MAX_CODE_SIZE) - 1
val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()
@ -42,6 +40,15 @@ class LZW {
} else {
val size = sizeInBits(dictionary.size)
bitOutput.putBits(dictionary[current]!!,size)
if( dictionary.size >= MAX_DICT_SIZE ) {
bitOutput.putBits(STOP_CODE,size)
dictionary.clear()
nextCode = 256
for (i in 0..255) {
dictionary[ByteChunk(ubyteArrayOf(i.toUByte()))] = i
}
}
else
dictionary[combined] = nextCode++
current = ByteChunk(ubyteArrayOf(char))
}
@ -72,6 +79,16 @@ class LZW {
while( !compressed.isEndOfStream ) {
val codeSize = sizeInBits(nextCode + 1)
val code = compressed.getBitsOrNull(codeSize)?.toInt() ?: break
if( code == STOP_CODE ) {
nextCode = 256
dictionary.clear()
for (i in 0..255)
dictionary[i] = ubyteArrayOf(i.toUByte())
previous = dictionary[compressed.getBits(9).toInt()]!!
}
else {
val current = if (code in dictionary) {
dictionary[code]!!
} else if (code == nextCode) {
@ -85,6 +102,7 @@ class LZW {
dictionary[nextCode++] = previous + current[0]
previous = current
}
}
return result.toTypedArray().toUByteArray()
}

View File

@ -5,10 +5,11 @@ package net.sergeych.lynon
*/
class LynonPacker(bout: MemoryBitOutput = MemoryBitOutput(), settings: LynonSettings = LynonSettings.default)
: LynonEncoder(bout, settings) {
fun toUByteArray(): UByteArray = (bout as MemoryBitOutput).toUByteArray()
}
/**
* Variant of [LynonDecoder] that reads from a given `source` using [MemoryBitInput]
*/
class LynonUnpacker(source: UByteArray) : LynonDecoder(MemoryBitInput(source))
class LynonUnpacker(source: BitInput) : LynonDecoder(source) {
constructor(packer: LynonPacker) : this(MemoryBitInput(packer.bout as MemoryBitOutput))
}

View File

@ -40,7 +40,7 @@ class LynonTests {
bout.putBits(3, 4)
bout.close()
val bin = MemoryBitInput(bout.toUByteArray())
val bin = MemoryBitInput(bout)
assertEquals(2UL, bin.getBits(3))
assertEquals(1UL, bin.getBits(7))
assertEquals(197UL, bin.getBits(8))
@ -52,7 +52,7 @@ class LynonTests {
val bout = MemoryBitOutput()
bout.packUnsigned(1471792UL)
bout.close()
val bin = MemoryBitInput(bout.toUByteArray())
val bin = MemoryBitInput(bout)
assertEquals(1471792UL, bin.unpackUnsigned())
}
@ -61,7 +61,7 @@ class LynonTests {
val bout = MemoryBitOutput()
bout.packUnsigned(ULong.MAX_VALUE)
bout.close()
val bin = MemoryBitInput(bout.toUByteArray())
val bin = MemoryBitInput(bout)
assertEquals(ULong.MAX_VALUE, bin.unpackUnsigned())
}
@ -70,7 +70,7 @@ class LynonTests {
val bout = MemoryBitOutput()
bout.packUnsigned(7UL)
bout.close()
val bin = MemoryBitInput(bout.toUByteArray())
val bin = MemoryBitInput(bout)
assertEquals(7UL, bin.unpackUnsigned())
}
@ -81,7 +81,7 @@ class LynonTests {
bout.packSigned(1471792L)
// bout.packSigned(147179L)
bout.close()
val bin = MemoryBitInput(bout.toUByteArray())
val bin = MemoryBitInput(bout)
assertEquals(-1471792L, bin.unpackSigned())
assertEquals(1471792L, bin.unpackSigned())
}
@ -126,7 +126,7 @@ class LynonTests {
for (s in source) {
encoder.encodeObj(scope, s)
}
val decoder = LynonUnpacker(encoder.toUByteArray())
val decoder = LynonUnpacker(encoder)
val restored = mutableListOf<Obj>()
for (i in source.indices) {
restored.add(decoder.unpackObject(scope, ObjString.type))
@ -142,7 +142,7 @@ class LynonTests {
encodeObj(scope, ObjBool(false))
encodeObj(scope, ObjBool(true))
encodeObj(scope, ObjBool(true))
}.toUByteArray())
})
assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type))
assertEquals(ObjFalse, decoder.unpackObject(scope, ObjBool.type))
assertEquals(ObjTrue, decoder.unpackObject(scope, ObjBool.type))
@ -162,7 +162,7 @@ class LynonTests {
encodeObj(scope, ObjReal(Double.POSITIVE_INFINITY))
encodeObj(scope, ObjReal(Double.MIN_VALUE))
encodeObj(scope, ObjReal(Double.MAX_VALUE))
}.toUByteArray())
})
assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type))
assertEquals(ObjReal(Math.PI), decoder.unpackObject(scope, ObjReal.type))
assertEquals(ObjReal(-Math.PI), decoder.unpackObject(scope, ObjReal.type))
@ -183,7 +183,7 @@ class LynonTests {
encodeObj(scope, ObjInt(Long.MIN_VALUE))
encodeObj(scope, ObjInt(Long.MAX_VALUE))
encodeObj(scope, ObjInt(Long.MAX_VALUE))
}.toUByteArray())
})
assertEquals(ObjInt(0), decoder.unpackObject(scope, ObjInt.type))
assertEquals(ObjInt(-1), decoder.unpackObject(scope, ObjInt.type))
assertEquals(ObjInt(23), decoder.unpackObject(scope, ObjInt.type))
@ -192,6 +192,24 @@ class LynonTests {
assertEquals(ObjInt(Long.MAX_VALUE), decoder.unpackObject(scope, ObjInt.type))
}
@Test
fun testLastvalue() {
var bin = MemoryBitInput(MemoryBitOutput().apply {
putBits(5, 3)
})
assertEquals(5UL, bin.getBits(3))
assertEquals(null, bin.getBitsOrNull(3))
bin = MemoryBitInput(MemoryBitOutput().apply {
putBits(5, 3)
putBits(1024, 11)
putBits(2, 2)
})
assertEquals(5UL, bin.getBits(3))
assertEquals(1024UL, bin.getBits(11))
assertEquals(2UL, bin.getBits(2))
assertEquals(null, bin.getBitsOrNull(3))
}
@Test
fun testLzw() {
// Example usage
@ -207,7 +225,7 @@ class LynonTests {
println("Number of codes: ${out.toUByteArray().size}")
// // Decompress
val decompressed = LZW.decompress(MemoryBitInput(out.toUByteArray())).toByteArray().decodeToString()
val decompressed = LZW.decompress(MemoryBitInput(out)).toByteArray().decodeToString()
// println("\nDecompressed: $decompressed")
println("Length: ${decompressed.length}")