refs #35 generic implementation of Huffman compression for variable bit length source alphabet
This commit is contained in:
		
							parent
							
								
									20181c63a1
								
							
						
					
					
						commit
						12b209c724
					
				@ -84,22 +84,17 @@ data class ObjString(val value: String) : Obj() {
 | 
				
			|||||||
    override suspend fun lynonType(): LynonType = LynonType.String
 | 
					    override suspend fun lynonType(): LynonType = LynonType.String
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
 | 
					    override suspend fun serialize(scope: Scope, encoder: LynonEncoder, lynonType: LynonType?) {
 | 
				
			||||||
//        if( lynonType == null )
 | 
					        val data = value.encodeToByteArray()
 | 
				
			||||||
//        encoder.encodeCached(this) { encoder.encodeBinaryData(value.encodeToByteArray()) }
 | 
					        encoder.encodeCached(data) { encoder.encodeBinaryData(data) }
 | 
				
			||||||
//        else
 | 
					 | 
				
			||||||
            encoder.encodeBinaryData(value.encodeToByteArray())
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    companion object {
 | 
					    companion object {
 | 
				
			||||||
        val type = object : ObjClass("String") {
 | 
					        val type = object : ObjClass("String") {
 | 
				
			||||||
            override suspend fun deserialize(scope: Scope, decoder: LynonDecoder, lynonType: LynonType?): Obj =
 | 
					            override suspend fun deserialize(scope: Scope, decoder: LynonDecoder, lynonType: LynonType?): Obj =
 | 
				
			||||||
//                if( lynonType == null )
 | 
					                    decoder.decodeCached {
 | 
				
			||||||
//                    decoder.decodeCached {
 | 
					                        ObjString(decoder.unpackBinaryData().decodeToString())
 | 
				
			||||||
//                        ObjString(decoder.unpackBinaryData().decodeToString())
 | 
					                    }
 | 
				
			||||||
//                    }
 | 
					 | 
				
			||||||
//                else
 | 
					 | 
				
			||||||
                ObjString(decoder.unpackBinaryData().decodeToString())
 | 
					 | 
				
			||||||
        }.apply {
 | 
					        }.apply {
 | 
				
			||||||
            addFn("toInt") {
 | 
					            addFn("toInt") {
 | 
				
			||||||
                ObjInt(thisAs<ObjString>().value.toLong())
 | 
					                ObjInt(thisAs<ObjString>().value.toLong())
 | 
				
			||||||
 | 
				
			|||||||
@ -4,21 +4,21 @@ import net.sergeych.bintools.ByteChunk
 | 
				
			|||||||
import net.sergeych.lyng.Scope
 | 
					import net.sergeych.lyng.Scope
 | 
				
			||||||
import net.sergeych.lyng.obj.*
 | 
					import net.sergeych.lyng.obj.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum class LynonType(val objClass: ObjClass) {
 | 
					enum class LynonType(val objClass: ObjClass,val defaultFrequency: Int = 1) {
 | 
				
			||||||
    Null(ObjNull.objClass),
 | 
					    Null(ObjNull.objClass, 80),
 | 
				
			||||||
    Int0(ObjInt.type),
 | 
					    Int0(ObjInt.type, 70),
 | 
				
			||||||
    IntNegative(ObjInt.type),
 | 
					    IntNegative(ObjInt.type, 50),
 | 
				
			||||||
    IntPositive(ObjInt.type),
 | 
					    IntPositive(ObjInt.type, 100),
 | 
				
			||||||
    String(ObjString.type),
 | 
					    String(ObjString.type, 100),
 | 
				
			||||||
    Real(ObjReal.type),
 | 
					    Real(ObjReal.type),
 | 
				
			||||||
    Bool(ObjBool.type),
 | 
					    Bool(ObjBool.type, 80),
 | 
				
			||||||
    List(ObjList.type),
 | 
					    List(ObjList.type, 70),
 | 
				
			||||||
    Map(ObjMap.type),
 | 
					    Map(ObjMap.type,40),
 | 
				
			||||||
    Set(ObjSet.type),
 | 
					    Set(ObjSet.type),
 | 
				
			||||||
    Buffer(ObjBuffer.type),
 | 
					    Buffer(ObjBuffer.type, 50),
 | 
				
			||||||
    Instant(ObjInstant.type),
 | 
					    Instant(ObjInstant.type, 30),
 | 
				
			||||||
    Duration(ObjDuration.type),
 | 
					    Duration(ObjDuration.type),
 | 
				
			||||||
    Other(Obj.rootObjectType);
 | 
					    Other(Obj.rootObjectType,60);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = LynonSettings.default) {
 | 
					open class LynonEncoder(val bout: BitOutput, val settings: LynonSettings = LynonSettings.default) {
 | 
				
			||||||
 | 
				
			|||||||
@ -1,38 +1,68 @@
 | 
				
			|||||||
package net.sergeych.lynon
 | 
					package net.sergeych.lynon
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import net.sergeych.collections.SortedList
 | 
					import net.sergeych.collections.SortedList
 | 
				
			||||||
 | 
					import net.sergeych.lynon.Huffman.Alphabet
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * Experimental, reference implementation of Huffman trees and encoding.
 | 
					 * Generic huffman encoding implementation using bits input/output and abstract [Alphabet].
 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * This is a reference huffman encoding implementation not yet ready;
 | 
					 | 
				
			||||||
 * it was used to experiment with LZW, at the moment, LZW won the competition
 | 
					 | 
				
			||||||
 * for compressed module format for its speed and sufficiently small size/
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * This is byte-based compressor which makes it not too interesting.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * TODO: convert to use various source dictionary
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * reason: version thant compress bytes is not too interesting; particular alphabets
 | 
					 | 
				
			||||||
 * are often longer than byte bits and are often sparse, that requires another
 | 
					 | 
				
			||||||
 * codes serialization implementation
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
object Huffman {
 | 
					object Huffman {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Alphabet interface: source can be variable bit size codes, not just bytes,
 | 
				
			||||||
 | 
					     * so the Huffman encoding is not limited to bytes. It works with any alphabet
 | 
				
			||||||
 | 
					     * using its _ordinals_; encoding between source symbols and ordinals are
 | 
				
			||||||
 | 
					     * performed by the alphabet. See [byteAlphabet] for example.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    interface Alphabet<T> {
 | 
				
			||||||
 | 
					        val maxOrdinal: Int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        /**
 | 
				
			||||||
 | 
					         * Write correct symbol for the [ordinal] to the [bout]. This is
 | 
				
			||||||
 | 
					         * the inverse of [ordinalOf] but as [T] could be variable bit size,
 | 
				
			||||||
 | 
					         * we provide output bit stream.
 | 
				
			||||||
 | 
					         */
 | 
				
			||||||
 | 
					        fun decodeOrdinalTo(bout: BitOutput, ordinal: Int)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        /**
 | 
				
			||||||
 | 
					         * Find the ordinal of the source symbol
 | 
				
			||||||
 | 
					         */
 | 
				
			||||||
 | 
					        fun ordinalOf(value: T): Int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        operator fun get(ordinal: Int): T
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Alphabet for unsigned bytes, allows to encode bytes easily
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    val byteAlphabet = object : Alphabet<UByte> {
 | 
				
			||||||
 | 
					        override val maxOrdinal: Int
 | 
				
			||||||
 | 
					            get() = 256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        override fun decodeOrdinalTo(bout: BitOutput, ordinal: Int) {
 | 
				
			||||||
 | 
					            bout.putBits(ordinal, 8)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        override fun ordinalOf(value: UByte): Int = value.toInt()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        override operator fun get(ordinal: Int): UByte = ordinal.toUByte()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    sealed class Node(val freq: Int) : Comparable<Node> {
 | 
					    sealed class Node(val freq: Int) : Comparable<Node> {
 | 
				
			||||||
        override fun compareTo(other: Node): Int {
 | 
					        override fun compareTo(other: Node): Int {
 | 
				
			||||||
            return freq.compareTo(other.freq)
 | 
					            return freq.compareTo(other.freq)
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        abstract fun decode(bin: BitInput): Int?
 | 
					        abstract fun decodeOrdinal(bin: BitInput): Int?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        class Leaf(val value: Int, freq: Int) : Node(freq) {
 | 
					        class Leaf(val ordinal: Int, freq: Int) : Node(freq) {
 | 
				
			||||||
            override fun toString(): String {
 | 
					            override fun toString(): String {
 | 
				
			||||||
                return "[$value:$freq]"
 | 
					                return "[$ordinal:$freq]"
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            override fun decode(bin: BitInput): Int {
 | 
					            override fun decodeOrdinal(bin: BitInput): Int {
 | 
				
			||||||
                return value//.also { println(": ${Char(value)}") }
 | 
					                return ordinal//.also { println(": ${Char(value)}") }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -41,33 +71,33 @@ object Huffman {
 | 
				
			|||||||
                return "[${left.freq}<- :<$freq>: ->${right.freq}]"
 | 
					                return "[${left.freq}<- :<$freq>: ->${right.freq}]"
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            override fun decode(bin: BitInput): Int? {
 | 
					            override fun decodeOrdinal(bin: BitInput): Int? {
 | 
				
			||||||
                return when (bin.getBitOrNull().also { print("$it") }) {
 | 
					                return when (bin.getBitOrNull().also { print("$it") }) {
 | 
				
			||||||
                    1 -> left.decode(bin)
 | 
					                    1 -> left.decodeOrdinal(bin)
 | 
				
			||||||
                    0 -> right.decode(bin)
 | 
					                    0 -> right.decodeOrdinal(bin)
 | 
				
			||||||
                    else -> null
 | 
					                    else -> null
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    data class Code(val symbol: Int, val bits: TinyBits) {
 | 
					    data class Code(val ordinal: Int, val bits: TinyBits) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val size by bits::size
 | 
					        val size by bits::size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        override fun toString(): String {
 | 
					        override fun toString(): String {
 | 
				
			||||||
            return "[${Char(symbol)}:$size:$bits]"
 | 
					            return "[$ordinal:$size:$bits]"
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private fun generateCanonicCodes(tree: Node): List<Code?> {
 | 
					    private fun generateCanonicCodes(tree: Node, alphabet: Alphabet<*>): List<Code?> {
 | 
				
			||||||
        val codes = MutableList<Code?>(256) { null }
 | 
					        val codes = MutableList<Code?>(alphabet.maxOrdinal) { null }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fun traverse(node: Node, code: TinyBits) {
 | 
					        fun traverse(node: Node, code: TinyBits) {
 | 
				
			||||||
            when (node) {
 | 
					            when (node) {
 | 
				
			||||||
                is Node.Leaf ->
 | 
					                is Node.Leaf ->
 | 
				
			||||||
                    codes[node.value] = (Code(node.value, code))
 | 
					                    codes[node.ordinal] = (Code(node.ordinal, code))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                is Node.Internal -> {
 | 
					                is Node.Internal -> {
 | 
				
			||||||
                    traverse(node.left, code.insertBit(1))
 | 
					                    traverse(node.left, code.insertBit(1))
 | 
				
			||||||
@ -77,17 +107,17 @@ object Huffman {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        traverse(tree, TinyBits())
 | 
					        traverse(tree, TinyBits())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return makeCanonical(codes)
 | 
					        return makeCanonical(codes, alphabet)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private fun makeCanonical(source: List<Code?>): List<Code?> {
 | 
					    private fun makeCanonical(source: List<Code?>,alphabet: Alphabet<*>): List<Code?> {
 | 
				
			||||||
        val sorted = source.filterNotNull().sortedWith(canonicComparator)
 | 
					        val sorted = source.filterNotNull().sortedWith(canonicComparator)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val canonical = MutableList<Code?>(256) { null }
 | 
					        val canonical = MutableList<Code?>(alphabet.maxOrdinal) { null }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val first = sorted[0]
 | 
					        val first = sorted[0]
 | 
				
			||||||
        val prevValue = first.copy(bits = TinyBits(0UL, first.bits.size))
 | 
					        val prevValue = first.copy(bits = TinyBits(0UL, first.bits.size))
 | 
				
			||||||
        canonical[first.symbol] = prevValue
 | 
					        canonical[first.ordinal] = prevValue
 | 
				
			||||||
        var prev = prevValue.bits
 | 
					        var prev = prevValue.bits
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (i in 1..<sorted.size) {
 | 
					        for (i in 1..<sorted.size) {
 | 
				
			||||||
@ -96,7 +126,7 @@ object Huffman {
 | 
				
			|||||||
            while (code.bits.size > bits.size) {
 | 
					            while (code.bits.size > bits.size) {
 | 
				
			||||||
                bits = bits.insertBit(0)
 | 
					                bits = bits.insertBit(0)
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            canonical[code.symbol] = code.copy(bits = bits)//.also { println("$it") }
 | 
					            canonical[code.ordinal] = code.copy(bits = bits)//.also { println("$it") }
 | 
				
			||||||
            prev = bits
 | 
					            prev = bits
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return canonical
 | 
					        return canonical
 | 
				
			||||||
@ -104,18 +134,21 @@ object Huffman {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    private val canonicComparator = { a: Code, b: Code ->
 | 
					    private val canonicComparator = { a: Code, b: Code ->
 | 
				
			||||||
        if (a.bits.size == b.bits.size) {
 | 
					        if (a.bits.size == b.bits.size) {
 | 
				
			||||||
            a.symbol.compareTo(b.symbol)
 | 
					            a.ordinal.compareTo(b.ordinal)
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            a.bits.size.compareTo(b.bits.size)
 | 
					            a.bits.size.compareTo(b.bits.size)
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private fun buildTree(data: UByteArray): Node {
 | 
					    private fun buildTree(data: Iterable<Int>,alphabet: Alphabet<*>): Node {
 | 
				
			||||||
//        println(data.toDump())
 | 
					        val frequencies = buildFrequencies(alphabet, data)
 | 
				
			||||||
        val frequencies = Array(256) { 0 }
 | 
					        return buildTree(frequencies)
 | 
				
			||||||
        data.forEach { frequencies[it.toInt()]++ }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val list = SortedList<Node>(*frequencies.mapIndexed { index, i -> Node.Leaf(index, i) }.filter { it.freq > 0 }
 | 
					    private fun buildTree(frequencies: Array<Int>): Node {
 | 
				
			||||||
 | 
					//        println(data.toDump())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        val list: SortedList<Node> = SortedList(*frequencies.mapIndexed { index, frequency -> Node.Leaf(index, frequency) }.filter { it.freq > 0 }
 | 
				
			||||||
            .toTypedArray())
 | 
					            .toTypedArray())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // build the tree
 | 
					        // build the tree
 | 
				
			||||||
@ -127,8 +160,18 @@ object Huffman {
 | 
				
			|||||||
        return list[0]
 | 
					        return list[0]
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fun decompressUsingCodes(bin: BitInput, codes: List<Code?>): UByteArray {
 | 
					    private fun buildFrequencies(
 | 
				
			||||||
        val result = mutableListOf<UByte>()
 | 
					        alphabet: Alphabet<*>,
 | 
				
			||||||
 | 
					        data: Iterable<Int>
 | 
				
			||||||
 | 
					    ): Array<Int> {
 | 
				
			||||||
 | 
					        val maxOrdinal = alphabet.maxOrdinal
 | 
				
			||||||
 | 
					        val frequencies = Array(maxOrdinal) { 0 }
 | 
				
			||||||
 | 
					        data.forEach { frequencies[it]++ }
 | 
				
			||||||
 | 
					        return frequencies
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fun decompressUsingCodes(bin: BitInput, codes: List<Code?>, alphabet: Alphabet<*>): BitArray {
 | 
				
			||||||
 | 
					        val result = MemoryBitOutput()
 | 
				
			||||||
        val table = codes.filterNotNull().associateBy { it.bits }
 | 
					        val table = codes.filterNotNull().associateBy { it.bits }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        outer@ while (true) {
 | 
					        outer@ while (true) {
 | 
				
			||||||
@ -139,12 +182,12 @@ object Huffman {
 | 
				
			|||||||
                val data = table[input]
 | 
					                val data = table[input]
 | 
				
			||||||
                if (data != null) {
 | 
					                if (data != null) {
 | 
				
			||||||
//                    println("Code found: ${data.bits} -> [${data.symbol.toChar()}]")
 | 
					//                    println("Code found: ${data.bits} -> [${data.symbol.toChar()}]")
 | 
				
			||||||
                    result.add(data.symbol.toUByte())
 | 
					                    alphabet.decodeOrdinalTo(result,data.ordinal)
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result.toUByteArray()
 | 
					        return result.toBitArray()
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private fun serializeCanonicCodes(bout: BitOutput, codes: List<Code?>) {
 | 
					    private fun serializeCanonicCodes(bout: BitOutput, codes: List<Code?>) {
 | 
				
			||||||
@ -167,11 +210,11 @@ object Huffman {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fun deserializeCanonicCodes(bin: BitInput): List<Code?> {
 | 
					    fun deserializeCanonicCodes(bin: BitInput, alphabet: Alphabet<*>): List<Code?> {
 | 
				
			||||||
        val minSize = bin.unpackUnsigned().toInt()
 | 
					        val minSize = bin.unpackUnsigned().toInt()
 | 
				
			||||||
        val sizeInBits = bin.unpackUnsigned().toInt()
 | 
					        val sizeInBits = bin.unpackUnsigned().toInt()
 | 
				
			||||||
        val sorted = mutableListOf<Code>().also { codes ->
 | 
					        val sorted = mutableListOf<Code>().also { codes ->
 | 
				
			||||||
            for (i in 0..<256) {
 | 
					            for (i in 0..<alphabet.maxOrdinal) {
 | 
				
			||||||
                val s = bin.getBits(sizeInBits).toInt()
 | 
					                val s = bin.getBits(sizeInBits).toInt()
 | 
				
			||||||
                if (s > 0) {
 | 
					                if (s > 0) {
 | 
				
			||||||
                    codes.add(Code(i, TinyBits(0U, s - 1 + minSize)))
 | 
					                    codes.add(Code(i, TinyBits(0U, s - 1 + minSize)))
 | 
				
			||||||
@ -179,66 +222,53 @@ object Huffman {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
        }.sortedWith(canonicComparator)
 | 
					        }.sortedWith(canonicComparator)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val result = MutableList<Code?>(256) { null }
 | 
					        val result = MutableList<Code?>(alphabet.maxOrdinal) { null }
 | 
				
			||||||
        var prev = sorted[0].copy(bits = TinyBits(0U, sorted[0].bits.size))
 | 
					        var prev = sorted[0].copy(bits = TinyBits(0U, sorted[0].bits.size))
 | 
				
			||||||
        result[prev.symbol] = prev
 | 
					        result[prev.ordinal] = prev
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (i in 1..<sorted.size) {
 | 
					        for (i in 1..<sorted.size) {
 | 
				
			||||||
            val code = sorted[i]
 | 
					            val code = sorted[i]
 | 
				
			||||||
            var bits = TinyBits(prev.bits.value + 1u, prev.bits.size)
 | 
					            var bits = TinyBits(prev.bits.value + 1u, prev.bits.size)
 | 
				
			||||||
            while (bits.size < code.bits.size) bits = bits.insertBit(0)
 | 
					            while (bits.size < code.bits.size) bits = bits.insertBit(0)
 | 
				
			||||||
            result[code.symbol] = code.copy(bits = bits).also {
 | 
					            result[code.ordinal] = code.copy(bits = bits).also {
 | 
				
			||||||
                prev = it
 | 
					                prev = it
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result
 | 
					        return result
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fun compress(data: UByteArray): BitArray {
 | 
					//    fun generateCanonicalCodes(frequencies: Iterable<Int>): List<Code?> {
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val root = buildTree(data)
 | 
					    fun generateCanonicalCodes(frequencies: Array<Int>,alphabet: Alphabet<*>): List<Code?> =
 | 
				
			||||||
 | 
					        generateCanonicCodes(buildTree(frequencies), alphabet)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        val codes = generateCanonicCodes(root)
 | 
					    fun <T>compress(plain: Iterable<T>,alphabet: Alphabet<T>): BitArray {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        val source = plain.map { alphabet.ordinalOf(it) }
 | 
				
			||||||
 | 
					        val root = buildTree(source,alphabet)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        val codes = generateCanonicCodes(root, alphabet)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // serializa table
 | 
					        // serializa table
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // test encode:
 | 
					        // test encode:
 | 
				
			||||||
        val bout = MemoryBitOutput()
 | 
					        val bout = MemoryBitOutput()
 | 
				
			||||||
        serializeCanonicCodes(bout, codes)
 | 
					        serializeCanonicCodes(bout, codes)
 | 
				
			||||||
        for (i in data) {
 | 
					        for (i in source) {
 | 
				
			||||||
            val code = codes[i.toInt()]!!
 | 
					            val code = codes[i]!!
 | 
				
			||||||
//            println(">> $code")
 | 
					//            println(">> $code")
 | 
				
			||||||
            bout.putBits(code.bits)
 | 
					            bout.putBits(code.bits)
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
//        println(bout.toBitArray().bytes.toDump())
 | 
					//        println(bout.toBitArray().bytes.toDump())
 | 
				
			||||||
        val compressed = bout.toBitArray()
 | 
					        val compressed = bout.toBitArray()
 | 
				
			||||||
//        println("Size: ${compressed.bytes.size / data.size.toDouble() }")
 | 
					 | 
				
			||||||
//        println("compression ratio: ${compressed.bytes.size / data.size.toDouble() }")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // test decompress
 | 
					 | 
				
			||||||
//        val bin = MemoryBitInput(compressed)
 | 
					 | 
				
			||||||
//        val codes2 = deserializeCanonicCodes(bin)
 | 
					 | 
				
			||||||
//        for ((a, b) in codes.zip(codes2)) {
 | 
					 | 
				
			||||||
//            if (a != b) {
 | 
					 | 
				
			||||||
//                println("Codes mismatch: $a != $b")
 | 
					 | 
				
			||||||
//                break
 | 
					 | 
				
			||||||
//            }
 | 
					 | 
				
			||||||
//        }
 | 
					 | 
				
			||||||
//        require(codes == codes2)
 | 
					 | 
				
			||||||
//        val result = decompressUsingCodes(bin, codes2)
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
////        println(result.toUByteArray().toDump())
 | 
					 | 
				
			||||||
//        check(data contentEquals result.toUByteArray())
 | 
					 | 
				
			||||||
//        if( !(data contentEquals result.toUByteArray()) )
 | 
					 | 
				
			||||||
//            throw RuntimeException("Data mismatch")
 | 
					 | 
				
			||||||
//        println(data.toDump())
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
        return compressed
 | 
					        return compressed
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fun decompress(bin: BitInput): UByteArray {
 | 
					    fun <T>decompress(bin: BitInput,alphabet: Alphabet<T>): UByteArray {
 | 
				
			||||||
        val codes = deserializeCanonicCodes(bin)
 | 
					        val codes = deserializeCanonicCodes(bin, alphabet)
 | 
				
			||||||
        return decompressUsingCodes(bin, codes)
 | 
					        return decompressUsingCodes(bin, codes, alphabet).asUbyteArray()
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -382,16 +382,39 @@ class LynonTests {
 | 
				
			|||||||
        println("Original : ${x.size}")
 | 
					        println("Original : ${x.size}")
 | 
				
			||||||
        val lzw = LZW.compress(x).bytes
 | 
					        val lzw = LZW.compress(x).bytes
 | 
				
			||||||
        println("LZW      : ${lzw.size}")
 | 
					        println("LZW      : ${lzw.size}")
 | 
				
			||||||
        val ba = Huffman.compress(x)
 | 
					        val ba = Huffman.compress(x, Huffman.byteAlphabet)
 | 
				
			||||||
        val huff = ba.bytes
 | 
					        val huff = ba.bytes
 | 
				
			||||||
        println("Huffman  : ${huff.size}")
 | 
					        println("Huffman  : ${huff.size}")
 | 
				
			||||||
        val lzwhuff = Huffman.compress(lzw).bytes
 | 
					        val lzwhuff = Huffman.compress(lzw, Huffman.byteAlphabet).bytes
 | 
				
			||||||
        println("LZW+HUFF : ${lzwhuff.size}")
 | 
					        println("LZW+HUFF : ${lzwhuff.size}")
 | 
				
			||||||
        val compressed = Huffman.compress(x)
 | 
					        val compressed = Huffman.compress(x,Huffman.byteAlphabet)
 | 
				
			||||||
        val decompressed = Huffman.decompress(compressed.toBitInput())
 | 
					        val decompressed = Huffman.decompress(compressed.toBitInput(),Huffman.byteAlphabet)
 | 
				
			||||||
        assertContentEquals(x, decompressed)
 | 
					        assertContentEquals(x, decompressed)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @Test
 | 
				
			||||||
 | 
					    fun testGenerateCanonicalHuffmanCodes() {
 | 
				
			||||||
 | 
					        val frequencies = LynonType.entries.map { it.defaultFrequency }.toTypedArray()
 | 
				
			||||||
 | 
					        val alphabet = object : Huffman.Alphabet<LynonType> {
 | 
				
			||||||
 | 
					            override val maxOrdinal = LynonType.entries.size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//            val bitSize = sizeInBits(maxOrdinal)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            override fun decodeOrdinalTo(bout: BitOutput, ordinal: Int) {
 | 
				
			||||||
 | 
					                TODO("Not yet implemented")
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            override fun get(ordinal: Int): LynonType {
 | 
				
			||||||
 | 
					                TODO("Not yet implemented")
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            override fun ordinalOf(value: LynonType): Int = value.ordinal
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        for(code in Huffman.generateCanonicalCodes(frequencies, alphabet)) {
 | 
				
			||||||
 | 
					            println("${code?.bits}: ${code?.ordinal?.let { LynonType.entries[it] }}")
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Test
 | 
					    @Test
 | 
				
			||||||
    fun testBitListSmall() {
 | 
					    fun testBitListSmall() {
 | 
				
			||||||
        var t = TinyBits()
 | 
					        var t = TinyBits()
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user