lzw utils
This commit is contained in:
parent
7e289382ed
commit
5d4d548275
@ -21,7 +21,9 @@ import net.sergeych.bintools.ByteChunk
|
|||||||
import kotlin.math.roundToInt
|
import kotlin.math.roundToInt
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* LZW lightweight pure kotlin compression.
|
* LZW lightweight pure kotlin compression. LZW works with but streams [BitInput] and [BitOutput]
|
||||||
|
* to be used effectively in Lynon or other bit-grained formats. To safely comress byte arresy and
|
||||||
|
* strings without boilerplate use [lzwCompress], [lzwDecompress], [lzwCompressUtf8] and [lzwDecompressUtf8].
|
||||||
*/
|
*/
|
||||||
object LZW {
|
object LZW {
|
||||||
|
|
||||||
@ -30,8 +32,10 @@ object LZW {
|
|||||||
val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()
|
val MAX_DICT_SIZE = (STOP_CODE * 0.92).roundToInt()
|
||||||
|
|
||||||
|
|
||||||
fun compress(input: ByteArray, bitOutput: BitOutput)
|
/**
|
||||||
= compress(input.asUByteArray(), bitOutput)
|
* Compress a byte array using LZW algorithm writing the result to [bitOutput]
|
||||||
|
*/
|
||||||
|
fun compress(input: ByteArray, bitOutput: BitOutput) = compress(input.asUByteArray(), bitOutput)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compresses the input string using LZW algorithm
|
* Compresses the input string using LZW algorithm
|
||||||
@ -139,7 +143,42 @@ object LZW {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private operator fun ByteChunk.plus(byte: UByte): ByteChunk {
|
private operator fun ByteChunk.plus(byte: UByte): ByteChunk {
|
||||||
return ByteChunk(data + byte)
|
return ByteChunk(data + byte)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Safely compress binary byte data using the LZW algorithm. This can use up to one byte more space than
|
||||||
|
* with [LZW.compress] and [BitOutput] but you often need byte array operations. Source data size is also
|
||||||
|
* encoded to prevent file-bomb-like attacks. Note that content protection is not included (we assume
|
||||||
|
* LZW is robust).
|
||||||
|
* @param source the data to compress
|
||||||
|
*/
|
||||||
|
fun lzwCompress(source: UByteArray): UByteArray {
|
||||||
|
val out = MemoryBitOutput()
|
||||||
|
out.packUnsigned(source.size.toULong())
|
||||||
|
LZW.compress(source, out)
|
||||||
|
return out.toBitArray().asUByteArray()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Safely decompress data compressed with [lzwCompress], checking size. Contect checks are not implemented
|
||||||
|
* here as we assume LZW is robust.
|
||||||
|
* @param packed the compressed data
|
||||||
|
* @throws DecompressionException if something goes wrong, like size mismatch or bad compressed data
|
||||||
|
*/
|
||||||
|
fun lzwDecompress(packed: UByteArray): UByteArray {
|
||||||
|
val inp = MemoryBitInput(packed, 8)
|
||||||
|
val size = inp.unpackUnsigned()
|
||||||
|
return LZW.decompress(inp, size.toInt())
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compress a text using utf-8 encoding and [lzwCompress]
|
||||||
|
*/
|
||||||
|
fun lzwCompressUtf8(text: String) = lzwCompress(text.encodeToByteArray().toUByteArray())
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompress a text compressed with [lzwCompressUtf8]. See also [lzwDecompress]
|
||||||
|
*/
|
||||||
|
fun lzwDecompressUtf8(packed: UByteArray): String = lzwDecompress(packed).asByteArray().decodeToString()
|
||||||
|
|||||||
@ -397,6 +397,13 @@ class LynonTests {
|
|||||||
assertEquals(original, decompressed)
|
assertEquals(original, decompressed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testCompressed() {
|
||||||
|
val compressed = lzwCompressUtf8(original)
|
||||||
|
println("${compressed.size/original.encodeToByteArray().size.toDouble()} compression ratio")
|
||||||
|
assertEquals(original, lzwDecompressUtf8(compressed))
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testTinyBits() {
|
fun testTinyBits() {
|
||||||
var a0 = TinyBits()
|
var a0 = TinyBits()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user