From 3f83e842fe799c60c26cc5d31b1ab3469f308fc9 Mon Sep 17 00:00:00 2001 From: sergeych Date: Mon, 23 Dec 2024 01:00:34 +0300 Subject: [PATCH] !fix Long.encodeToHex bug, rewritten with new types +fast BitSet set with enums support --- .gitignore | 1 + .../net.sergeych.bintools/simple_codecs.kt | 8 +- .../kotlin/net/sergeych/collections/BitSet.kt | 342 ++++++++++++++++++ src/commonTest/kotlin/bipack/StorageTest.kt | 16 + .../kotlin/collections/CollectionsTest.kt | 107 +++++- 5 files changed, 458 insertions(+), 16 deletions(-) create mode 100644 src/commonMain/kotlin/net/sergeych/collections/BitSet.kt diff --git a/.gitignore b/.gitignore index 5d1e05e..abe0144 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /gradle/wrapper/gradle-wrapper.properties /node_modules .kotlin +/.gigaide/gigaide.properties diff --git a/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt index 2bc409b..c0ce42a 100644 --- a/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt +++ b/src/commonMain/kotlin/net.sergeych.bintools/simple_codecs.kt @@ -84,13 +84,11 @@ private val hexDigits = "0123456789ABCDEF" fun Long.encodeToHex(length: Int = 0): String { var result = "" - var value = this - val end = if( value >= 0 ) 0L else -1L -// if (value < 0) throw IllegalArgumentException("cant convert to hex negative (ambiguous)") + var value = this.toULong() do { - result = hexDigits[(value and 0x0f).toInt()] + result + result = hexDigits[(value and 0x0fu).toInt()] + result value = value shr 4 - } while (value != end) + } while (value != 0UL) while (result.length < length) result = "0" + result return result } diff --git a/src/commonMain/kotlin/net/sergeych/collections/BitSet.kt b/src/commonMain/kotlin/net/sergeych/collections/BitSet.kt new file mode 100644 index 0000000..0416487 --- /dev/null +++ b/src/commonMain/kotlin/net/sergeych/collections/BitSet.kt @@ -0,0 +1,342 @@ +package net.sergeych.collections + +import kotlinx.serialization.Serializable +import net.sergeych.bintools.encodeToHex +import net.sergeych.collections.BitSet.Companion.MAX_VALUE + +/** + * Bitset is a serializable set of __positive__ integers represented as bits in long array. + * This ought to be more effective, and sure it is more effective in serialized form, + * as long as maximum stored number is not too big, We recommend limit of 10-20k. + * + * It limits hold values to [MAX_VALUE] anyway to avoid fast memory depletion + * + * It has optimized bitwise operation based versions of [retainAll] and [removeAll], + * [intersect] and [isEmpty], that are used if their arguments, where used, are `BitSet` + * instances. Also [equals] works faster with BitSet and BitSet. + * + * Use [bitSetOf] and [bitSetOfEnum] to simply create bitsets. + * + * It also contains syntax sugar to work with enums directly: + * + * - [includes] and [includesAll] to check that enum is in set + * - [insert], [insertAll], [delete], and [deleteAll] to manipulate enum values + * - [toEnumSet] to convert to set of enums + * + */ +@Serializable +class BitSet(private val bits: MutableList = mutableListOf()) : MutableSet { + + fun set(element: Int, value: Boolean = true) = setBit(element, value) + + fun clear(element: Int) = set(element, false) + + operator fun plusAssign(element: Int) { + set(element) + } + + operator fun plus(element: Int): BitSet = BitSet(bits.toMutableList()).apply { set(element) } + + operator fun minusAssign(element: Int) { + clear(element) + } + + operator fun minus(element: Int): BitSet = BitSet(bits.toMutableList()).apply { clear(element) } + + private fun setBit(element: Int, value: Boolean): Boolean { + require(element >= 0, { "only positive numbers are allowed" }) + require(element < MAX_VALUE, { "maximum value allowed is $MAX_VALUE" }) + val offset = element shr 6 + val bit = element % 64 + return if (value) { + while (offset >= bits.size) bits.add(0) + val last = bits[offset] and masks[bit] + bits[offset] = bits[offset] or masks[bit] + last != 0L + } else { + if (offset < bits.size) { + // bigger, not existing means 0 + val last = bits[offset] and masks[bit] + bits[offset] = bits[offset] and maskNot[bit] + last != 0L + } else { + // already 0: index not in bits: + false + } + } + } + + private fun getBit(value: Int): Boolean { + val offset = value shr 6 + if (offset >= bits.size) return false + val bit = value % 64 + return (bits[offset] and masks[bit]) != 0L + } + + override fun add(element: Int): Boolean = setBit(element, true) + + override val size: Int + get() { + var count = 0 + for (w in bits) { + for (m in masks) { + if ((w and m) != 0L) count++ + } + } + return count + } + + override fun addAll(elements: Collection): Boolean { + var added = false + for (i in elements) if (setBit(i, true)) added = true + return added + + } + + override fun clear() { + bits.clear() + } + + override fun isEmpty(): Boolean { + if (bits.isEmpty()) return true + for (w in bits) if (w != 0L) return false + return true + } + + fun toList(): List { + var value = 0 + val result = mutableListOf() + for (w in bits) { + for (m in masks) { + if ((w and m) != 0L) result += value + value++ + } + } + return result + } + + fun toHex(): String = bits.toString() + " " + bits.joinToString(" ") { it.encodeToHex() } + + override fun iterator(): MutableIterator = object : MutableIterator { + private val i = toList().iterator() + private var last: Int? = null + override operator fun hasNext() = i.hasNext() + override fun next(): Int = i.next().also { last = it } + override fun remove() { + last?.let { clear(it) } ?: IllegalStateException("hasNext() was not called") + } + } + + private fun fastRetainAll(elements: BitSet): Boolean { + var result = false + for (i in bits.indices) { + if (i < elements.bits.size) { + val x = bits[i] + val y = x and elements.bits[i] + if (x != y) { + result = true + bits[i] = y + } + } else { + if (bits[i] != 0L) { + bits[i] = 0 + result = true + } + } + } + return result + } + + override fun retainAll(elements: Collection): Boolean { + return if (elements is BitSet) + fastRetainAll(elements) + else { + var value = 0 + var result = false + for ((i, _w) in bits.withIndex()) { + var w = _w + for (m in masks) { + if ((w and m) != 0L && value !in elements) { + w = w and m.inv() + bits[i] = w + result = true + } + value++ + } + } + result + } + } + + override fun removeAll(elements: Collection): Boolean { + return if (elements is BitSet) + fastRemoveAll(elements) + else { + var value = 0 + var result = false + for ((i, _w) in bits.withIndex()) { + var w = _w + for (m in masks) { + if ((w and m) != 0L && value in elements) { + w = w and m.inv() + bits[i] = w + result = true + } + value++ + } + } + result + } + } + + private fun fastRemoveAll(elements: BitSet): Boolean { + var result = false + for (i in bits.indices) { + if (i < elements.bits.size) { + val x = bits[i] + val y = x and elements.bits[i].inv() + if (x != y) { + bits[i] = y + result = true + } + } + } + println("fast2") + return result + } + + override fun remove(element: Int): Boolean = setBit(element, false) + + override fun containsAll(elements: Collection): Boolean { + for (e in elements) if (e !in this) return false + return true + } + + fun toIntSet() = toList().toSet() + + override fun contains(element: Int): Boolean = getBit(element) + + /** + * Check that this set contains and ordinal of a given enum element. + */ + infix fun includes(element: E) + where E : Enum<*> = contains(element.ordinal) + + /** + * Check that this set contains all elements using its ordinals. + */ + infix fun includesAll(elements: Collection) + where E : Enum<*> = elements.all { it.ordinal in this } + + fun intersect(other: Iterable): BitSet { + val result = toBitSet() + result.retainAll(other) + println("I: $this /\\ $other = $result") + return result + } + + override fun toString(): String { + return toList().toString() + } + + /** + * Checks that this set contains at least one element with ordinal + */ + infix inline fun includesAny(elements: Collection): Boolean + where E : Enum { + val ords = elements.map { it.ordinal }.toBitSet() + return !ords.intersect(this).isEmpty() + } + + /** + * Create an independent copy of this bitset + */ + fun toBitSet() = BitSet(bits.toMutableList()) + + inline fun toEnumSet(): Set + where T : Enum { + val values = enumValues() + val result = mutableSetOf() + for (i in this) result += values[i] + return result + } + + /** + * Insert an element of an enum by its ordinal, much like [add]. + * + * @return `true` if the element has actually been added, `false` if + * BitSet was not modified. + */ + fun insert(element: E): Boolean + where E : Enum<*> = add(element.ordinal) + + /** + * Remove an element of an enum using its ordinal, much like [remove]. + * + * @return `true` if the element has actually been removed, `false` if + * BitSet was not modified. + */ + fun delete(element: E): Boolean + where E : Enum<*> = remove(element.ordinal) + + /** + * Insert all elements using its ordinals, much like [addAll]. + * + * @return `true` if at lease one element has actually been added, `false` + * if BitSet was not modified. + */ + fun insertAll(element: Collection): Boolean + where E : Enum<*> = addAll(element.map { it.ordinal }) + + /** + * Remove all the elements using its ordinals, much like [removeAll]. + * + * @return `true` if at least one element has actually been removed, `false` if + * BitSet was not modified. + */ + fun deleteAll(elements: Collection): Boolean + where E : Enum<*> = removeAll(elements.map { it.ordinal }) + + /** + * Reduces storage size trying to compact storage. It might free some memory, depending + * on the platform implementation of lists and contents. Does not change stored values. + */ + fun compact() { + while( bits.isNotEmpty() && bits.last() == 0L ) bits.removeLast() + } + + override fun hashCode(): Int = bits.hashCode() + + override fun equals(other: Any?): Boolean { + if( other is BitSet ) { + compact(); other.compact() + return other.bits == this.bits + } + return toIntSet().equals( + if( other is Set<*>) other + else (other as Collection<*>).toSet() + ) + } + + + companion object { + val masks = Array(64) { (1L shl it) } + val maskNot = masks.map { it.inv() }.toLongArray() + + // limit size to ≈ 100kb + const val MAX_VALUE = 8_388_106 + } +} + +fun bitSetOf(vararg values: Int) = BitSet().apply { + for (i in values) add(i) +} + +fun > bitSetOfEnum(vararg values: E) = + BitSet().also { + for (v in values) it.add(v.ordinal) + } + + +fun Iterable.toBitSet(): BitSet = BitSet().also { it.addAll(this) } +fun IntArray.toBitSet(): BitSet = BitSet().also { it.addAll(this.asIterable()) } diff --git a/src/commonTest/kotlin/bipack/StorageTest.kt b/src/commonTest/kotlin/bipack/StorageTest.kt index f980f4c..c71936f 100644 --- a/src/commonTest/kotlin/bipack/StorageTest.kt +++ b/src/commonTest/kotlin/bipack/StorageTest.kt @@ -1,14 +1,17 @@ package bipack import net.sergeych.bintools.* +import net.sergeych.collections.bitSetOf import kotlin.test.Test import kotlin.test.assertEquals +import kotlin.test.assertFalse import kotlin.test.assertNull class StorageTest { @Test fun storageTest3() { val s1 = MemoryKVStorage() + val s2 = defaultNamedStorage("test_mp_bintools2") s2.clear() @@ -41,4 +44,17 @@ class StorageTest { assertEquals(42, s1.read("reason")) assertEquals(42, reason) } + + @Test + fun bitSetEquityTest() { + val a = bitSetOf(1, 12) + val b = bitSetOf(12, 1) + assertEquals(a, b) + assertEquals(b, a) + a += 1230 + assertFalse { a == b } + a -= 1230 + assertEquals(a, b) + assertEquals(b, a) + } } \ No newline at end of file diff --git a/src/commonTest/kotlin/collections/CollectionsTest.kt b/src/commonTest/kotlin/collections/CollectionsTest.kt index 802666f..c1c8052 100644 --- a/src/commonTest/kotlin/collections/CollectionsTest.kt +++ b/src/commonTest/kotlin/collections/CollectionsTest.kt @@ -6,8 +6,9 @@ import kotlinx.coroutines.test.advanceTimeBy import kotlinx.coroutines.test.resetMain import kotlinx.coroutines.test.runTest import kotlinx.coroutines.test.setMain -import net.sergeych.collections.ExpirableAsyncCache -import net.sergeych.collections.SortedList +import net.sergeych.bintools.toDump +import net.sergeych.bipack.BipackEncoder +import net.sergeych.collections.* import kotlin.test.* import kotlin.time.Duration.Companion.milliseconds @@ -15,14 +16,14 @@ class CollectionsTest { @Test fun testSortedList1() { - val a1 = SortedList(5,4,3,9,1) + val a1 = SortedList(5, 4, 3, 9, 1) assertTrue { 4 in a1 } assertTrue { 14 !in a1 } - fun >test(x: SortedList) { + fun > test(x: SortedList) { var last: T? = null for (i in x.toList()) { - if( last == null ) last = i - else if( last > i ) fail("invalid order: $last should be <= $i") + if (last == null) last = i + else if (last > i) fail("invalid order: $last should be <= $i") assertContains(x, i) } } @@ -31,16 +32,16 @@ class CollectionsTest { println(a1.toList()) assertEquals(listOf(-55, 0, 0, 0, 1, 1, 1, 2, 3, 3, 4, 5, 9, 9, 11, 22), a1.toList()) assertEquals(11, a1.find(11)) - assertEquals(listOf(0,0,0), a1.findAll(0)) + assertEquals(listOf(0, 0, 0), a1.findAll(0)) assertEquals(listOf(11), a1.findAll(11)) - assertEquals(listOf(3,3), a1.findAll(3)) + assertEquals(listOf(3, 3), a1.findAll(3)) assertTrue { a1.remove(3) } assertEquals(listOf(3), a1.findAll(3)) assertTrue { a1.remove(3) } assertEquals(listOf(), a1.findAll(3)) assertTrue { 3 !in a1 } assertEquals(3, a1.findAll(1).size) - assertEquals( 3, a1.removeAll(1)) + assertEquals(3, a1.removeAll(1)) assertTrue { 1 !in a1 } assertEquals(listOf(-55, 0, 0, 0, 2, 4, 5, 9, 9, 11, 22), a1.toList()) } @@ -49,7 +50,7 @@ class CollectionsTest { @Test fun expirableAsyncCacheTest() = runTest { val removedValues = mutableSetOf() - val m = ExpirableAsyncCache(500.milliseconds) { + val m = ExpirableAsyncCache(500.milliseconds) { removedValues += it } m.put("one", 1) @@ -67,7 +68,7 @@ class CollectionsTest { m.getOrDefault("two", 22) assertEquals(2, m.get("two")) - m.getOrPut("two") {222} + m.getOrPut("two") { 222 } assertEquals(2, m.get("two")) m.getOrPut("three") { 3 } @@ -78,4 +79,88 @@ class CollectionsTest { // delay(1000) // assertNull(m.get("one")) } + + enum class Nn { + One, Two, Three + } + + @Test + fun bitsetTest() { + fun checkAdd(vararg values: Int) { + val x = bitSetOf(*values) + println(":: ${values.toList()}: ${x.toHex()}") + assertEquals(values.toSet(), x.toIntSet()) + for (i in values) { + assertTrue(i in x, "failed $i in ${values.toList()}") + } + } + checkAdd(0, 1, 2, 3) + val src = intArrayOf(31, 32, 33, 60, 61, 62, 63, 64, 65) + checkAdd(*src) + + assertEquals(src.toSet(), src.toBitSet().toIntSet()) + assertFalse { src.toSet() != src.toBitSet() } + assertFalse { src.toSet() + 17 == src.toBitSet() } + assertEquals(src.toBitSet() + 17, src.toSet() + 17, ) + assertTrue { src.toSet() + 17 == src.toBitSet() + 17 } + assertTrue { src.toBitSet() + 17 == src.toBitSet() + 17 } + + var y = src.toBitSet() + 2 + assertTrue { y.retainAll(setOf(1, 3, 31, 32, 33)) } + assertEquals(setOf(31, 32, 33), y.toIntSet()) + assertFalse { y.retainAll(setOf(1, 3, 31, 32, 33)) } + + y = src.toBitSet() + 2 + for (i in setOf(2, 31, 32, 33)) + assertTrue(i in y, "failed $i in ${y.toList()}") + assertTrue { y.retainAll(bitSetOf(1, 3, 31, 32, 33)) } + assertEquals(setOf(31, 32, 33), y.toIntSet()) + assertFalse { y.retainAll(setOf(1, 3, 31, 32, 33)) } + + var z = src.toBitSet() + 2 + assertTrue { z.removeAll(setOf(31, 65)) } + assertEquals(listOf(2, 32, 33, 60, 61, 62, 63, 64), z.toList()) + assertFalse { z.removeAll(setOf(31, 65)) } + + z = src.toBitSet() + 2 + assertTrue { z.removeAll(bitSetOf(31, 65)) } + assertEquals(listOf(2, 32, 33, 60, 61, 62, 63, 64), z.toList()) + assertFalse { z.removeAll(setOf(31, 65)) } + + z = src.toBitSet() + 2 + assertTrue { z.removeAll(bitSetOf(31, 32)) } + assertEquals(listOf(2, 33, 60, 61, 62, 63, 64, 65), z.toList()) + assertFalse { z.removeAll(setOf(31, 4)) } + + assertTrue { + BipackEncoder.encode(src.toSet()).size > BipackEncoder.encode(src.toBitSet()).size + } + + assertFalse { z includes Nn.Two } + assertTrue { z includes Nn.Three } + assertTrue { z + 1 includesAll listOf(Nn.Three, Nn.Two) } + + assertEquals(setOf(Nn.One, Nn.Three), bitSetOf(0, 2).toEnumSet()) + assertTrue { z + 1 includesAny setOf(Nn.One, Nn.Two) } + } + + @Test + fun bitsetEnumsTest() { + val a = bitSetOfEnum(Nn.One) + assertTrue { a includes Nn.One } + assertFalse { a includes Nn.Two } + assertFalse { a includes Nn.Three } + a.insert(Nn.Three) + assertEquals(setOf(Nn.One, Nn.Three), a.toEnumSet()) + a.delete(Nn.One) + assertEquals(setOf(Nn.Three), a.toEnumSet()) + a.insertAll(listOf(Nn.Two, Nn.Three)) + assertEquals(setOf(Nn.Two, Nn.Three), a.toEnumSet()) + assertTrue { a includesAll listOf(Nn.Two, Nn.Three) } + assertFalse { a includesAll listOf(Nn.One, Nn.Two) } + assertTrue { a includesAny listOf(Nn.One, Nn.Two) } + a.deleteAll(listOf(Nn.Two, Nn.Three, Nn.One)) + assertTrue { a.isEmpty() } + assertTrue { a.toEnumSet().isEmpty() } + } } \ No newline at end of file