!fix Long.encodeToHex bug, rewritten with new types

+fast BitSet set with enums support
This commit is contained in:
Sergey Chernov 2024-12-23 01:00:34 +03:00
parent f6d2422cc1
commit 3f83e842fe
5 changed files with 458 additions and 16 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@
/gradle/wrapper/gradle-wrapper.properties
/node_modules
.kotlin
/.gigaide/gigaide.properties

View File

@ -84,13 +84,11 @@ private val hexDigits = "0123456789ABCDEF"
/**
 * Encode this value as an uppercase hexadecimal string.
 *
 * The receiver is reinterpreted as an unsigned 64-bit number, so negative values are
 * rendered as their two's complement bit pattern (`-1L` gives sixteen `F` digits) instead of
 * looping on an arithmetic shift that never reaches zero.
 *
 * @param length minimal length of the result; shorter results are left-padded with `'0'`.
 *      Values that are zero or negative request no padding.
 * @return the hex digits, most significant first; always at least one digit.
 */
fun Long.encodeToHex(length: Int = 0): String =
    toULong()
        .toString(16)
        .uppercase()
        // padStart rejects negative lengths, while "no padding" is the behavior callers expect
        .padStart(length.coerceAtLeast(0), '0')

View File

@ -0,0 +1,342 @@
package net.sergeych.collections
import kotlinx.serialization.Serializable
import net.sergeych.bintools.encodeToHex
import net.sergeych.collections.BitSet.Companion.MAX_VALUE
/**
 * BitSet is a serializable set of __non-negative__ integers represented as bits in a list of longs.
 * This ought to be more effective, and sure it is more effective in serialized form,
 * as long as the maximum stored number is not too big. We recommend a limit of 10-20k.
 *
 * It limits held values to [MAX_VALUE] anyway to avoid fast memory depletion.
 *
 * It has optimized, bitwise operation based versions of [retainAll] and [removeAll]
 * (and therefore [intersect]) that are used when their argument is a `BitSet` instance.
 * [isEmpty], [size] and [equals] against another `BitSet` also work on whole words.
 *
 * Use [bitSetOf] and [bitSetOfEnum] to simply create bitsets.
 *
 * It also contains syntax sugar to work with enums directly, using enum ordinals as elements:
 *
 * - [includes], [includesAll] and [includesAny] to check that enums are in the set
 * - [insert], [insertAll], [delete], and [deleteAll] to manipulate enum values
 * - [toEnumSet] to convert to a set of enums
 *
 */
@Serializable
class BitSet(private val bits: MutableList<Long> = mutableListOf()) : MutableSet<Int> {

    /**
     * Set (or clear, when [value] is `false`) the bit for [element].
     *
     * @return the __previous__ state of the bit: `true` if [element] was in the set before the call.
     * @throws IllegalArgumentException if [element] is negative or greater than [MAX_VALUE].
     */
    fun set(element: Int, value: Boolean = true) = setBit(element, value)

    /**
     * Clear the bit for [element].
     *
     * @return the previous state of the bit, see [set].
     */
    fun clear(element: Int) = set(element, false)

    operator fun plusAssign(element: Int) {
        set(element)
    }

    /** Create a copy of this set that also contains [element]; this set is not modified. */
    operator fun plus(element: Int): BitSet = toBitSet().apply { set(element) }

    operator fun minusAssign(element: Int) {
        clear(element)
    }

    /** Create a copy of this set without [element]; this set is not modified. */
    operator fun minus(element: Int): BitSet = toBitSet().apply { clear(element) }

    /**
     * Write a single bit, growing the storage when a bit is set past its current end.
     *
     * @return the previous state of the bit.
     */
    private fun setBit(element: Int, value: Boolean): Boolean {
        require(element >= 0) { "only positive numbers are allowed" }
        require(element <= MAX_VALUE) { "maximum value allowed is $MAX_VALUE" }
        val offset = element shr 6
        val bit = element and 63
        if (offset >= bits.size) {
            // words past the end of the storage are implicitly zero: nothing to clear there
            if (!value) return false
            while (offset >= bits.size) bits.add(0L)
        }
        val word = bits[offset]
        val wasSet = (word and masks[bit]) != 0L
        bits[offset] = if (value) word or masks[bit] else word and maskNot[bit]
        return wasSet
    }

    /** Read a single bit; anything outside the stored range (including negatives) is not in the set. */
    private fun getBit(value: Int): Boolean {
        if (value < 0) return false
        val offset = value shr 6
        if (offset >= bits.size) return false
        return (bits[offset] and masks[value and 63]) != 0L
    }

    /**
     * Add [element] to the set.
     *
     * @return `true` if the element has actually been added, `false` if it was already present.
     */
    // setBit reports the previous bit state, which is the opposite of "was added"
    override fun add(element: Int): Boolean = !setBit(element, true)

    /** Number of elements in the set, computed by counting the set bits of every word. */
    override val size: Int
        get() = bits.sumOf { it.countOneBits() }

    /**
     * Add all [elements].
     *
     * @return `true` if at least one element was not present before and has been added.
     */
    override fun addAll(elements: Collection<Int>): Boolean {
        var added = false
        for (e in elements) if (!setBit(e, true)) added = true
        return added
    }

    override fun clear() {
        bits.clear()
    }

    override fun isEmpty(): Boolean = bits.all { it == 0L }

    /** Elements of this set in ascending order. */
    fun toList(): List<Int> {
        val result = mutableListOf<Int>()
        for ((index, word) in bits.withIndex()) {
            if (word == 0L) continue
            val base = index shl 6
            for (bit in 0 until 64) {
                if ((word and masks[bit]) != 0L) result += base + bit
            }
        }
        return result
    }

    /** Debug representation: the raw words as decimal list followed by the same words in hex. */
    fun toHex(): String = bits.toString() + " " + bits.joinToString(" ") { it.encodeToHex() }

    /**
     * Iterate over a snapshot of the elements taken when the iterator is created, in ascending
     * order. [MutableIterator.remove] removes the last returned element from this set.
     */
    override fun iterator(): MutableIterator<Int> = object : MutableIterator<Int> {
        private val snapshot = toList().iterator()
        private var last: Int? = null

        override operator fun hasNext() = snapshot.hasNext()

        override fun next(): Int = snapshot.next().also { last = it }

        override fun remove() {
            val element = last
                ?: throw IllegalStateException("next() was not called or the element is already removed")
            clear(element)
            last = null
        }
    }

    /** Word-wise `this = this AND elements`; words missing in [elements] count as zero. */
    private fun fastRetainAll(elements: BitSet): Boolean {
        var changed = false
        for (i in bits.indices) {
            val current = bits[i]
            val kept = if (i < elements.bits.size) current and elements.bits[i] else 0L
            if (kept != current) {
                bits[i] = kept
                changed = true
            }
        }
        return changed
    }

    /** Word-wise `this = this AND NOT elements`. */
    private fun fastRemoveAll(elements: BitSet): Boolean {
        var changed = false
        for (i in 0 until minOf(bits.size, elements.bits.size)) {
            val current = bits[i]
            val kept = current and elements.bits[i].inv()
            if (kept != current) {
                bits[i] = kept
                changed = true
            }
        }
        return changed
    }

    /**
     * Clear every set bit whose value satisfies [shouldClear].
     *
     * @return `true` if at least one bit has been cleared.
     */
    private inline fun clearWhere(shouldClear: (Int) -> Boolean): Boolean {
        var changed = false
        for (index in bits.indices) {
            val word = bits[index]
            if (word == 0L) continue
            val base = index shl 6
            var kept = word
            for (bit in 0 until 64) {
                if ((word and masks[bit]) != 0L && shouldClear(base + bit)) kept = kept and maskNot[bit]
            }
            if (kept != word) {
                bits[index] = kept
                changed = true
            }
        }
        return changed
    }

    /**
     * Keep only elements that are also in [elements]. Uses bitwise operations when
     * [elements] is a `BitSet`.
     *
     * @return `true` if this set has been modified.
     */
    override fun retainAll(elements: Collection<Int>): Boolean =
        if (elements is BitSet) fastRetainAll(elements)
        else clearWhere { it !in elements }

    /**
     * Remove all elements that are in [elements]. Uses bitwise operations when
     * [elements] is a `BitSet`.
     *
     * @return `true` if this set has been modified.
     */
    override fun removeAll(elements: Collection<Int>): Boolean =
        if (elements is BitSet) fastRemoveAll(elements)
        else clearWhere { it in elements }

    /**
     * Remove [element] from the set.
     *
     * @return `true` if the element was present and has been removed.
     */
    override fun remove(element: Int): Boolean = setBit(element, false)

    override fun containsAll(elements: Collection<Int>): Boolean = elements.all { it in this }

    /** Copy the elements into a regular read-only set. */
    fun toIntSet() = toList().toSet()

    override fun contains(element: Int): Boolean = getBit(element)

    /**
     * Check that this set contains an ordinal of a given enum element.
     */
    infix fun <E> includes(element: E)
            where E : Enum<*> = contains(element.ordinal)

    /**
     * Check that this set contains all elements using its ordinals.
     */
    infix fun <E> includesAll(elements: Collection<E>)
            where E : Enum<*> = elements.all { it.ordinal in this }

    /**
     * Create a new set holding the elements of this set that are also in [other].
     * This set is not modified.
     */
    fun intersect(other: Iterable<Int>): BitSet {
        val result = toBitSet()
        result.retainAll(other)
        return result
    }

    override fun toString(): String {
        return toList().toString()
    }

    /**
     * Checks that this set contains the ordinal of at least one of the given enum elements.
     */
    infix inline fun <reified E> includesAny(elements: Collection<E>): Boolean
            where E : Enum<E> = elements.any { it.ordinal in this }

    /**
     * Create an independent copy of this bitset
     */
    fun toBitSet() = BitSet(bits.toMutableList())

    /**
     * Convert to a set of enum values of type [T], treating every element as an ordinal.
     * An element that is not a valid ordinal of [T] fails with an index out of bounds error.
     */
    inline fun <reified T> toEnumSet(): Set<T>
            where T : Enum<T> {
        val values = enumValues<T>()
        val result = mutableSetOf<T>()
        for (i in toList()) result += values[i]
        return result
    }

    /**
     * Insert an element of an enum by its ordinal, much like [add].
     *
     * @return `true` if the element has actually been added, `false` if
     *      BitSet was not modified.
     */
    fun <E> insert(element: E): Boolean
            where E : Enum<*> = add(element.ordinal)

    /**
     * Remove an element of an enum using its ordinal, much like [remove].
     *
     * @return `true` if the element has actually been removed, `false` if
     *      BitSet was not modified.
     */
    fun <E> delete(element: E): Boolean
            where E : Enum<*> = remove(element.ordinal)

    /**
     * Insert all elements using its ordinals, much like [addAll].
     *
     * @return `true` if at least one element has actually been added, `false`
     *      if BitSet was not modified.
     */
    fun <E> insertAll(element: Collection<E>): Boolean
            where E : Enum<*> = addAll(element.map { it.ordinal })

    /**
     * Remove all the elements using its ordinals, much like [removeAll].
     *
     * @return `true` if at least one element has actually been removed, `false` if
     *      BitSet was not modified.
     */
    fun <E> deleteAll(elements: Collection<E>): Boolean
            where E : Enum<*> = removeAll(elements.map { it.ordinal })

    /**
     * Reduces storage size trying to compact storage. It might free some memory, depending
     * on the platform implementation of lists and contents. Does not change stored values.
     */
    fun compact() {
        while (bits.isNotEmpty() && bits.last() == 0L) bits.removeAt(bits.lastIndex)
    }

    /** Compare stored words, treating words missing at the end of the shorter storage as zero. */
    private fun sameBits(other: BitSet): Boolean {
        val common = minOf(bits.size, other.bits.size)
        for (i in 0 until common) if (bits[i] != other.bits[i]) return false
        val longer = if (bits.size > common) bits else other.bits
        for (i in common until longer.size) if (longer[i] != 0L) return false
        return true
    }

    /**
     * Sum of the elements, which is what the general `Set` contract prescribes for a set of
     * integers. It does not depend on trailing zero words in the storage, so it is
     * consistent with [equals].
     */
    override fun hashCode(): Int = toList().sum()

    /**
     * A BitSet equals another BitSet holding the same elements (compared word by word, without
     * modifying either of them), and any other collection whose distinct elements are
     * exactly the elements of this set. Anything else, including `null`, is not equal.
     */
    override fun equals(other: Any?): Boolean = when {
        other === this -> true
        other is BitSet -> sameBits(other)
        other is Set<*> -> toIntSet() == other
        other is Collection<*> -> toIntSet() == other.toSet()
        else -> false
    }

    companion object {
        /** Single-bit masks: `masks[i]` has only the bit `i` set. */
        val masks = Array(64) { (1L shl it) }

        /** Inverted [masks]: `maskNot[i]` has every bit but `i` set. */
        val maskNot = masks.map { it.inv() }.toLongArray()

        /**
         * The biggest value that can be stored. Holding it takes 131_065 longs,
         * which is about 1 MiB of raw bit storage.
         */
        const val MAX_VALUE = 8_388_106
    }
}
/**
 * Create a [BitSet] containing all the given [values]; duplicates are stored once.
 */
fun bitSetOf(vararg values: Int) = BitSet().also { result ->
    values.forEach { result.add(it) }
}
/**
 * Create a [BitSet] containing the ordinals of all the given enum [values].
 */
fun <E : Enum<*>> bitSetOfEnum(vararg values: E) =
    BitSet().apply {
        values.forEach { add(it.ordinal) }
    }
/**
 * Collect the integers of this iterable into a new [BitSet].
 */
fun Iterable<Int>.toBitSet(): BitSet {
    val result = BitSet()
    result.addAll(this)
    return result
}
/**
 * Collect the integers of this array into a new [BitSet].
 */
fun IntArray.toBitSet(): BitSet {
    val result = BitSet()
    result.addAll(asIterable())
    return result
}

View File

@ -1,14 +1,17 @@
package bipack
import net.sergeych.bintools.*
import net.sergeych.collections.bitSetOf
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertNull
class StorageTest {
@Test
fun storageTest3() {
val s1 = MemoryKVStorage()
val s2 = defaultNamedStorage("test_mp_bintools2")
s2.clear()
@ -41,4 +44,17 @@ class StorageTest {
assertEquals(42, s1.read("reason"))
assertEquals(42, reason)
}
@Test
fun bitSetEquityTest() {
    val first = bitSetOf(1, 12)
    val second = bitSetOf(12, 1)

    // the order of insertion must not matter, and equality must hold in both directions
    assertEquals(first, second)
    assertEquals(second, first)

    // an extra element breaks the equality...
    first += 1230
    assertFalse(first == second)

    // ...and removing it restores the equality
    first -= 1230
    assertEquals(first, second)
    assertEquals(second, first)
}
}

View File

@ -6,8 +6,9 @@ import kotlinx.coroutines.test.advanceTimeBy
import kotlinx.coroutines.test.resetMain
import kotlinx.coroutines.test.runTest
import kotlinx.coroutines.test.setMain
import net.sergeych.collections.ExpirableAsyncCache
import net.sergeych.collections.SortedList
import net.sergeych.bintools.toDump
import net.sergeych.bipack.BipackEncoder
import net.sergeych.collections.*
import kotlin.test.*
import kotlin.time.Duration.Companion.milliseconds
@ -78,4 +79,88 @@ class CollectionsTest {
// delay(1000)
// assertNull(m.get("one"))
}
/** Sample enum for the bitset tests; its ordinals 0, 1 and 2 are used as set elements. */
enum class Nn {
    One,
    Two,
    Three,
}
@Test
fun bitsetTest() {
    // Build a bitset of the given values and verify it holds exactly these values.
    fun checkAdd(vararg values: Int) {
        val x = bitSetOf(*values)
        println(":: ${values.toList()}: ${x.toHex()}")
        assertEquals(values.toSet(), x.toIntSet())
        values.forEach { i ->
            assertTrue(i in x, "failed $i in ${values.toList()}")
        }
    }

    // values inside one word, then values around the 64-bit word boundary
    checkAdd(0, 1, 2, 3)
    val sample = intArrayOf(31, 32, 33, 60, 61, 62, 63, 64, 65)
    checkAdd(*sample)

    // equality between regular sets and bitsets
    assertEquals(sample.toSet(), sample.toBitSet().toIntSet())
    assertFalse(sample.toSet() != sample.toBitSet())
    assertFalse(sample.toSet() + 17 == sample.toBitSet())
    assertEquals(sample.toBitSet() + 17, sample.toSet() + 17)
    assertTrue(sample.toSet() + 17 == sample.toBitSet() + 17)
    assertTrue(sample.toBitSet() + 17 == sample.toBitSet() + 17)

    // retainAll with a regular set argument
    var y = sample.toBitSet() + 2
    assertTrue(y.retainAll(setOf(1, 3, 31, 32, 33)))
    assertEquals(setOf(31, 32, 33), y.toIntSet())
    assertFalse(y.retainAll(setOf(1, 3, 31, 32, 33)))

    // retainAll with a bitset argument
    y = sample.toBitSet() + 2
    for (i in setOf(2, 31, 32, 33)) {
        assertTrue(i in y, "failed $i in ${y.toList()}")
    }
    assertTrue(y.retainAll(bitSetOf(1, 3, 31, 32, 33)))
    assertEquals(setOf(31, 32, 33), y.toIntSet())
    assertFalse(y.retainAll(setOf(1, 3, 31, 32, 33)))

    // removeAll with a regular set argument
    var pruned = sample.toBitSet() + 2
    assertTrue(pruned.removeAll(setOf(31, 65)))
    assertEquals(listOf(2, 32, 33, 60, 61, 62, 63, 64), pruned.toList())
    assertFalse(pruned.removeAll(setOf(31, 65)))

    // removeAll with a bitset argument
    pruned = sample.toBitSet() + 2
    assertTrue(pruned.removeAll(bitSetOf(31, 65)))
    assertEquals(listOf(2, 32, 33, 60, 61, 62, 63, 64), pruned.toList())
    assertFalse(pruned.removeAll(setOf(31, 65)))

    pruned = sample.toBitSet() + 2
    assertTrue(pruned.removeAll(bitSetOf(31, 32)))
    assertEquals(listOf(2, 33, 60, 61, 62, 63, 64, 65), pruned.toList())
    assertFalse(pruned.removeAll(setOf(31, 4)))

    // the bitset must be smaller than the regular set when serialized
    val plainSize = BipackEncoder.encode(sample.toSet()).size
    val packedSize = BipackEncoder.encode(sample.toBitSet()).size
    assertTrue(plainSize > packedSize)

    // enum helpers: at this point 2 is in the set and 1 is not
    assertFalse(pruned includes Nn.Two)
    assertTrue(pruned includes Nn.Three)
    assertTrue(pruned + 1 includesAll listOf(Nn.Three, Nn.Two))
    assertEquals(setOf(Nn.One, Nn.Three), bitSetOf(0, 2).toEnumSet())
    assertTrue(pruned + 1 includesAny setOf(Nn.One, Nn.Two))
}
@Test
fun bitsetEnumsTest() {
    val flags = bitSetOfEnum(Nn.One)

    // only the enum passed to the factory is present
    assertTrue(flags includes Nn.One)
    assertFalse(flags includes Nn.Two)
    assertFalse(flags includes Nn.Three)

    // single element insert and delete
    flags.insert(Nn.Three)
    assertEquals(setOf(Nn.One, Nn.Three), flags.toEnumSet())
    flags.delete(Nn.One)
    assertEquals(setOf(Nn.Three), flags.toEnumSet())

    // bulk insert, with one element already present
    flags.insertAll(listOf(Nn.Two, Nn.Three))
    assertEquals(setOf(Nn.Two, Nn.Three), flags.toEnumSet())
    assertTrue(flags includesAll listOf(Nn.Two, Nn.Three))
    assertFalse(flags includesAll listOf(Nn.One, Nn.Two))
    assertTrue(flags includesAny listOf(Nn.One, Nn.Two))

    // bulk delete, with one element not present
    flags.deleteAll(listOf(Nn.Two, Nn.Three, Nn.One))
    assertTrue(flags.isEmpty())
    assertTrue(flags.toEnumSet<Nn>().isEmpty())
}
}