| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- // Package intmap contains a fast hashmap implementation for maps with keys of any integer type
- package intmap
- import (
- "iter"
- "math"
- )
- // IntKey is a type constraint for values that can be used as keys in Map
- type IntKey interface {
- ~int | ~uint | ~int64 | ~uint64 | ~int32 | ~uint32 | ~int16 | ~uint16 | ~int8 | ~uint8 | ~uintptr
- }
- // pair represents a key-value pair in Map.
- //
- // It is an important detail that V is before K in the memory layout. Despite it feeling more natural to have K first!
- // We must have sizeof(pair[K,struct{}]) == sizeof(K), to minimize memory consumption when using a Set.
- // If V is last, then &p.V can point to invalid memory, which is not permitted. This makes the Go compiler emit
- // some padding for the pair struct in that case.
- // See https://github.com/kamstrup/intmap/pull/6#issuecomment-3581008879
- type pair[K IntKey, V any] struct {
- V V
- K K
- }
- const fillFactorBase64 = 7
- const fillFactor64 = fillFactorBase64 / 10.0
- func phiMix64(x int) int {
- h := int64(x) * int64(0x9E3779B9)
- return int(h ^ (h >> 16))
- }
- // Map is a hashmap where the keys are some any integer type.
- // It is valid to call methods that read a nil map, similar to a standard Go map.
- // Methods valid on a nil map are Has, Get, Len, and ForEach.
- type Map[K IntKey, V any] struct {
- data []pair[K, V] // key-value pairs
- size int
- zeroVal V // value of 'zero' key
- hasZeroKey bool // do we have 'zero' key in the map?
- }
- // New creates a new map with keys being any integer subtype.
- // The map can store up to the given capacity before reallocation and rehashing occurs.
- func New[K IntKey, V any](capacity int) *Map[K, V] {
- return &Map[K, V]{
- data: make([]pair[K, V], arraySize(capacity, fillFactor64)),
- }
- }
- // Has checks if the given key exists in the map.
- // Calling this method on a nil map will return false.
- func (m *Map[K, V]) Has(key K) bool {
- if m == nil {
- return false
- }
- if key == K(0) {
- return m.hasZeroKey
- }
- idx := m.startIndex(key)
- p := m.data[idx]
- if p.K == K(0) { // end of chain already
- return false
- }
- if p.K == key { // we check zero prior to this call
- return true
- }
- // hash collision, seek next hash match, bailing on first empty
- for {
- idx = m.nextIndex(idx)
- p = m.data[idx]
- if p.K == K(0) {
- return false
- }
- if p.K == key {
- return true
- }
- }
- }
- // Get returns the value if the key is found.
- // If you just need to check for existence it is easier to use Has.
- // Calling this method on a nil map will return the zero value for V and false.
- func (m *Map[K, V]) Get(key K) (V, bool) {
- if m == nil {
- var zero V
- return zero, false
- }
- if key == K(0) {
- if m.hasZeroKey {
- return m.zeroVal, true
- }
- var zero V
- return zero, false
- }
- idx := m.startIndex(key)
- p := m.data[idx]
- if p.K == K(0) { // end of chain already
- var zero V
- return zero, false
- }
- if p.K == key { // we check zero prior to this call
- return p.V, true
- }
- // hash collision, seek next hash match, bailing on first empty
- for {
- idx = m.nextIndex(idx)
- p = m.data[idx]
- if p.K == K(0) {
- var zero V
- return zero, false
- }
- if p.K == key {
- return p.V, true
- }
- }
- }
- // Put adds or updates key with value val.
- func (m *Map[K, V]) Put(key K, val V) {
- if key == K(0) {
- if !m.hasZeroKey {
- m.size++
- }
- m.zeroVal = val
- m.hasZeroKey = true
- return
- }
- idx := m.startIndex(key)
- p := &m.data[idx]
- if p.K == K(0) { // end of chain already
- p.K = key
- p.V = val
- if m.size >= m.sizeThreshold() {
- m.rehash()
- } else {
- m.size++
- }
- return
- } else if p.K == key { // overwrite existing value
- p.V = val
- return
- }
- // hash collision, seek next empty or key match
- for {
- idx = m.nextIndex(idx)
- p = &m.data[idx]
- if p.K == K(0) {
- p.K = key
- p.V = val
- if m.size >= m.sizeThreshold() {
- m.rehash()
- } else {
- m.size++
- }
- return
- } else if p.K == key {
- p.V = val
- return
- }
- }
- }
- // PutIfNotExists adds the key-value pair only if the key does not already exist
- // in the map, and returns the current value associated with the key and a boolean
- // indicating whether the value was newly added or not.
- func (m *Map[K, V]) PutIfNotExists(key K, val V) (V, bool) {
- if key == K(0) {
- if m.hasZeroKey {
- return m.zeroVal, false
- }
- m.zeroVal = val
- m.hasZeroKey = true
- m.size++
- return val, true
- }
- idx := m.startIndex(key)
- p := &m.data[idx]
- if p.K == K(0) { // end of chain already
- p.K = key
- p.V = val
- m.size++
- if m.size >= m.sizeThreshold() {
- m.rehash()
- }
- return val, true
- } else if p.K == key {
- return p.V, false
- }
- // hash collision, seek next hash match, bailing on first empty
- for {
- idx = m.nextIndex(idx)
- p = &m.data[idx]
- if p.K == K(0) {
- p.K = key
- p.V = val
- m.size++
- if m.size >= m.sizeThreshold() {
- m.rehash()
- }
- return val, true
- } else if p.K == key {
- return p.V, false
- }
- }
- }
- // ForEach iterates through key-value pairs in the map while the function f returns true.
- // This method returns immediately if invoked on a nil map.
- //
- // The iteration order of a Map is not defined, so please avoid relying on it.
- func (m *Map[K, V]) ForEach(f func(K, V) bool) {
- if m == nil {
- return
- }
- if m.hasZeroKey && !f(K(0), m.zeroVal) {
- return
- }
- forEach64(m.data, f)
- }
- // All returns an iterator over key-value pairs from m.
- // The iterator returns immediately if invoked on a nil map.
- //
- // The iteration order of a Map is not defined, so please avoid relying on it.
- func (m *Map[K, V]) All() iter.Seq2[K, V] {
- return m.ForEach
- }
- // Keys returns an iterator over keys in m.
- // The iterator returns immediately if invoked on a nil map.
- //
- // The iteration order of a Map is not defined, so please avoid relying on it.
- func (m *Map[K, V]) Keys() iter.Seq[K] {
- return func(yield func(k K) bool) {
- if m == nil {
- return
- }
- if m.hasZeroKey && !yield(K(0)) {
- return
- }
- for _, p := range m.data {
- if p.K != K(0) && !yield(p.K) {
- return
- }
- }
- }
- }
- // Values returns an iterator over values in m.
- // The iterator returns immediately if invoked on a nil map.
- //
- // The iteration order of a Map is not defined, so please avoid relying on it.
- func (m *Map[K, V]) Values() iter.Seq[V] {
- return func(yield func(v V) bool) {
- if m == nil {
- return
- }
- if m.hasZeroKey && !yield(m.zeroVal) {
- return
- }
- for _, p := range m.data {
- if p.K != K(0) && !yield(p.V) {
- return
- }
- }
- }
- }
- // Clear removes all items from the map, but keeps the internal buffers for reuse.
- func (m *Map[K, V]) Clear() {
- var zero V
- m.hasZeroKey = false
- m.zeroVal = zero
- // compiles down to runtime.memclr()
- for i := range m.data {
- m.data[i] = pair[K, V]{}
- }
- m.size = 0
- }
- func (m *Map[K, V]) rehash() {
- oldData := m.data
- m.data = make([]pair[K, V], 2*len(m.data))
- // reset size
- if m.hasZeroKey {
- m.size = 1
- } else {
- m.size = 0
- }
- forEach64(oldData, func(k K, v V) bool {
- m.Put(k, v)
- return true
- })
- }
- // Len returns the number of elements in the map.
- // The length of a nil map is defined to be zero.
- func (m *Map[K, V]) Len() int {
- if m == nil {
- return 0
- }
- return m.size
- }
- func (m *Map[K, V]) sizeThreshold() int {
- return int(uint64(len(m.data)) * fillFactorBase64 / 10)
- }
- func (m *Map[K, V]) startIndex(key K) int {
- return startIndex(int(key), len(m.data))
- }
- func (m *Map[K, V]) nextIndex(idx int) int {
- return nextIndex(idx, len(m.data))
- }
- func forEach64[K IntKey, V any](pairs []pair[K, V], f func(k K, v V) bool) {
- for _, p := range pairs {
- if p.K != K(0) && !f(p.K, p.V) {
- return
- }
- }
- }
- // Del deletes a key and its value, returning true iff the key was found
- func (m *Map[K, V]) Del(key K) bool {
- if key == K(0) {
- if m.hasZeroKey {
- m.hasZeroKey = false
- m.size--
- return true
- }
- return false
- }
- idx := m.startIndex(key)
- p := m.data[idx]
- if p.K == key {
- // any keys that were pushed back needs to be shifted nack into the empty slot
- // to avoid breaking the chain
- m.shiftKeys(idx)
- m.size--
- return true
- } else if p.K == K(0) { // end of chain already
- return false
- }
- for {
- idx = m.nextIndex(idx)
- p = m.data[idx]
- if p.K == key {
- // any keys that were pushed back needs to be shifted nack into the empty slot
- // to avoid breaking the chain
- m.shiftKeys(idx)
- m.size--
- return true
- } else if p.K == K(0) {
- return false
- }
- }
- }
- func (m *Map[K, V]) shiftKeys(idx int) int {
- // Shift entries with the same hash.
- // We need to do this on deletion to ensure we don't have zeroes in the hash chain
- for {
- var p pair[K, V]
- lastIdx := idx
- idx = m.nextIndex(idx)
- for {
- p = m.data[idx]
- if p.K == K(0) {
- m.data[lastIdx] = pair[K, V]{}
- return lastIdx
- }
- slot := m.startIndex(p.K)
- if lastIdx <= idx {
- if lastIdx >= slot || slot > idx {
- break
- }
- } else {
- if lastIdx >= slot && slot > idx {
- break
- }
- }
- idx = m.nextIndex(idx)
- }
- m.data[lastIdx] = p
- }
- }
- func nextPowerOf2(x uint32) uint32 {
- if x == math.MaxUint32 {
- return x
- }
- if x == 0 {
- return 1
- }
- x--
- x |= x >> 1
- x |= x >> 2
- x |= x >> 4
- x |= x >> 8
- x |= x >> 16
- return x + 1
- }
- func arraySize(exp int, fill float64) int {
- s := nextPowerOf2(uint32(math.Ceil(float64(exp) / fill)))
- if s < 2 {
- s = 2
- }
- return int(s)
- }
- func startIndex(key, len int) int {
- return phiMix64(key) & (len - 1)
- }
- func nextIndex(idx, len int) int {
- return (idx + 1) & (len - 1)
- }
|