map64.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. // Package intmap contains a fast hashmap implementation for maps with keys of any integer type
  2. package intmap
  3. import (
  4. "iter"
  5. "math"
  6. )
  7. // IntKey is a type constraint for values that can be used as keys in Map
  8. type IntKey interface {
  9. ~int | ~uint | ~int64 | ~uint64 | ~int32 | ~uint32 | ~int16 | ~uint16 | ~int8 | ~uint8 | ~uintptr
  10. }
  11. // pair represents a key-value pair in Map.
  12. //
  13. // It is an important detail that V is before K in the memory layout. Despite it feeling more natural to have K first!
  14. // We must have sizeof(pair[K,struct{}]) == sizeof(K), to minimize memory consumption when using a Set.
  15. // If V is last, then &p.V can point to invalid memory, which is not permitted. This makes the Go compiler emit
  16. // some padding for the pair struct in that case.
  17. // See https://github.com/kamstrup/intmap/pull/6#issuecomment-3581008879
  18. type pair[K IntKey, V any] struct {
  19. V V
  20. K K
  21. }
  22. const fillFactorBase64 = 7
  23. const fillFactor64 = fillFactorBase64 / 10.0
  24. func phiMix64(x int) int {
  25. h := int64(x) * int64(0x9E3779B9)
  26. return int(h ^ (h >> 16))
  27. }
  28. // Map is a hashmap where the keys are some any integer type.
  29. // It is valid to call methods that read a nil map, similar to a standard Go map.
  30. // Methods valid on a nil map are Has, Get, Len, and ForEach.
  31. type Map[K IntKey, V any] struct {
  32. data []pair[K, V] // key-value pairs
  33. size int
  34. zeroVal V // value of 'zero' key
  35. hasZeroKey bool // do we have 'zero' key in the map?
  36. }
  37. // New creates a new map with keys being any integer subtype.
  38. // The map can store up to the given capacity before reallocation and rehashing occurs.
  39. func New[K IntKey, V any](capacity int) *Map[K, V] {
  40. return &Map[K, V]{
  41. data: make([]pair[K, V], arraySize(capacity, fillFactor64)),
  42. }
  43. }
  44. // Has checks if the given key exists in the map.
  45. // Calling this method on a nil map will return false.
  46. func (m *Map[K, V]) Has(key K) bool {
  47. if m == nil {
  48. return false
  49. }
  50. if key == K(0) {
  51. return m.hasZeroKey
  52. }
  53. idx := m.startIndex(key)
  54. p := m.data[idx]
  55. if p.K == K(0) { // end of chain already
  56. return false
  57. }
  58. if p.K == key { // we check zero prior to this call
  59. return true
  60. }
  61. // hash collision, seek next hash match, bailing on first empty
  62. for {
  63. idx = m.nextIndex(idx)
  64. p = m.data[idx]
  65. if p.K == K(0) {
  66. return false
  67. }
  68. if p.K == key {
  69. return true
  70. }
  71. }
  72. }
  73. // Get returns the value if the key is found.
  74. // If you just need to check for existence it is easier to use Has.
  75. // Calling this method on a nil map will return the zero value for V and false.
  76. func (m *Map[K, V]) Get(key K) (V, bool) {
  77. if m == nil {
  78. var zero V
  79. return zero, false
  80. }
  81. if key == K(0) {
  82. if m.hasZeroKey {
  83. return m.zeroVal, true
  84. }
  85. var zero V
  86. return zero, false
  87. }
  88. idx := m.startIndex(key)
  89. p := m.data[idx]
  90. if p.K == K(0) { // end of chain already
  91. var zero V
  92. return zero, false
  93. }
  94. if p.K == key { // we check zero prior to this call
  95. return p.V, true
  96. }
  97. // hash collision, seek next hash match, bailing on first empty
  98. for {
  99. idx = m.nextIndex(idx)
  100. p = m.data[idx]
  101. if p.K == K(0) {
  102. var zero V
  103. return zero, false
  104. }
  105. if p.K == key {
  106. return p.V, true
  107. }
  108. }
  109. }
  110. // Put adds or updates key with value val.
  111. func (m *Map[K, V]) Put(key K, val V) {
  112. if key == K(0) {
  113. if !m.hasZeroKey {
  114. m.size++
  115. }
  116. m.zeroVal = val
  117. m.hasZeroKey = true
  118. return
  119. }
  120. idx := m.startIndex(key)
  121. p := &m.data[idx]
  122. if p.K == K(0) { // end of chain already
  123. p.K = key
  124. p.V = val
  125. if m.size >= m.sizeThreshold() {
  126. m.rehash()
  127. } else {
  128. m.size++
  129. }
  130. return
  131. } else if p.K == key { // overwrite existing value
  132. p.V = val
  133. return
  134. }
  135. // hash collision, seek next empty or key match
  136. for {
  137. idx = m.nextIndex(idx)
  138. p = &m.data[idx]
  139. if p.K == K(0) {
  140. p.K = key
  141. p.V = val
  142. if m.size >= m.sizeThreshold() {
  143. m.rehash()
  144. } else {
  145. m.size++
  146. }
  147. return
  148. } else if p.K == key {
  149. p.V = val
  150. return
  151. }
  152. }
  153. }
  154. // PutIfNotExists adds the key-value pair only if the key does not already exist
  155. // in the map, and returns the current value associated with the key and a boolean
  156. // indicating whether the value was newly added or not.
  157. func (m *Map[K, V]) PutIfNotExists(key K, val V) (V, bool) {
  158. if key == K(0) {
  159. if m.hasZeroKey {
  160. return m.zeroVal, false
  161. }
  162. m.zeroVal = val
  163. m.hasZeroKey = true
  164. m.size++
  165. return val, true
  166. }
  167. idx := m.startIndex(key)
  168. p := &m.data[idx]
  169. if p.K == K(0) { // end of chain already
  170. p.K = key
  171. p.V = val
  172. m.size++
  173. if m.size >= m.sizeThreshold() {
  174. m.rehash()
  175. }
  176. return val, true
  177. } else if p.K == key {
  178. return p.V, false
  179. }
  180. // hash collision, seek next hash match, bailing on first empty
  181. for {
  182. idx = m.nextIndex(idx)
  183. p = &m.data[idx]
  184. if p.K == K(0) {
  185. p.K = key
  186. p.V = val
  187. m.size++
  188. if m.size >= m.sizeThreshold() {
  189. m.rehash()
  190. }
  191. return val, true
  192. } else if p.K == key {
  193. return p.V, false
  194. }
  195. }
  196. }
  197. // ForEach iterates through key-value pairs in the map while the function f returns true.
  198. // This method returns immediately if invoked on a nil map.
  199. //
  200. // The iteration order of a Map is not defined, so please avoid relying on it.
  201. func (m *Map[K, V]) ForEach(f func(K, V) bool) {
  202. if m == nil {
  203. return
  204. }
  205. if m.hasZeroKey && !f(K(0), m.zeroVal) {
  206. return
  207. }
  208. forEach64(m.data, f)
  209. }
  210. // All returns an iterator over key-value pairs from m.
  211. // The iterator returns immediately if invoked on a nil map.
  212. //
  213. // The iteration order of a Map is not defined, so please avoid relying on it.
  214. func (m *Map[K, V]) All() iter.Seq2[K, V] {
  215. return m.ForEach
  216. }
  217. // Keys returns an iterator over keys in m.
  218. // The iterator returns immediately if invoked on a nil map.
  219. //
  220. // The iteration order of a Map is not defined, so please avoid relying on it.
  221. func (m *Map[K, V]) Keys() iter.Seq[K] {
  222. return func(yield func(k K) bool) {
  223. if m == nil {
  224. return
  225. }
  226. if m.hasZeroKey && !yield(K(0)) {
  227. return
  228. }
  229. for _, p := range m.data {
  230. if p.K != K(0) && !yield(p.K) {
  231. return
  232. }
  233. }
  234. }
  235. }
  236. // Values returns an iterator over values in m.
  237. // The iterator returns immediately if invoked on a nil map.
  238. //
  239. // The iteration order of a Map is not defined, so please avoid relying on it.
  240. func (m *Map[K, V]) Values() iter.Seq[V] {
  241. return func(yield func(v V) bool) {
  242. if m == nil {
  243. return
  244. }
  245. if m.hasZeroKey && !yield(m.zeroVal) {
  246. return
  247. }
  248. for _, p := range m.data {
  249. if p.K != K(0) && !yield(p.V) {
  250. return
  251. }
  252. }
  253. }
  254. }
  255. // Clear removes all items from the map, but keeps the internal buffers for reuse.
  256. func (m *Map[K, V]) Clear() {
  257. var zero V
  258. m.hasZeroKey = false
  259. m.zeroVal = zero
  260. // compiles down to runtime.memclr()
  261. for i := range m.data {
  262. m.data[i] = pair[K, V]{}
  263. }
  264. m.size = 0
  265. }
  266. func (m *Map[K, V]) rehash() {
  267. oldData := m.data
  268. m.data = make([]pair[K, V], 2*len(m.data))
  269. // reset size
  270. if m.hasZeroKey {
  271. m.size = 1
  272. } else {
  273. m.size = 0
  274. }
  275. forEach64(oldData, func(k K, v V) bool {
  276. m.Put(k, v)
  277. return true
  278. })
  279. }
  280. // Len returns the number of elements in the map.
  281. // The length of a nil map is defined to be zero.
  282. func (m *Map[K, V]) Len() int {
  283. if m == nil {
  284. return 0
  285. }
  286. return m.size
  287. }
  288. func (m *Map[K, V]) sizeThreshold() int {
  289. return int(uint64(len(m.data)) * fillFactorBase64 / 10)
  290. }
  291. func (m *Map[K, V]) startIndex(key K) int {
  292. return startIndex(int(key), len(m.data))
  293. }
  294. func (m *Map[K, V]) nextIndex(idx int) int {
  295. return nextIndex(idx, len(m.data))
  296. }
  297. func forEach64[K IntKey, V any](pairs []pair[K, V], f func(k K, v V) bool) {
  298. for _, p := range pairs {
  299. if p.K != K(0) && !f(p.K, p.V) {
  300. return
  301. }
  302. }
  303. }
  304. // Del deletes a key and its value, returning true iff the key was found
  305. func (m *Map[K, V]) Del(key K) bool {
  306. if key == K(0) {
  307. if m.hasZeroKey {
  308. m.hasZeroKey = false
  309. m.size--
  310. return true
  311. }
  312. return false
  313. }
  314. idx := m.startIndex(key)
  315. p := m.data[idx]
  316. if p.K == key {
  317. // any keys that were pushed back needs to be shifted nack into the empty slot
  318. // to avoid breaking the chain
  319. m.shiftKeys(idx)
  320. m.size--
  321. return true
  322. } else if p.K == K(0) { // end of chain already
  323. return false
  324. }
  325. for {
  326. idx = m.nextIndex(idx)
  327. p = m.data[idx]
  328. if p.K == key {
  329. // any keys that were pushed back needs to be shifted nack into the empty slot
  330. // to avoid breaking the chain
  331. m.shiftKeys(idx)
  332. m.size--
  333. return true
  334. } else if p.K == K(0) {
  335. return false
  336. }
  337. }
  338. }
  339. func (m *Map[K, V]) shiftKeys(idx int) int {
  340. // Shift entries with the same hash.
  341. // We need to do this on deletion to ensure we don't have zeroes in the hash chain
  342. for {
  343. var p pair[K, V]
  344. lastIdx := idx
  345. idx = m.nextIndex(idx)
  346. for {
  347. p = m.data[idx]
  348. if p.K == K(0) {
  349. m.data[lastIdx] = pair[K, V]{}
  350. return lastIdx
  351. }
  352. slot := m.startIndex(p.K)
  353. if lastIdx <= idx {
  354. if lastIdx >= slot || slot > idx {
  355. break
  356. }
  357. } else {
  358. if lastIdx >= slot && slot > idx {
  359. break
  360. }
  361. }
  362. idx = m.nextIndex(idx)
  363. }
  364. m.data[lastIdx] = p
  365. }
  366. }
  367. func nextPowerOf2(x uint32) uint32 {
  368. if x == math.MaxUint32 {
  369. return x
  370. }
  371. if x == 0 {
  372. return 1
  373. }
  374. x--
  375. x |= x >> 1
  376. x |= x >> 2
  377. x |= x >> 4
  378. x |= x >> 8
  379. x |= x >> 16
  380. return x + 1
  381. }
  382. func arraySize(exp int, fill float64) int {
  383. s := nextPowerOf2(uint32(math.Ceil(float64(exp) / fill)))
  384. if s < 2 {
  385. s = 2
  386. }
  387. return int(s)
  388. }
  389. func startIndex(key, len int) int {
  390. return phiMix64(key) & (len - 1)
  391. }
  392. func nextIndex(idx, len int) int {
  393. return (idx + 1) & (len - 1)
  394. }