sparse.go 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. package hyperloglog
  2. import (
  3. "math/bits"
  4. "slices"
  5. "github.com/kamstrup/intmap"
  6. )
  7. func getIndex(k uint32, p, pp uint8) uint32 {
  8. if k&1 == 1 {
  9. return bextr32(k, 32-p, p)
  10. }
  11. return bextr32(k, pp-p+1, p)
  12. }
  13. // Encode a hash to be used in the sparse representation.
  14. func encodeHash(x uint64, p, pp uint8) uint32 {
  15. idx := uint32(bextr(x, 64-pp, pp))
  16. if bextr(x, 64-pp, pp-p) == 0 {
  17. zeros := bits.LeadingZeros64((bextr(x, 0, 64-pp)<<pp)|(1<<pp-1)) + 1
  18. return idx<<7 | uint32(zeros<<1) | 1
  19. }
  20. return idx << 1
  21. }
  22. // Decode a hash from the sparse representation.
  23. func decodeHash(k uint32, p, pp uint8) (uint32, uint8) {
  24. var r uint8
  25. if k&1 == 1 {
  26. r = uint8(bextr32(k, 1, 6)) + pp - p
  27. } else {
  28. // We can use the 64bit clz implementation and reduce the result
  29. // by 32 to get a clz for a 32bit word.
  30. r = uint8(bits.LeadingZeros64(uint64(k<<(32-pp+p-1))) - 31) // -32 + 1
  31. }
  32. return getIndex(k, p, pp), r
  33. }
  34. type set struct {
  35. m *intmap.Set[uint32]
  36. }
  37. var nilSet set
  38. func makeSet(size int) set {
  39. return set{m: intmap.NewSet[uint32](size)}
  40. }
  41. func (s set) ForEach(fn func(v uint32)) {
  42. s.m.ForEach(func(v uint32) bool {
  43. fn(v)
  44. return true
  45. })
  46. }
  47. func (s set) Merge(other set) {
  48. other.m.ForEach(func(v uint32) bool {
  49. s.m.Add(v)
  50. return true
  51. })
  52. }
  53. func (s set) Len() int {
  54. return s.m.Len()
  55. }
  56. func (s set) add(v uint32) bool {
  57. return s.m.Add(v)
  58. }
  59. func (s set) Clone() set {
  60. if s == nilSet {
  61. return nilSet
  62. }
  63. newS := intmap.NewSet[uint32](s.m.Len())
  64. s.m.ForEach(func(v uint32) bool {
  65. newS.Add(v)
  66. return true
  67. })
  68. return set{m: newS}
  69. }
  70. func (s *set) AppendBinary(data []byte) ([]byte, error) {
  71. // 4 bytes for the size of the set, and 4 bytes for each key.
  72. // list.
  73. data = slices.Grow(data, 4+(4*s.m.Len()))
  74. // Length of the set. We only need 32 bits because the size of the set
  75. // couldn't exceed that on 32 bit architectures.
  76. sl := s.m.Len()
  77. data = append(data,
  78. byte(sl>>24),
  79. byte(sl>>16),
  80. byte(sl>>8),
  81. byte(sl),
  82. )
  83. // Marshal each element in the set.
  84. s.m.ForEach(func(k uint32) bool {
  85. data = append(data,
  86. byte(k>>24),
  87. byte(k>>16),
  88. byte(k>>8),
  89. byte(k),
  90. )
  91. return true
  92. })
  93. return data, nil
  94. }