compressed.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. package hyperloglog
  2. import (
  3. "encoding/binary"
  4. "slices"
  5. )
  6. // Original author of this file is github.com/clarkduvall/hyperloglog
  7. type iterator struct {
  8. i int
  9. last uint32
  10. v *compressedList
  11. }
  12. func (iter *iterator) Next() uint32 {
  13. n, i := iter.v.decode(iter.i, iter.last)
  14. iter.last = n
  15. iter.i = i
  16. return n
  17. }
  18. func (iter *iterator) Peek() (uint32, int) {
  19. return iter.v.decode(iter.i, iter.last)
  20. }
  21. func (iter *iterator) Advance(last uint32, i int) {
  22. iter.last = last
  23. iter.i = i
  24. }
  25. func (iter iterator) HasNext() bool {
  26. return iter.i < iter.v.Len()
  27. }
  28. type compressedList struct {
  29. count uint32
  30. last uint32
  31. b variableLengthList
  32. }
  33. func (v *compressedList) Clone() *compressedList {
  34. if v == nil {
  35. return nil
  36. }
  37. newV := &compressedList{
  38. count: v.count,
  39. last: v.last,
  40. }
  41. newV.b = make(variableLengthList, len(v.b))
  42. copy(newV.b, v.b)
  43. return newV
  44. }
  45. func (v *compressedList) AppendBinary(data []byte) ([]byte, error) {
  46. // At least 4 bytes for the two fixed sized values
  47. data = slices.Grow(data, 4+4)
  48. // Marshal the count and last values.
  49. data = append(data,
  50. // Number of items in the list.
  51. byte(v.count>>24),
  52. byte(v.count>>16),
  53. byte(v.count>>8),
  54. byte(v.count),
  55. // The last item in the list.
  56. byte(v.last>>24),
  57. byte(v.last>>16),
  58. byte(v.last>>8),
  59. byte(v.last),
  60. )
  61. // Append the variableLengthList
  62. return v.b.AppendBinary(data)
  63. }
  64. func (v *compressedList) UnmarshalBinary(data []byte) error {
  65. if len(data) < 12 {
  66. return ErrorTooShort
  67. }
  68. // Set the count.
  69. v.count, data = binary.BigEndian.Uint32(data[:4]), data[4:]
  70. // Set the last value.
  71. v.last, data = binary.BigEndian.Uint32(data[:4]), data[4:]
  72. // Set the list.
  73. sz, data := binary.BigEndian.Uint32(data[:4]), data[4:]
  74. v.b = make([]uint8, sz)
  75. if uint32(len(data)) < sz {
  76. return ErrorTooShort
  77. }
  78. for i := uint32(0); i < sz; i++ {
  79. v.b[i] = data[i]
  80. }
  81. return nil
  82. }
  83. func newCompressedList(capacity int) *compressedList {
  84. v := &compressedList{}
  85. v.b = make(variableLengthList, 0, capacity)
  86. return v
  87. }
  88. func (v *compressedList) Len() int {
  89. return len(v.b)
  90. }
  91. func (v *compressedList) decode(i int, last uint32) (uint32, int) {
  92. n, i := v.b.decode(i)
  93. return n + last, i
  94. }
  95. func (v *compressedList) Append(x uint32) {
  96. v.count++
  97. v.b = v.b.Append(x - v.last)
  98. v.last = x
  99. }
  100. func (v *compressedList) Iter() iterator {
  101. return iterator{0, 0, v}
  102. }
  103. type variableLengthList []uint8
  104. func (v variableLengthList) AppendBinary(data []byte) ([]byte, error) {
  105. // 4 bytes for the size of the list, and a byte for each element in the
  106. // list.
  107. data = slices.Grow(data, 4+len(v))
  108. // Length of the list. We only need 32 bits because the size of the set
  109. // couldn't exceed that on 32 bit architectures.
  110. sz := len(v)
  111. data = append(data,
  112. byte(sz>>24),
  113. byte(sz>>16),
  114. byte(sz>>8),
  115. byte(sz),
  116. )
  117. // Marshal each element in the list.
  118. data = append(data, v...)
  119. return data, nil
  120. }
  121. func (v variableLengthList) decode(i int) (uint32, int) {
  122. var x uint32
  123. j := i
  124. for ; v[j]&0x80 != 0; j++ {
  125. x |= uint32(v[j]&0x7f) << (uint(j-i) * 7)
  126. }
  127. x |= uint32(v[j]) << (uint(j-i) * 7)
  128. return x, j + 1
  129. }
  130. func (v variableLengthList) Append(x uint32) variableLengthList {
  131. for x&0xffffff80 != 0 {
  132. v = append(v, uint8((x&0x7f)|0x80))
  133. x >>= 7
  134. }
  135. return append(v, uint8(x&0x7f))
  136. }