murmur.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. /*
  2. The bloom library relied on the excellent murmur library
  3. by Sébastien Paolacci. Unfortunately, it involved some heap
  4. allocation. We want to avoid any heap allocation whatsoever
  5. in the hashing process. To preserve backward compatibility, we roll
  6. our own hashing functions. They are designed to be strictly equivalent
  7. to Paolacci's implementation.
  8. License on original code:
  9. Copyright 2013, Sébastien Paolacci.
  10. All rights reserved.
  11. Redistribution and use in source and binary forms, with or without
  12. modification, are permitted provided that the following conditions are met:
  13. * Redistributions of source code must retain the above copyright
  14. notice, this list of conditions and the following disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. notice, this list of conditions and the following disclaimer in the
  17. documentation and/or other materials provided with the distribution.
  18. * Neither the name of the library nor the
  19. names of its contributors may be used to endorse or promote products
  20. derived from this software without specific prior written permission.
  21. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  22. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  23. WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24. DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
  25. DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26. (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27. LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  28. ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. package bloom
  33. import (
  34. "math/bits"
  35. "unsafe"
  36. )
  37. const (
  38. c1_128 = 0x87c37b91114253d5
  39. c2_128 = 0x4cf5ad432745937f
  40. block_size = 16
  41. )
  42. // digest128 represents a partial evaluation of a 128 bites hash.
  43. type digest128 struct {
  44. h1 uint64 // Unfinalized running hash part 1.
  45. h2 uint64 // Unfinalized running hash part 2.
  46. }
  47. // bmix will hash blocks (16 bytes)
  48. func (d *digest128) bmix(p []byte) {
  49. nblocks := len(p) / block_size
  50. for i := 0; i < nblocks; i++ {
  51. t := (*[2]uint64)(unsafe.Pointer(&p[i*block_size]))
  52. k1, k2 := t[0], t[1]
  53. d.bmix_words(k1, k2)
  54. }
  55. }
  56. // bmix_words will hash two 64-bit words (16 bytes)
  57. func (d *digest128) bmix_words(k1, k2 uint64) {
  58. h1, h2 := d.h1, d.h2
  59. k1 *= c1_128
  60. k1 = bits.RotateLeft64(k1, 31)
  61. k1 *= c2_128
  62. h1 ^= k1
  63. h1 = bits.RotateLeft64(h1, 27)
  64. h1 += h2
  65. h1 = h1*5 + 0x52dce729
  66. k2 *= c2_128
  67. k2 = bits.RotateLeft64(k2, 33)
  68. k2 *= c1_128
  69. h2 ^= k2
  70. h2 = bits.RotateLeft64(h2, 31)
  71. h2 += h1
  72. h2 = h2*5 + 0x38495ab5
  73. d.h1, d.h2 = h1, h2
  74. }
  75. // sum128 computers two 64-bit hash value. It is assumed that
  76. // bmix was first called on the data to process complete blocks
  77. // of 16 bytes. The 'tail' is a slice representing the 'tail' (leftover
  78. // elements, fewer than 16). If pad_tail is true, we make it seem like
  79. // there is an extra element with value 1 appended to the tail.
  80. // The length parameter represents the full length of the data (including
  81. // the blocks of 16 bytes, and, if pad_tail is true, an extra byte).
  82. func (d *digest128) sum128(pad_tail bool, length uint, tail []byte) (h1, h2 uint64) {
  83. h1, h2 = d.h1, d.h2
  84. var k1, k2 uint64
  85. if pad_tail {
  86. switch (len(tail) + 1) & 15 {
  87. case 15:
  88. k2 ^= uint64(1) << 48
  89. break
  90. case 14:
  91. k2 ^= uint64(1) << 40
  92. break
  93. case 13:
  94. k2 ^= uint64(1) << 32
  95. break
  96. case 12:
  97. k2 ^= uint64(1) << 24
  98. break
  99. case 11:
  100. k2 ^= uint64(1) << 16
  101. break
  102. case 10:
  103. k2 ^= uint64(1) << 8
  104. break
  105. case 9:
  106. k2 ^= uint64(1) << 0
  107. k2 *= c2_128
  108. k2 = bits.RotateLeft64(k2, 33)
  109. k2 *= c1_128
  110. h2 ^= k2
  111. break
  112. case 8:
  113. k1 ^= uint64(1) << 56
  114. break
  115. case 7:
  116. k1 ^= uint64(1) << 48
  117. break
  118. case 6:
  119. k1 ^= uint64(1) << 40
  120. break
  121. case 5:
  122. k1 ^= uint64(1) << 32
  123. break
  124. case 4:
  125. k1 ^= uint64(1) << 24
  126. break
  127. case 3:
  128. k1 ^= uint64(1) << 16
  129. break
  130. case 2:
  131. k1 ^= uint64(1) << 8
  132. break
  133. case 1:
  134. k1 ^= uint64(1) << 0
  135. k1 *= c1_128
  136. k1 = bits.RotateLeft64(k1, 31)
  137. k1 *= c2_128
  138. h1 ^= k1
  139. }
  140. }
  141. switch len(tail) & 15 {
  142. case 15:
  143. k2 ^= uint64(tail[14]) << 48
  144. fallthrough
  145. case 14:
  146. k2 ^= uint64(tail[13]) << 40
  147. fallthrough
  148. case 13:
  149. k2 ^= uint64(tail[12]) << 32
  150. fallthrough
  151. case 12:
  152. k2 ^= uint64(tail[11]) << 24
  153. fallthrough
  154. case 11:
  155. k2 ^= uint64(tail[10]) << 16
  156. fallthrough
  157. case 10:
  158. k2 ^= uint64(tail[9]) << 8
  159. fallthrough
  160. case 9:
  161. k2 ^= uint64(tail[8]) << 0
  162. k2 *= c2_128
  163. k2 = bits.RotateLeft64(k2, 33)
  164. k2 *= c1_128
  165. h2 ^= k2
  166. fallthrough
  167. case 8:
  168. k1 ^= uint64(tail[7]) << 56
  169. fallthrough
  170. case 7:
  171. k1 ^= uint64(tail[6]) << 48
  172. fallthrough
  173. case 6:
  174. k1 ^= uint64(tail[5]) << 40
  175. fallthrough
  176. case 5:
  177. k1 ^= uint64(tail[4]) << 32
  178. fallthrough
  179. case 4:
  180. k1 ^= uint64(tail[3]) << 24
  181. fallthrough
  182. case 3:
  183. k1 ^= uint64(tail[2]) << 16
  184. fallthrough
  185. case 2:
  186. k1 ^= uint64(tail[1]) << 8
  187. fallthrough
  188. case 1:
  189. k1 ^= uint64(tail[0]) << 0
  190. k1 *= c1_128
  191. k1 = bits.RotateLeft64(k1, 31)
  192. k1 *= c2_128
  193. h1 ^= k1
  194. }
  195. h1 ^= uint64(length)
  196. h2 ^= uint64(length)
  197. h1 += h2
  198. h2 += h1
  199. h1 = fmix64(h1)
  200. h2 = fmix64(h2)
  201. h1 += h2
  202. h2 += h1
  203. return h1, h2
  204. }
  205. func fmix64(k uint64) uint64 {
  206. k ^= k >> 33
  207. k *= 0xff51afd7ed558ccd
  208. k ^= k >> 33
  209. k *= 0xc4ceb9fe1a85ec53
  210. k ^= k >> 33
  211. return k
  212. }
  213. // sum256 will compute 4 64-bit hash values from the input.
  214. // It is designed to never allocate memory on the heap. So it
  215. // works without any byte buffer whatsoever.
  216. // It is designed to be strictly equivalent to
  217. //
  218. // a1 := []byte{1}
  219. // hasher := murmur3.New128()
  220. // hasher.Write(data) // #nosec
  221. // v1, v2 := hasher.Sum128()
  222. // hasher.Write(a1) // #nosec
  223. // v3, v4 := hasher.Sum128()
  224. //
  225. // See TestHashRandom.
  226. func (d *digest128) sum256(data []byte) (hash1, hash2, hash3, hash4 uint64) {
  227. // We always start from zero.
  228. d.h1, d.h2 = 0, 0
  229. // Process as many bytes as possible.
  230. d.bmix(data)
  231. // We have enough to compute the first two 64-bit numbers
  232. length := uint(len(data))
  233. tail_length := length % block_size
  234. tail := data[length-tail_length:]
  235. hash1, hash2 = d.sum128(false, length, tail)
  236. // Next we want to 'virtually' append 1 to the input, but,
  237. // we do not want to append to an actual array!!!
  238. if tail_length+1 == block_size {
  239. // We are left with no tail!!!
  240. word1 := *(*uint64)(unsafe.Pointer(&tail[0]))
  241. word2 := uint64(*(*uint32)(unsafe.Pointer(&tail[8])))
  242. word2 = word2 | (uint64(tail[12]) << 32) | (uint64(tail[13]) << 40) | (uint64(tail[14]) << 48)
  243. // We append 1.
  244. word2 = word2 | (uint64(1) << 56)
  245. // We process the resulting 2 words.
  246. d.bmix_words(word1, word2)
  247. tail := data[length:] // empty slice, deliberate.
  248. hash3, hash4 = d.sum128(false, length+1, tail)
  249. } else {
  250. // We still have a tail (fewer than 15 bytes) but we
  251. // need to append '1' to it.
  252. hash3, hash4 = d.sum128(true, length+1, tail)
  253. }
  254. return hash1, hash2, hash3, hash4
  255. }