| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- /*
- The bloom library relied on the excellent murmur library
- by Sébastien Paolacci. Unfortunately, it involved some heap
- allocation. We want to avoid any heap allocation whatsoever
- in the hashing process. To preserve backward compatibility, we roll
- our own hashing functions. They are designed to be strictly equivalent
- to Paolacci's implementation.
- License on original code:
- Copyright 2013, Sébastien Paolacci.
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the library nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- package bloom
import (
	"encoding/binary"
	"math/bits"
	"unsafe"
)
// Multiplier constants used by the 128-bit murmur3 mixing rounds.
const (
	c1_128 = 0x87c37b91114253d5
	c2_128 = 0x4cf5ad432745937f
)

// block_size is the number of input bytes consumed by one mixing round
// (two 64-bit words).
const block_size = 16
// digest128 holds the two 64-bit lanes of a 128-bit hash that has not been
// finalized yet.
type digest128 struct {
	h1, h2 uint64 // running (unfinalized) lanes 1 and 2
}
- // bmix will hash blocks (16 bytes)
- func (d *digest128) bmix(p []byte) {
- nblocks := len(p) / block_size
- for i := 0; i < nblocks; i++ {
- t := (*[2]uint64)(unsafe.Pointer(&p[i*block_size]))
- k1, k2 := t[0], t[1]
- d.bmix_words(k1, k2)
- }
- }
- // bmix_words will hash two 64-bit words (16 bytes)
- func (d *digest128) bmix_words(k1, k2 uint64) {
- h1, h2 := d.h1, d.h2
- k1 *= c1_128
- k1 = bits.RotateLeft64(k1, 31)
- k1 *= c2_128
- h1 ^= k1
- h1 = bits.RotateLeft64(h1, 27)
- h1 += h2
- h1 = h1*5 + 0x52dce729
- k2 *= c2_128
- k2 = bits.RotateLeft64(k2, 33)
- k2 *= c1_128
- h2 ^= k2
- h2 = bits.RotateLeft64(h2, 31)
- h2 += h1
- h2 = h2*5 + 0x38495ab5
- d.h1, d.h2 = h1, h2
- }
- // sum128 computers two 64-bit hash value. It is assumed that
- // bmix was first called on the data to process complete blocks
- // of 16 bytes. The 'tail' is a slice representing the 'tail' (leftover
- // elements, fewer than 16). If pad_tail is true, we make it seem like
- // there is an extra element with value 1 appended to the tail.
- // The length parameter represents the full length of the data (including
- // the blocks of 16 bytes, and, if pad_tail is true, an extra byte).
- func (d *digest128) sum128(pad_tail bool, length uint, tail []byte) (h1, h2 uint64) {
- h1, h2 = d.h1, d.h2
- var k1, k2 uint64
- if pad_tail {
- switch (len(tail) + 1) & 15 {
- case 15:
- k2 ^= uint64(1) << 48
- break
- case 14:
- k2 ^= uint64(1) << 40
- break
- case 13:
- k2 ^= uint64(1) << 32
- break
- case 12:
- k2 ^= uint64(1) << 24
- break
- case 11:
- k2 ^= uint64(1) << 16
- break
- case 10:
- k2 ^= uint64(1) << 8
- break
- case 9:
- k2 ^= uint64(1) << 0
- k2 *= c2_128
- k2 = bits.RotateLeft64(k2, 33)
- k2 *= c1_128
- h2 ^= k2
- break
- case 8:
- k1 ^= uint64(1) << 56
- break
- case 7:
- k1 ^= uint64(1) << 48
- break
- case 6:
- k1 ^= uint64(1) << 40
- break
- case 5:
- k1 ^= uint64(1) << 32
- break
- case 4:
- k1 ^= uint64(1) << 24
- break
- case 3:
- k1 ^= uint64(1) << 16
- break
- case 2:
- k1 ^= uint64(1) << 8
- break
- case 1:
- k1 ^= uint64(1) << 0
- k1 *= c1_128
- k1 = bits.RotateLeft64(k1, 31)
- k1 *= c2_128
- h1 ^= k1
- }
- }
- switch len(tail) & 15 {
- case 15:
- k2 ^= uint64(tail[14]) << 48
- fallthrough
- case 14:
- k2 ^= uint64(tail[13]) << 40
- fallthrough
- case 13:
- k2 ^= uint64(tail[12]) << 32
- fallthrough
- case 12:
- k2 ^= uint64(tail[11]) << 24
- fallthrough
- case 11:
- k2 ^= uint64(tail[10]) << 16
- fallthrough
- case 10:
- k2 ^= uint64(tail[9]) << 8
- fallthrough
- case 9:
- k2 ^= uint64(tail[8]) << 0
- k2 *= c2_128
- k2 = bits.RotateLeft64(k2, 33)
- k2 *= c1_128
- h2 ^= k2
- fallthrough
- case 8:
- k1 ^= uint64(tail[7]) << 56
- fallthrough
- case 7:
- k1 ^= uint64(tail[6]) << 48
- fallthrough
- case 6:
- k1 ^= uint64(tail[5]) << 40
- fallthrough
- case 5:
- k1 ^= uint64(tail[4]) << 32
- fallthrough
- case 4:
- k1 ^= uint64(tail[3]) << 24
- fallthrough
- case 3:
- k1 ^= uint64(tail[2]) << 16
- fallthrough
- case 2:
- k1 ^= uint64(tail[1]) << 8
- fallthrough
- case 1:
- k1 ^= uint64(tail[0]) << 0
- k1 *= c1_128
- k1 = bits.RotateLeft64(k1, 31)
- k1 *= c2_128
- h1 ^= k1
- }
- h1 ^= uint64(length)
- h2 ^= uint64(length)
- h1 += h2
- h2 += h1
- h1 = fmix64(h1)
- h2 = fmix64(h2)
- h1 += h2
- h2 += h1
- return h1, h2
- }
// fmix64 is the 64-bit finalization mix applied to each lane by sum128:
// three xor-shifts by 33 bits interleaved with two multiplications.
func fmix64(k uint64) uint64 {
	const (
		shift = 33
		mulA  = 0xff51afd7ed558ccd
		mulB  = 0xc4ceb9fe1a85ec53
	)
	k = (k ^ (k >> shift)) * mulA
	k = (k ^ (k >> shift)) * mulB
	return k ^ (k >> shift)
}
- // sum256 will compute 4 64-bit hash values from the input.
- // It is designed to never allocate memory on the heap. So it
- // works without any byte buffer whatsoever.
- // It is designed to be strictly equivalent to
- //
- // a1 := []byte{1}
- // hasher := murmur3.New128()
- // hasher.Write(data) // #nosec
- // v1, v2 := hasher.Sum128()
- // hasher.Write(a1) // #nosec
- // v3, v4 := hasher.Sum128()
- //
- // See TestHashRandom.
- func (d *digest128) sum256(data []byte) (hash1, hash2, hash3, hash4 uint64) {
- // We always start from zero.
- d.h1, d.h2 = 0, 0
- // Process as many bytes as possible.
- d.bmix(data)
- // We have enough to compute the first two 64-bit numbers
- length := uint(len(data))
- tail_length := length % block_size
- tail := data[length-tail_length:]
- hash1, hash2 = d.sum128(false, length, tail)
- // Next we want to 'virtually' append 1 to the input, but,
- // we do not want to append to an actual array!!!
- if tail_length+1 == block_size {
- // We are left with no tail!!!
- word1 := *(*uint64)(unsafe.Pointer(&tail[0]))
- word2 := uint64(*(*uint32)(unsafe.Pointer(&tail[8])))
- word2 = word2 | (uint64(tail[12]) << 32) | (uint64(tail[13]) << 40) | (uint64(tail[14]) << 48)
- // We append 1.
- word2 = word2 | (uint64(1) << 56)
- // We process the resulting 2 words.
- d.bmix_words(word1, word2)
- tail := data[length:] // empty slice, deliberate.
- hash3, hash4 = d.sum128(false, length+1, tail)
- } else {
- // We still have a tail (fewer than 15 bytes) but we
- // need to append '1' to it.
- hash3, hash4 = d.sum128(true, length+1, tail)
- }
- return hash1, hash2, hash3, hash4
- }
|