| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394 |
- // chacha20_ref.go - Reference ChaCha20.
- //
- // To the extent possible under law, Yawning Angel has waived all copyright
- // and related or neighboring rights to chacha20, using the Creative
- // Commons "CC0" public domain dedication. See LICENSE or
- // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
- // +build !go1.9
- package chacha20
- import (
- "encoding/binary"
- "math"
- "unsafe"
- )
- func blocksRef(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
- if isIetf {
- var totalBlocks uint64
- totalBlocks = uint64(x[12]) + uint64(nrBlocks)
- if totalBlocks > math.MaxUint32 {
- panic("chacha20: Exceeded keystream per nonce limit")
- }
- }
- // This routine ignores x[0]...x[4] in favor the const values since it's
- // ever so slightly faster.
- for n := 0; n < nrBlocks; n++ {
- x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
- x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
- for i := chachaRounds; i > 0; i -= 2 {
- // quarterround(x, 0, 4, 8, 12)
- x0 += x4
- x12 ^= x0
- x12 = (x12 << 16) | (x12 >> 16)
- x8 += x12
- x4 ^= x8
- x4 = (x4 << 12) | (x4 >> 20)
- x0 += x4
- x12 ^= x0
- x12 = (x12 << 8) | (x12 >> 24)
- x8 += x12
- x4 ^= x8
- x4 = (x4 << 7) | (x4 >> 25)
- // quarterround(x, 1, 5, 9, 13)
- x1 += x5
- x13 ^= x1
- x13 = (x13 << 16) | (x13 >> 16)
- x9 += x13
- x5 ^= x9
- x5 = (x5 << 12) | (x5 >> 20)
- x1 += x5
- x13 ^= x1
- x13 = (x13 << 8) | (x13 >> 24)
- x9 += x13
- x5 ^= x9
- x5 = (x5 << 7) | (x5 >> 25)
- // quarterround(x, 2, 6, 10, 14)
- x2 += x6
- x14 ^= x2
- x14 = (x14 << 16) | (x14 >> 16)
- x10 += x14
- x6 ^= x10
- x6 = (x6 << 12) | (x6 >> 20)
- x2 += x6
- x14 ^= x2
- x14 = (x14 << 8) | (x14 >> 24)
- x10 += x14
- x6 ^= x10
- x6 = (x6 << 7) | (x6 >> 25)
- // quarterround(x, 3, 7, 11, 15)
- x3 += x7
- x15 ^= x3
- x15 = (x15 << 16) | (x15 >> 16)
- x11 += x15
- x7 ^= x11
- x7 = (x7 << 12) | (x7 >> 20)
- x3 += x7
- x15 ^= x3
- x15 = (x15 << 8) | (x15 >> 24)
- x11 += x15
- x7 ^= x11
- x7 = (x7 << 7) | (x7 >> 25)
- // quarterround(x, 0, 5, 10, 15)
- x0 += x5
- x15 ^= x0
- x15 = (x15 << 16) | (x15 >> 16)
- x10 += x15
- x5 ^= x10
- x5 = (x5 << 12) | (x5 >> 20)
- x0 += x5
- x15 ^= x0
- x15 = (x15 << 8) | (x15 >> 24)
- x10 += x15
- x5 ^= x10
- x5 = (x5 << 7) | (x5 >> 25)
- // quarterround(x, 1, 6, 11, 12)
- x1 += x6
- x12 ^= x1
- x12 = (x12 << 16) | (x12 >> 16)
- x11 += x12
- x6 ^= x11
- x6 = (x6 << 12) | (x6 >> 20)
- x1 += x6
- x12 ^= x1
- x12 = (x12 << 8) | (x12 >> 24)
- x11 += x12
- x6 ^= x11
- x6 = (x6 << 7) | (x6 >> 25)
- // quarterround(x, 2, 7, 8, 13)
- x2 += x7
- x13 ^= x2
- x13 = (x13 << 16) | (x13 >> 16)
- x8 += x13
- x7 ^= x8
- x7 = (x7 << 12) | (x7 >> 20)
- x2 += x7
- x13 ^= x2
- x13 = (x13 << 8) | (x13 >> 24)
- x8 += x13
- x7 ^= x8
- x7 = (x7 << 7) | (x7 >> 25)
- // quarterround(x, 3, 4, 9, 14)
- x3 += x4
- x14 ^= x3
- x14 = (x14 << 16) | (x14 >> 16)
- x9 += x14
- x4 ^= x9
- x4 = (x4 << 12) | (x4 >> 20)
- x3 += x4
- x14 ^= x3
- x14 = (x14 << 8) | (x14 >> 24)
- x9 += x14
- x4 ^= x9
- x4 = (x4 << 7) | (x4 >> 25)
- }
- // On amd64 at least, this is a rather big boost.
- if useUnsafe {
- if in != nil {
- inArr := (*[16]uint32)(unsafe.Pointer(&in[n*BlockSize]))
- outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
- outArr[0] = inArr[0] ^ (x0 + sigma0)
- outArr[1] = inArr[1] ^ (x1 + sigma1)
- outArr[2] = inArr[2] ^ (x2 + sigma2)
- outArr[3] = inArr[3] ^ (x3 + sigma3)
- outArr[4] = inArr[4] ^ (x4 + x[4])
- outArr[5] = inArr[5] ^ (x5 + x[5])
- outArr[6] = inArr[6] ^ (x6 + x[6])
- outArr[7] = inArr[7] ^ (x7 + x[7])
- outArr[8] = inArr[8] ^ (x8 + x[8])
- outArr[9] = inArr[9] ^ (x9 + x[9])
- outArr[10] = inArr[10] ^ (x10 + x[10])
- outArr[11] = inArr[11] ^ (x11 + x[11])
- outArr[12] = inArr[12] ^ (x12 + x[12])
- outArr[13] = inArr[13] ^ (x13 + x[13])
- outArr[14] = inArr[14] ^ (x14 + x[14])
- outArr[15] = inArr[15] ^ (x15 + x[15])
- } else {
- outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
- outArr[0] = x0 + sigma0
- outArr[1] = x1 + sigma1
- outArr[2] = x2 + sigma2
- outArr[3] = x3 + sigma3
- outArr[4] = x4 + x[4]
- outArr[5] = x5 + x[5]
- outArr[6] = x6 + x[6]
- outArr[7] = x7 + x[7]
- outArr[8] = x8 + x[8]
- outArr[9] = x9 + x[9]
- outArr[10] = x10 + x[10]
- outArr[11] = x11 + x[11]
- outArr[12] = x12 + x[12]
- outArr[13] = x13 + x[13]
- outArr[14] = x14 + x[14]
- outArr[15] = x15 + x[15]
- }
- } else {
- // Slow path, either the architecture cares about alignment, or is not little endian.
- x0 += sigma0
- x1 += sigma1
- x2 += sigma2
- x3 += sigma3
- x4 += x[4]
- x5 += x[5]
- x6 += x[6]
- x7 += x[7]
- x8 += x[8]
- x9 += x[9]
- x10 += x[10]
- x11 += x[11]
- x12 += x[12]
- x13 += x[13]
- x14 += x[14]
- x15 += x[15]
- if in != nil {
- binary.LittleEndian.PutUint32(out[0:4], binary.LittleEndian.Uint32(in[0:4])^x0)
- binary.LittleEndian.PutUint32(out[4:8], binary.LittleEndian.Uint32(in[4:8])^x1)
- binary.LittleEndian.PutUint32(out[8:12], binary.LittleEndian.Uint32(in[8:12])^x2)
- binary.LittleEndian.PutUint32(out[12:16], binary.LittleEndian.Uint32(in[12:16])^x3)
- binary.LittleEndian.PutUint32(out[16:20], binary.LittleEndian.Uint32(in[16:20])^x4)
- binary.LittleEndian.PutUint32(out[20:24], binary.LittleEndian.Uint32(in[20:24])^x5)
- binary.LittleEndian.PutUint32(out[24:28], binary.LittleEndian.Uint32(in[24:28])^x6)
- binary.LittleEndian.PutUint32(out[28:32], binary.LittleEndian.Uint32(in[28:32])^x7)
- binary.LittleEndian.PutUint32(out[32:36], binary.LittleEndian.Uint32(in[32:36])^x8)
- binary.LittleEndian.PutUint32(out[36:40], binary.LittleEndian.Uint32(in[36:40])^x9)
- binary.LittleEndian.PutUint32(out[40:44], binary.LittleEndian.Uint32(in[40:44])^x10)
- binary.LittleEndian.PutUint32(out[44:48], binary.LittleEndian.Uint32(in[44:48])^x11)
- binary.LittleEndian.PutUint32(out[48:52], binary.LittleEndian.Uint32(in[48:52])^x12)
- binary.LittleEndian.PutUint32(out[52:56], binary.LittleEndian.Uint32(in[52:56])^x13)
- binary.LittleEndian.PutUint32(out[56:60], binary.LittleEndian.Uint32(in[56:60])^x14)
- binary.LittleEndian.PutUint32(out[60:64], binary.LittleEndian.Uint32(in[60:64])^x15)
- in = in[BlockSize:]
- } else {
- binary.LittleEndian.PutUint32(out[0:4], x0)
- binary.LittleEndian.PutUint32(out[4:8], x1)
- binary.LittleEndian.PutUint32(out[8:12], x2)
- binary.LittleEndian.PutUint32(out[12:16], x3)
- binary.LittleEndian.PutUint32(out[16:20], x4)
- binary.LittleEndian.PutUint32(out[20:24], x5)
- binary.LittleEndian.PutUint32(out[24:28], x6)
- binary.LittleEndian.PutUint32(out[28:32], x7)
- binary.LittleEndian.PutUint32(out[32:36], x8)
- binary.LittleEndian.PutUint32(out[36:40], x9)
- binary.LittleEndian.PutUint32(out[40:44], x10)
- binary.LittleEndian.PutUint32(out[44:48], x11)
- binary.LittleEndian.PutUint32(out[48:52], x12)
- binary.LittleEndian.PutUint32(out[52:56], x13)
- binary.LittleEndian.PutUint32(out[56:60], x14)
- binary.LittleEndian.PutUint32(out[60:64], x15)
- }
- out = out[BlockSize:]
- }
- // Stoping at 2^70 bytes per nonce is the user's responsibility.
- ctr := uint64(x[13])<<32 | uint64(x[12])
- ctr++
- x[12] = uint32(ctr)
- x[13] = uint32(ctr >> 32)
- }
- }
- func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
- x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
- x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]
- for i := chachaRounds; i > 0; i -= 2 {
- // quarterround(x, 0, 4, 8, 12)
- x0 += x4
- x12 ^= x0
- x12 = (x12 << 16) | (x12 >> 16)
- x8 += x12
- x4 ^= x8
- x4 = (x4 << 12) | (x4 >> 20)
- x0 += x4
- x12 ^= x0
- x12 = (x12 << 8) | (x12 >> 24)
- x8 += x12
- x4 ^= x8
- x4 = (x4 << 7) | (x4 >> 25)
- // quarterround(x, 1, 5, 9, 13)
- x1 += x5
- x13 ^= x1
- x13 = (x13 << 16) | (x13 >> 16)
- x9 += x13
- x5 ^= x9
- x5 = (x5 << 12) | (x5 >> 20)
- x1 += x5
- x13 ^= x1
- x13 = (x13 << 8) | (x13 >> 24)
- x9 += x13
- x5 ^= x9
- x5 = (x5 << 7) | (x5 >> 25)
- // quarterround(x, 2, 6, 10, 14)
- x2 += x6
- x14 ^= x2
- x14 = (x14 << 16) | (x14 >> 16)
- x10 += x14
- x6 ^= x10
- x6 = (x6 << 12) | (x6 >> 20)
- x2 += x6
- x14 ^= x2
- x14 = (x14 << 8) | (x14 >> 24)
- x10 += x14
- x6 ^= x10
- x6 = (x6 << 7) | (x6 >> 25)
- // quarterround(x, 3, 7, 11, 15)
- x3 += x7
- x15 ^= x3
- x15 = (x15 << 16) | (x15 >> 16)
- x11 += x15
- x7 ^= x11
- x7 = (x7 << 12) | (x7 >> 20)
- x3 += x7
- x15 ^= x3
- x15 = (x15 << 8) | (x15 >> 24)
- x11 += x15
- x7 ^= x11
- x7 = (x7 << 7) | (x7 >> 25)
- // quarterround(x, 0, 5, 10, 15)
- x0 += x5
- x15 ^= x0
- x15 = (x15 << 16) | (x15 >> 16)
- x10 += x15
- x5 ^= x10
- x5 = (x5 << 12) | (x5 >> 20)
- x0 += x5
- x15 ^= x0
- x15 = (x15 << 8) | (x15 >> 24)
- x10 += x15
- x5 ^= x10
- x5 = (x5 << 7) | (x5 >> 25)
- // quarterround(x, 1, 6, 11, 12)
- x1 += x6
- x12 ^= x1
- x12 = (x12 << 16) | (x12 >> 16)
- x11 += x12
- x6 ^= x11
- x6 = (x6 << 12) | (x6 >> 20)
- x1 += x6
- x12 ^= x1
- x12 = (x12 << 8) | (x12 >> 24)
- x11 += x12
- x6 ^= x11
- x6 = (x6 << 7) | (x6 >> 25)
- // quarterround(x, 2, 7, 8, 13)
- x2 += x7
- x13 ^= x2
- x13 = (x13 << 16) | (x13 >> 16)
- x8 += x13
- x7 ^= x8
- x7 = (x7 << 12) | (x7 >> 20)
- x2 += x7
- x13 ^= x2
- x13 = (x13 << 8) | (x13 >> 24)
- x8 += x13
- x7 ^= x8
- x7 = (x7 << 7) | (x7 >> 25)
- // quarterround(x, 3, 4, 9, 14)
- x3 += x4
- x14 ^= x3
- x14 = (x14 << 16) | (x14 >> 16)
- x9 += x14
- x4 ^= x9
- x4 = (x4 << 12) | (x4 >> 20)
- x3 += x4
- x14 ^= x3
- x14 = (x14 << 8) | (x14 >> 24)
- x9 += x14
- x4 ^= x9
- x4 = (x4 << 7) | (x4 >> 25)
- }
- // HChaCha returns x0...x3 | x12...x15, which corresponds to the
- // indexes of the ChaCha constant and the indexes of the IV.
- if useUnsafe {
- outArr := (*[16]uint32)(unsafe.Pointer(&out[0]))
- outArr[0] = x0
- outArr[1] = x1
- outArr[2] = x2
- outArr[3] = x3
- outArr[4] = x12
- outArr[5] = x13
- outArr[6] = x14
- outArr[7] = x15
- } else {
- binary.LittleEndian.PutUint32(out[0:4], x0)
- binary.LittleEndian.PutUint32(out[4:8], x1)
- binary.LittleEndian.PutUint32(out[8:12], x2)
- binary.LittleEndian.PutUint32(out[12:16], x3)
- binary.LittleEndian.PutUint32(out[16:20], x12)
- binary.LittleEndian.PutUint32(out[20:24], x13)
- binary.LittleEndian.PutUint32(out[24:28], x14)
- binary.LittleEndian.PutUint32(out[28:32], x15)
- }
- return
- }
|