chacha_generic.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package ChaCha20 implements the core ChaCha20 function as specified
  5. // in https://tools.ietf.org/html/rfc7539#section-2.3.
  6. package chacha20
  7. import (
  8. "crypto/cipher"
  9. "encoding/binary"
  10. "github.com/Psiphon-Labs/chacha20/internal/subtle"
  11. )
  12. // assert that *Cipher implements cipher.Stream
  13. var _ cipher.Stream = (*Cipher)(nil)
  14. // Cipher is a stateful instance of ChaCha20 using a particular key
  15. // and nonce. A *Cipher implements the cipher.Stream interface.
  16. type Cipher struct {
  17. key [8]uint32
  18. counter uint32 // incremented after each block
  19. overflow bool
  20. nonce [3]uint32
  21. buf [bufSize]byte // buffer for unused keystream bytes
  22. len int // number of unused keystream bytes at end of buf
  23. }
  24. // New creates a new ChaCha20 stream cipher with the given key and nonce.
  25. // The initial counter value is set to 0.
  26. func New(key [8]uint32, nonce [3]uint32) *Cipher {
  27. return &Cipher{key: key, nonce: nonce}
  28. }
  29. // ChaCha20 constants spelling "expand 32-byte k"
  30. const (
  31. j0 uint32 = 0x61707865
  32. j1 uint32 = 0x3320646e
  33. j2 uint32 = 0x79622d32
  34. j3 uint32 = 0x6b206574
  35. )
  36. func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
  37. a += b
  38. d ^= a
  39. d = (d << 16) | (d >> 16)
  40. c += d
  41. b ^= c
  42. b = (b << 12) | (b >> 20)
  43. a += b
  44. d ^= a
  45. d = (d << 8) | (d >> 24)
  46. c += d
  47. b ^= c
  48. b = (b << 7) | (b >> 25)
  49. return a, b, c, d
  50. }
  51. // XORKeyStream XORs each byte in the given slice with a byte from the
  52. // cipher's key stream. Dst and src must overlap entirely or not at all.
  53. //
  54. // If len(dst) < len(src), XORKeyStream will panic. It is acceptable
  55. // to pass a dst bigger than src, and in that case, XORKeyStream will
  56. // only update dst[:len(src)] and will not touch the rest of dst.
  57. //
  58. // Multiple calls to XORKeyStream behave as if the concatenation of
  59. // the src buffers was passed in a single run. That is, Cipher
  60. // maintains state and does not reset at each XORKeyStream call.
  61. func (s *Cipher) XORKeyStream(dst, src []byte) {
  62. if len(dst) < len(src) {
  63. panic("chacha20: output smaller than input")
  64. }
  65. if subtle.InexactOverlap(dst[:len(src)], src) {
  66. panic("chacha20: invalid buffer overlap")
  67. }
  68. // xor src with buffered keystream first
  69. if s.len != 0 {
  70. buf := s.buf[len(s.buf)-s.len:]
  71. if len(src) < len(buf) {
  72. buf = buf[:len(src)]
  73. }
  74. td, ts := dst[:len(buf)], src[:len(buf)] // BCE hint
  75. for i, b := range buf {
  76. td[i] = ts[i] ^ b
  77. }
  78. s.len -= len(buf)
  79. if s.len != 0 {
  80. return
  81. }
  82. s.buf = [len(s.buf)]byte{} // zero the empty buffer
  83. src = src[len(buf):]
  84. dst = dst[len(buf):]
  85. }
  86. if len(src) == 0 {
  87. return
  88. }
  89. if haveAsm {
  90. // [Psiphon]
  91. //
  92. // Allow up to 2^32 blocks.
  93. if uint64(len(src))+uint64(s.counter)*64 > (1 << 38) {
  94. panic("chacha20: counter overflow")
  95. }
  96. s.xorKeyStreamAsm(dst, src)
  97. return
  98. }
  99. // set up a 64-byte buffer to pad out the final block if needed
  100. // (hoisted out of the main loop to avoid spills)
  101. rem := len(src) % 64 // length of final block
  102. fin := len(src) - rem // index of final block
  103. if rem > 0 {
  104. copy(s.buf[len(s.buf)-64:], src[fin:])
  105. }
  106. // pre-calculate most of the first round
  107. s1, s5, s9, s13 := quarterRound(j1, s.key[1], s.key[5], s.nonce[0])
  108. s2, s6, s10, s14 := quarterRound(j2, s.key[2], s.key[6], s.nonce[1])
  109. s3, s7, s11, s15 := quarterRound(j3, s.key[3], s.key[7], s.nonce[2])
  110. n := len(src)
  111. src, dst = src[:n:n], dst[:n:n] // BCE hint
  112. for i := 0; i < n; i += 64 {
  113. if s.overflow {
  114. panic("chacha20: counter overflow")
  115. }
  116. // calculate the remainder of the first round
  117. s0, s4, s8, s12 := quarterRound(j0, s.key[0], s.key[4], s.counter)
  118. // execute the second round
  119. x0, x5, x10, x15 := quarterRound(s0, s5, s10, s15)
  120. x1, x6, x11, x12 := quarterRound(s1, s6, s11, s12)
  121. x2, x7, x8, x13 := quarterRound(s2, s7, s8, s13)
  122. x3, x4, x9, x14 := quarterRound(s3, s4, s9, s14)
  123. // execute the remaining 18 rounds
  124. for i := 0; i < 9; i++ {
  125. x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
  126. x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
  127. x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
  128. x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
  129. x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
  130. x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
  131. x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
  132. x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
  133. }
  134. x0 += j0
  135. x1 += j1
  136. x2 += j2
  137. x3 += j3
  138. x4 += s.key[0]
  139. x5 += s.key[1]
  140. x6 += s.key[2]
  141. x7 += s.key[3]
  142. x8 += s.key[4]
  143. x9 += s.key[5]
  144. x10 += s.key[6]
  145. x11 += s.key[7]
  146. x12 += s.counter
  147. x13 += s.nonce[0]
  148. x14 += s.nonce[1]
  149. x15 += s.nonce[2]
  150. // increment the counter
  151. s.counter += 1
  152. if s.counter == 0 {
  153. // [Psiphon]
  154. //
  155. // Don't panic immediately, as the counter will wrap here when it's 2^31-1,
  156. // and that's a valid value. Do panic after overflow is set and any further
  157. // blocks are processed.
  158. //
  159. // https://tools.ietf.org/html/rfc7539#section-2.3.2: ChaCha20 "limits the
  160. // use of a single (key,nonce) combination to 2^32 blocks".
  161. //
  162. // The 2^31-1 counter value occurs in practise in QUIC header protection,
  163. // https://tools.ietf.org/html/draft-ietf-quic-tls-24#section-5.4.4, which
  164. // initializes the counter using 4 bytes of sampled ciphertext.
  165. //
  166. // In OpenSSL, chacha20 will operate on 2^32 blocks before applying its
  167. // overflow logic:
  168. // https://github.com/openssl/openssl/blob/706457b7bda7fdbab426b8dce83b318908339da4/crypto/evp/e_chacha20_poly1305.c#L94-L104.
  169. s.overflow = true
  170. }
  171. // pad to 64 bytes if needed
  172. in, out := src[i:], dst[i:]
  173. if i == fin {
  174. // src[fin:] has already been copied into s.buf before
  175. // the main loop
  176. in, out = s.buf[len(s.buf)-64:], s.buf[len(s.buf)-64:]
  177. }
  178. in, out = in[:64], out[:64] // BCE hint
  179. // XOR the key stream with the source and write out the result
  180. xor(out[0:], in[0:], x0)
  181. xor(out[4:], in[4:], x1)
  182. xor(out[8:], in[8:], x2)
  183. xor(out[12:], in[12:], x3)
  184. xor(out[16:], in[16:], x4)
  185. xor(out[20:], in[20:], x5)
  186. xor(out[24:], in[24:], x6)
  187. xor(out[28:], in[28:], x7)
  188. xor(out[32:], in[32:], x8)
  189. xor(out[36:], in[36:], x9)
  190. xor(out[40:], in[40:], x10)
  191. xor(out[44:], in[44:], x11)
  192. xor(out[48:], in[48:], x12)
  193. xor(out[52:], in[52:], x13)
  194. xor(out[56:], in[56:], x14)
  195. xor(out[60:], in[60:], x15)
  196. }
  197. // copy any trailing bytes out of the buffer and into dst
  198. if rem != 0 {
  199. s.len = 64 - rem
  200. copy(dst[fin:], s.buf[len(s.buf)-64:])
  201. }
  202. }
  203. // Advance discards bytes in the key stream until the next 64 byte block
  204. // boundary is reached and updates the counter accordingly. If the key
  205. // stream is already at a block boundary no bytes will be discarded and
  206. // the counter will be unchanged.
  207. func (s *Cipher) Advance() {
  208. s.len -= s.len % 64
  209. if s.len == 0 {
  210. s.buf = [len(s.buf)]byte{}
  211. }
  212. }
  213. // XORKeyStream crypts bytes from in to out using the given key and counters.
  214. // In and out must overlap entirely or not at all. Counter contains the raw
  215. // ChaCha20 counter bytes (i.e. block counter followed by nonce).
  216. func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
  217. s := Cipher{
  218. key: [8]uint32{
  219. binary.LittleEndian.Uint32(key[0:4]),
  220. binary.LittleEndian.Uint32(key[4:8]),
  221. binary.LittleEndian.Uint32(key[8:12]),
  222. binary.LittleEndian.Uint32(key[12:16]),
  223. binary.LittleEndian.Uint32(key[16:20]),
  224. binary.LittleEndian.Uint32(key[20:24]),
  225. binary.LittleEndian.Uint32(key[24:28]),
  226. binary.LittleEndian.Uint32(key[28:32]),
  227. },
  228. nonce: [3]uint32{
  229. binary.LittleEndian.Uint32(counter[4:8]),
  230. binary.LittleEndian.Uint32(counter[8:12]),
  231. binary.LittleEndian.Uint32(counter[12:16]),
  232. },
  233. counter: binary.LittleEndian.Uint32(counter[0:4]),
  234. }
  235. s.XORKeyStream(out, in)
  236. }
  237. // HChaCha20 uses the ChaCha20 core to generate a derived key from a key and a
  238. // nonce. It should only be used as part of the XChaCha20 construction.
  239. func HChaCha20(key *[8]uint32, nonce *[4]uint32) [8]uint32 {
  240. x0, x1, x2, x3 := j0, j1, j2, j3
  241. x4, x5, x6, x7 := key[0], key[1], key[2], key[3]
  242. x8, x9, x10, x11 := key[4], key[5], key[6], key[7]
  243. x12, x13, x14, x15 := nonce[0], nonce[1], nonce[2], nonce[3]
  244. for i := 0; i < 10; i++ {
  245. x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
  246. x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
  247. x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
  248. x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
  249. x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
  250. x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
  251. x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
  252. x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
  253. }
  254. var out [8]uint32
  255. out[0], out[1], out[2], out[3] = x0, x1, x2, x3
  256. out[4], out[5], out[6], out[7] = x12, x13, x14, x15
  257. return out
  258. }