| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- // chacha20_amd64.go - AMD64 optimized chacha20.
- //
- // To the extent possible under law, Yawning Angel has waived all copyright
- // and related or neighboring rights to chacha20, using the Creative
- // Commons "CC0" public domain dedication. See LICENSE or
- // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
- // +build amd64,!gccgo,!appengine
- package chacha20
- import (
- "math"
- )
- var usingAVX2 = false
- func blocksAmd64SSE2(x *uint32, inp, outp *byte, nrBlocks uint)
- func blocksAmd64AVX2(x *uint32, inp, outp *byte, nrBlocks uint)
- func cpuidAmd64(cpuidParams *uint32)
- func xgetbv0Amd64(xcrVec *uint32)
- func blocksAmd64(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
- // Probably unneeded, but stating this explicitly simplifies the assembly.
- if nrBlocks == 0 {
- return
- }
- if isIetf {
- var totalBlocks uint64
- totalBlocks = uint64(x[12]) + uint64(nrBlocks)
- if totalBlocks > math.MaxUint32 {
- panic("chacha20: Exceeded keystream per nonce limit")
- }
- }
- if in == nil {
- for i := range out {
- out[i] = 0
- }
- in = out
- }
- // Pointless to call the AVX2 code for just a single block, since half of
- // the output gets discarded...
- if usingAVX2 && nrBlocks > 1 {
- blocksAmd64AVX2(&x[0], &in[0], &out[0], uint(nrBlocks))
- } else {
- blocksAmd64SSE2(&x[0], &in[0], &out[0], uint(nrBlocks))
- }
- }
- func supportsAVX2() bool {
- // https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
- const (
- osXsaveBit = 1 << 27
- avx2Bit = 1 << 5
- )
- // Check to see if CPUID actually supports the leaf that indicates AVX2.
- // CPUID.(EAX=0H, ECX=0H) >= 7
- regs := [4]uint32{0x00}
- cpuidAmd64(®s[0])
- if regs[0] < 7 {
- return false
- }
- // Check to see if the OS knows how to save/restore XMM/YMM state.
- // CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
- regs = [4]uint32{0x01}
- cpuidAmd64(®s[0])
- if regs[2]&osXsaveBit == 0 {
- return false
- }
- xcrRegs := [2]uint32{}
- xgetbv0Amd64(&xcrRegs[0])
- if xcrRegs[0]&6 != 6 {
- return false
- }
- // Check for AVX2 support.
- // CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
- regs = [4]uint32{0x07}
- cpuidAmd64(®s[0])
- return regs[1]&avx2Bit != 0
- }
- func init() {
- blocksFn = blocksAmd64
- usingVectors = true
- usingAVX2 = supportsAVX2()
- }
|