asm_amd64.s 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. #include "textflag.h"
  // func hasAsm() bool
  //
  // Reports whether AES-NI is supported: runs CPUID with EAX = 1 and returns
  // bit 25 of the resulting ECX as the bool result.
  // CPUID overwrites AX, BX, CX and DX.
  TEXT ·hasAsm(SB),NOSPLIT,$0
      MOVL $1, AX                // CPUID leaf 1; a 32-bit move also clears the upper half of AX
      CPUID
      SHRL $25, CX               // move ECX bit 25 down to bit 0
      ANDL $1, CX                // drop every other feature bit
      MOVB CX, ret+0(FP)
      RET
  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
  //
  // Encrypts the 16-byte block at src into dst using the round keys at xk.
  // The first 16 bytes of xk are XORed into the block; every following round
  // consumes the next 16 bytes of xk. nr selects the entry point: below 12
  // (unsigned) runs 10 rounds, exactly 12 runs 12 rounds, and any other value
  // falls through to the 14-round path.
  //
  // Registers: AX = round-key base, BX = src, CX = nr, DX = dst,
  //            X0 = block state, X1 = current round key.
  TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
      MOVQ nr+0(FP), CX
      MOVQ xk+8(FP), AX
      MOVQ dst+16(FP), DX
      MOVQ src+24(FP), BX
      MOVUPS (AX), X1
      MOVUPS (BX), X0
      PXOR X1, X0                // whitening with round key 0
      CMPQ CX, $12               // only the flags are needed; CX is not read again
      JE Lenc192
      JB Lenc128
  Lenc256:
      // Two extra rounds that only the 14-round schedule has.
      MOVUPS 16(AX), X1
      AESENC X1, X0
      MOVUPS 32(AX), X1
      AESENC X1, X0
      ADDQ $32, AX
  Lenc192:
      // Two extra rounds shared by the 12- and 14-round schedules.
      MOVUPS 16(AX), X1
      AESENC X1, X0
      MOVUPS 32(AX), X1
      AESENC X1, X0
      ADDQ $32, AX
  Lenc128:
      // Final ten rounds, common to every key size. AX sits 16 bytes before
      // the first key used here, hence the offsets start at 16.
      MOVUPS 16(AX), X1
      AESENC X1, X0
      MOVUPS 32(AX), X1
      AESENC X1, X0
      MOVUPS 48(AX), X1
      AESENC X1, X0
      MOVUPS 64(AX), X1
      AESENC X1, X0
      MOVUPS 80(AX), X1
      AESENC X1, X0
      MOVUPS 96(AX), X1
      AESENC X1, X0
      MOVUPS 112(AX), X1
      AESENC X1, X0
      MOVUPS 128(AX), X1
      AESENC X1, X0
      MOVUPS 144(AX), X1
      AESENC X1, X0
      MOVUPS 160(AX), X1
      AESENCLAST X1, X0
      MOVUPS X0, (DX)
      RET
  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
  //
  // Decrypts the 16-byte block at src into dst using the round keys at xk,
  // read front to back. The first 16 bytes of xk are XORed into the block;
  // every following round consumes the next 16 bytes. nr selects the entry
  // point: below 12 (unsigned) runs 10 rounds, exactly 12 runs 12 rounds, and
  // any other value falls through to the 14-round path.
  //
  // Registers: AX = round-key base, BX = src, CX = nr, DX = dst,
  //            X0 = block state, X1 = current round key.
  TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
      MOVQ nr+0(FP), CX
      MOVQ xk+8(FP), AX
      MOVQ dst+16(FP), DX
      MOVQ src+24(FP), BX
      MOVUPS (AX), X1
      MOVUPS (BX), X0
      PXOR X1, X0                // whitening with the first key of the schedule
      CMPQ CX, $12               // only the flags are needed; CX is not read again
      JE Ldec192
      JB Ldec128
  Ldec256:
      // Two extra rounds that only the 14-round schedule has.
      MOVUPS 16(AX), X1
      AESDEC X1, X0
      MOVUPS 32(AX), X1
      AESDEC X1, X0
      ADDQ $32, AX
  Ldec192:
      // Two extra rounds shared by the 12- and 14-round schedules.
      MOVUPS 16(AX), X1
      AESDEC X1, X0
      MOVUPS 32(AX), X1
      AESDEC X1, X0
      ADDQ $32, AX
  Ldec128:
      // Final ten rounds, common to every key size. AX sits 16 bytes before
      // the first key used here, hence the offsets start at 16.
      MOVUPS 16(AX), X1
      AESDEC X1, X0
      MOVUPS 32(AX), X1
      AESDEC X1, X0
      MOVUPS 48(AX), X1
      AESDEC X1, X0
      MOVUPS 64(AX), X1
      AESDEC X1, X0
      MOVUPS 80(AX), X1
      AESDEC X1, X0
      MOVUPS 96(AX), X1
      AESDEC X1, X0
      MOVUPS 112(AX), X1
      AESDEC X1, X0
      MOVUPS 128(AX), X1
      AESDEC X1, X0
      MOVUPS 144(AX), X1
      AESDEC X1, X0
      MOVUPS 160(AX), X1
      AESDECLAST X1, X0
      MOVUPS X0, (DX)
      RET
  // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
  // Note that round keys are stored in uint128 format, not uint32
  //
  // Expands the cipher key at key into 16-byte round keys at enc, then builds
  // the decryption schedule at dec from them. nr selects the key size: below
  // 12 (unsigned) reads 16 key bytes, exactly 12 reads 24, any other value
  // reads 32. For nr = 10, 12 and 14 this writes 11, 13 and 15 round keys to
  // each of enc and dec.
  //
  // Registers: AX = key, BX = enc write cursor, CX = nr, DX = dec write cursor,
  //            X0 / X2 = key material carried between helper calls,
  //            X1 = AESKEYGENASSIST result, X4 = helper scratch.
  // The _expand_key_* helpers take their inputs in these registers, store the
  // new round key(s) at (BX) and advance BX themselves.
  TEXT ·expandKeyAsm(SB),NOSPLIT,$0
      MOVQ nr+0(FP), CX
      MOVQ key+8(FP), AX
      MOVQ enc+16(FP), BX
      MOVQ dec+24(FP), DX
      MOVUPS (AX), X0
      // enc
      MOVUPS X0, (BX)            // round key 0 is the first 16 key bytes, unchanged
      ADDQ $16, BX
      PXOR X4, X4 // _expand_key_* expect X4 to be zero
      CMPQ CX, $12               // nr is a 64-bit int: compare the whole register
      JE Lexp_enc192
      JB Lexp_enc128
  Lexp_enc256:
      MOVUPS 16(AX), X2          // second half of the 32-byte key is round key 1
      MOVUPS X2, (BX)
      ADDQ $16, BX
      // Alternate the two helpers; the immediate is the round constant.
      AESKEYGENASSIST $0x01, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x01, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x02, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x02, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x04, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x04, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x08, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x08, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x10, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x10, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x20, X2, X1
      CALL _expand_key_256a<>(SB)
      AESKEYGENASSIST $0x20, X0, X1
      CALL _expand_key_256b<>(SB)
      AESKEYGENASSIST $0x40, X2, X1
      CALL _expand_key_256a<>(SB)
      JMP Lexp_dec
  Lexp_enc192:
      MOVQ 16(AX), X2            // remaining 8 key bytes into the low half of X2
      // 192a stores 32 bytes per call, 192b stores 16.
      AESKEYGENASSIST $0x01, X2, X1
      CALL _expand_key_192a<>(SB)
      AESKEYGENASSIST $0x02, X2, X1
      CALL _expand_key_192b<>(SB)
      AESKEYGENASSIST $0x04, X2, X1
      CALL _expand_key_192a<>(SB)
      AESKEYGENASSIST $0x08, X2, X1
      CALL _expand_key_192b<>(SB)
      AESKEYGENASSIST $0x10, X2, X1
      CALL _expand_key_192a<>(SB)
      AESKEYGENASSIST $0x20, X2, X1
      CALL _expand_key_192b<>(SB)
      AESKEYGENASSIST $0x40, X2, X1
      CALL _expand_key_192a<>(SB)
      AESKEYGENASSIST $0x80, X2, X1
      CALL _expand_key_192b<>(SB)
      JMP Lexp_dec
  Lexp_enc128:
      // Ten calls, one new round key each.
      AESKEYGENASSIST $0x01, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x02, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x04, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x08, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x10, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x20, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x40, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x80, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x1b, X0, X1
      CALL _expand_key_128<>(SB)
      AESKEYGENASSIST $0x36, X0, X1
      CALL _expand_key_128<>(SB)
  Lexp_dec:
      // dec
      // BX is one past the last enc round key. Walk enc backwards while
      // filling dec forwards.
      SUBQ $16, BX
      MOVUPS (BX), X1
      MOVUPS X1, (DX)            // last enc key becomes the first dec key, unchanged
      DECQ CX                    // nr-1 middle keys go through AESIMC
  Lexp_dec_loop:
      MOVUPS -16(BX), X1
      AESIMC X1, X0
      MOVUPS X0, 16(DX)
      SUBQ $16, BX
      ADDQ $16, DX
      DECQ CX
      JNZ Lexp_dec_loop
      MOVUPS -16(BX), X0         // first enc key becomes the last dec key, unchanged
      MOVUPS X0, 16(DX)
      RET
  // _expand_key_128 produces one 16-byte round key from the previous one.
  //
  // In:  X0 = previous round key (dwords w0..w3)
  //      X1 = AESKEYGENASSIST result; only its dword 3 is used
  //      X4 = scratch whose dword 0 must be zero
  //      BX = where to store the new key
  // Out: X0 = new round key, also written to (BX); BX advanced by 16.
  //      X1 and X4 are overwritten; dword 0 of X4 is zero again.
  TEXT _expand_key_128<>(SB),NOSPLIT,$0
      // Turn X0 into (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
      SHUFPS $0x10, X0, X4
      PXOR X4, X0
      SHUFPS $0x8c, X0, X4
      PXOR X4, X0
      // Mix the broadcast of X1's dword 3 into every lane.
      PSHUFD $0xff, X1, X1
      PXOR X1, X0
      MOVUPS X0, (BX)
      ADDQ $16, BX
      RET
  // _expand_key_192a advances the 192-bit key state and stores 32 bytes of
  // round-key material.
  //
  // In:  X0 = four key dwords, X2 = further key dwords (low half is stored)
  //      X1 = AESKEYGENASSIST result; only its dword 1 is used
  //      X4 = scratch whose dword 0 must be zero
  //      BX = where to store
  // Out: (BX)   = incoming X2 low qword followed by updated X0 low qword
  //      16(BX) = updated X0 high qword followed by updated X2 low qword
  //      X0 and X2 updated, BX advanced by 32.
  //      X1, X3, X4, X5 and X6 are overwritten.
  TEXT _expand_key_192a<>(SB),NOSPLIT,$0
      // Same X0 update as the 128-bit step, but keyed by dword 1 of X1.
      SHUFPS $0x10, X0, X4
      PXOR X4, X0
      SHUFPS $0x8c, X0, X4
      PXOR X4, X0
      PSHUFD $0x55, X1, X1
      PXOR X1, X0
      // First output: needs X2 as it was on entry, so build it before X2 changes.
      MOVAPS X2, X6
      SHUFPS $0x44, X0, X6
      MOVUPS X6, (BX)
      // X2 ^= broadcast(dword 3 of X0) ^ (X2 shifted left by 4 bytes)
      MOVAPS X2, X5
      PSLLDQ $0x4, X5
      PSHUFD $0xff, X0, X3
      PXOR X3, X2
      PXOR X5, X2
      // Second output: high half of X0 followed by low half of the new X2.
      MOVAPS X0, X1
      SHUFPS $0x4e, X2, X1
      MOVUPS X1, 16(BX)
      ADDQ $32, BX
      RET
  // _expand_key_192b advances the 192-bit key state and stores 16 bytes of
  // round-key material.
  //
  // In:  X0 = four key dwords, X2 = further key dwords
  //      X1 = AESKEYGENASSIST result; only its dword 1 is used
  //      X4 = scratch whose dword 0 must be zero
  //      BX = where to store
  // Out: (BX) = updated X0; X0 and X2 updated, BX advanced by 16.
  //      X1, X3, X4 and X5 are overwritten.
  TEXT _expand_key_192b<>(SB),NOSPLIT,$0
      // Same X0 update as the 128-bit step, but keyed by dword 1 of X1.
      SHUFPS $0x10, X0, X4
      PXOR X4, X0
      SHUFPS $0x8c, X0, X4
      PXOR X4, X0
      PSHUFD $0x55, X1, X1
      PXOR X1, X0
      MOVUPS X0, (BX)
      ADDQ $16, BX
      // X2 ^= broadcast(dword 3 of X0) ^ (X2 shifted left by 4 bytes)
      MOVAPS X2, X5
      PSLLDQ $0x4, X5
      PSHUFD $0xff, X0, X3
      PXOR X3, X2
      PXOR X5, X2
      RET
  // _expand_key_256a is the 256-bit schedule step that updates X0. It is the
  // very same computation as _expand_key_128, so it tail-jumps there instead
  // of duplicating it; that routine's RET returns straight to our caller.
  // Inputs, outputs and overwritten registers are those of _expand_key_128.
  TEXT _expand_key_256a<>(SB),NOSPLIT,$0
      JMP _expand_key_128<>(SB)
  // _expand_key_256b is the 256-bit schedule step that updates X2.
  //
  // In:  X2 = round key to update (dwords w0..w3)
  //      X1 = AESKEYGENASSIST result; only its dword 2 is used
  //      X4 = scratch whose dword 0 must be zero
  //      BX = where to store the new key
  // Out: X2 = new round key, also written to (BX); BX advanced by 16.
  //      X1 and X4 are overwritten; X0 is left alone.
  TEXT _expand_key_256b<>(SB),NOSPLIT,$0
      // Turn X2 into (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
      SHUFPS $0x10, X2, X4
      PXOR X4, X2
      SHUFPS $0x8c, X2, X4
      PXOR X4, X2
      // Mix the broadcast of X1's dword 2 into every lane.
      PSHUFD $0xaa, X1, X1
      PXOR X1, X2
      MOVUPS X2, (BX)
      ADDQ $16, BX
      RET