// blocks_arm.s
  1. #include "textflag.h"
  2. #define ROUND()\
  3. ADD.S R2,R0,R0;\
  4. ADC R3,R1,R1;\
  5. EOR R2<<13,R0,R8;\
  6. EOR R3>>19,R8,R8;\
  7. EOR R2>>19,R1,R11;\
  8. EOR R3<<13,R11,R11;\
  9. ADD.S R6,R4,R4;\
  10. ADC R7,R5,R5;\
  11. EOR R6<<16,R4,R2;\
  12. EOR R7>>16,R2,R2;\
  13. EOR R6>>16,R5,R3;\
  14. EOR R7<<16,R3,R3;\
  15. ADD.S R2,R1,R1;\
  16. ADC R3,R0,R0;\
  17. EOR R2<<21,R1,R6;\
  18. EOR R3>>11,R6,R6;\
  19. EOR R2>>11,R0,R7;\
  20. EOR R3<<21,R7,R7;\
  21. ADD.S R8,R4,R4;\
  22. ADC R11,R5,R5;\
  23. EOR R8<<17,R4,R2;\
  24. EOR R11>>15,R2,R2;\
  25. EOR R8>>15,R5,R3;\
  26. EOR R11<<17,R3,R3;\
  27. ADD.S R2,R1,R1;\
  28. ADC R3,R0,R0;\
  29. EOR R2<<13,R1,R8;\
  30. EOR R3>>19,R8,R8;\
  31. EOR R2>>19,R0,R11;\
  32. EOR R3<<13,R11,R11;\
  33. ADD.S R6,R5,R5;\
  34. ADC R7,R4,R4;\
  35. EOR R6<<16,R5,R2;\
  36. EOR R7>>16,R2,R2;\
  37. EOR R6>>16,R4,R3;\
  38. EOR R7<<16,R3,R3;\
  39. ADD.S R2,R0,R0;\
  40. ADC R3,R1,R1;\
  41. EOR R2<<21,R0,R6;\
  42. EOR R3>>11,R6,R6;\
  43. EOR R2>>11,R1,R7;\
  44. EOR R3<<21,R7,R7;\
  45. ADD.S R8,R5,R5;\
  46. ADC R11,R4,R4;\
  47. EOR R8<<17,R5,R2;\
  48. EOR R11>>15,R2,R2;\
  49. EOR R8>>15,R4,R3;\
  50. EOR R11<<17,R3,R3;
  51. // once(d *digest)
  52. TEXT ·once(SB),NOSPLIT,$4-4
  53. MOVW d+0(FP),R8
  54. MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
  55. MOVW 48(R8),R12
  56. MOVW 52(R8),R14
  57. EOR R12,R6,R6
  58. EOR R14,R7,R7
  59. ROUND()
  60. EOR R12,R0,R0
  61. EOR R14,R1,R1
  62. MOVW d+0(FP),R8
  63. MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
  64. RET
  65. // finalize(d *digest) uint64
  66. TEXT ·finalize(SB),NOSPLIT,$4-12
  67. MOVW d+0(FP),R8
  68. MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
  69. MOVW 48(R8),R12
  70. MOVW 52(R8),R14
  71. EOR R12,R6,R6
  72. EOR R14,R7,R7
  73. ROUND()
  74. EOR R12,R0,R0
  75. EOR R14,R1,R1
  76. EOR $255,R4
  77. ROUND()
  78. ROUND()
  79. EOR R2,R0,R0
  80. EOR R3,R1,R1
  81. EOR R6,R4,R4
  82. EOR R7,R5,R5
  83. EOR R4,R0,R0
  84. EOR R5,R1,R1
  85. MOVW R0,ret_lo+4(FP)
  86. MOVW R1,ret_hi+8(FP)
  87. RET
  88. // blocks(d *digest, data []uint8)
  89. TEXT ·blocks(SB),NOSPLIT,$4-16
  90. MOVW d+0(FP),R8
  91. MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
  92. MOVW p+4(FP),R9
  93. MOVW p_len+8(FP),R11
  94. ADD R9,R11,R11
  95. MOVW R11,endp-4(SP)
  96. AND.S $3,R9,R8
  97. BNE blocksunaligned
  98. blocksloop:
  99. MOVM.IA.W (R9),[R12,R14]
  100. EOR R12,R6,R6
  101. EOR R14,R7,R7
  102. ROUND()
  103. EOR R12,R0,R0
  104. EOR R14,R1,R1
  105. MOVW endp-4(SP),R11
  106. CMP R11,R9
  107. BLO blocksloop
  108. MOVW d+0(FP),R8
  109. MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
  110. RET
  111. blocksunaligned:
  112. MOVBU.P 8(R9),R12
  113. MOVBU -7(R9),R11
  114. ORR R11<<8,R12,R12
  115. MOVBU -6(R9),R11
  116. ORR R11<<16,R12,R12
  117. MOVBU -5(R9),R11
  118. ORR R11<<24,R12,R12
  119. MOVBU -4(R9),R14
  120. MOVBU -3(R9),R11
  121. ORR R11<<8,R14,R14
  122. MOVBU -2(R9),R11
  123. ORR R11<<16,R14,R14
  124. MOVBU -1(R9),R11
  125. ORR R11<<24,R14,R14
  126. EOR R12,R6,R6
  127. EOR R14,R7,R7
  128. ROUND()
  129. EOR R12,R0,R0
  130. EOR R14,R1,R1
  131. MOVW endp-4(SP),R11
  132. CMP R11,R9
  133. BLO blocksunaligned
  134. MOVW d+0(FP),R8
  135. MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
  136. RET