square_amd64.s 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This code was translated into a form compatible with 6a from the public
  5. // domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
  6. // +build amd64,!gccgo,!appengine
  7. // func square(out, in *[5]uint64)
  //
  // Squares the five-limb number at `in` and stores a five-limb result at `out`.
  //
  // Register roles (as used below):
  //   DI            = out pointer
  //   SI            = in pointer; reused for the ·REDMASK51 mask once all inputs are consumed
  //   R8:CX         = 128-bit accumulator r0 (high:low)
  //   R10:R9        = 128-bit accumulator r1
  //   R12:R11       = 128-bit accumulator r2
  //   R14:R13       = 128-bit accumulator r3
  //   BX:R15        = 128-bit accumulator r4
  //   AX, DX        = MULQ operand/result pair and general scratch
  //
  // With in0..in4 the five input words, the accumulation phase builds:
  //   r0 = in0*in0   + 38*in1*in4 + 38*in2*in3
  //   r1 = 2*in0*in1 + 38*in2*in4 + 19*in3*in3
  //   r2 = 2*in0*in2 + in1*in1    + 38*in3*in4
  //   r3 = 2*in0*in3 + 2*in1*in2  + 19*in4*in4
  //   r4 = 2*in0*in4 + 2*in1*in3  + in2*in2
  //
  // NOTE(review): the 51-bit carries and the wrap-around factor 19 (38 = 2*19)
  // suggest radix-2^51 limbs reduced modulo 2^255-19 -- confirm against callers.
  // NOTE(review): SHLQ $1 and IMUL3Q keep only the low 64 bits, so the input
  // limbs presumably have enough headroom for *2 / *19 / *38 -- verify bounds.
  // NOTE(review): flag 7 is presumably NOPROF|DUPOK|NOSPLIT; $0-16 declares no
  // local frame and 16 bytes of arguments (the two pointers read below).
  8. TEXT ·square(SB),7,$0-16
  9. MOVQ out+0(FP), DI // DI = out
  10. MOVQ in+8(FP), SI // SI = in
  // ---- r0 = in0*in0 ----
  11. MOVQ 0(SI),AX
  12. MULQ 0(SI) // DX:AX = AX * in0 (unsigned 64x64 -> 128)
  13. MOVQ AX,CX
  14. MOVQ DX,R8
  // ---- r1 = 2*in0*in1 ----
  15. MOVQ 0(SI),AX
  16. SHLQ $1,AX // double in0 so the cross term is counted twice
  17. MULQ 8(SI)
  18. MOVQ AX,R9
  19. MOVQ DX,R10
  // ---- r2 = 2*in0*in2 ----
  20. MOVQ 0(SI),AX
  21. SHLQ $1,AX
  22. MULQ 16(SI)
  23. MOVQ AX,R11
  24. MOVQ DX,R12
  // ---- r3 = 2*in0*in3 ----
  25. MOVQ 0(SI),AX
  26. SHLQ $1,AX
  27. MULQ 24(SI)
  28. MOVQ AX,R13
  29. MOVQ DX,R14
  // ---- r4 = 2*in0*in4 ----
  30. MOVQ 0(SI),AX
  31. SHLQ $1,AX
  32. MULQ 32(SI)
  33. MOVQ AX,R15
  34. MOVQ DX,BX
  // ---- r2 += in1*in1 ----
  35. MOVQ 8(SI),AX
  36. MULQ 8(SI)
  37. ADDQ AX,R11
  38. ADCQ DX,R12 // propagate the carry from the low word into the high word
  // ---- r3 += 2*in1*in2 ----
  39. MOVQ 8(SI),AX
  40. SHLQ $1,AX
  41. MULQ 16(SI)
  42. ADDQ AX,R13
  43. ADCQ DX,R14
  // ---- r4 += 2*in1*in3 ----
  44. MOVQ 8(SI),AX
  45. SHLQ $1,AX
  46. MULQ 24(SI)
  47. ADDQ AX,R15
  48. ADCQ DX,BX
  // ---- r0 += 38*in1*in4 (term wraps around past limb 4) ----
  49. MOVQ 8(SI),DX
  50. IMUL3Q $38,DX,AX // AX = 38*in1 (low 64 bits), consumed by the MULQ below
  51. MULQ 32(SI)
  52. ADDQ AX,CX
  53. ADCQ DX,R8
  // ---- r4 += in2*in2 ----
  54. MOVQ 16(SI),AX
  55. MULQ 16(SI)
  56. ADDQ AX,R15
  57. ADCQ DX,BX
  // ---- r0 += 38*in2*in3 ----
  58. MOVQ 16(SI),DX
  59. IMUL3Q $38,DX,AX
  60. MULQ 24(SI)
  61. ADDQ AX,CX
  62. ADCQ DX,R8
  // ---- r1 += 38*in2*in4 ----
  63. MOVQ 16(SI),DX
  64. IMUL3Q $38,DX,AX
  65. MULQ 32(SI)
  66. ADDQ AX,R9
  67. ADCQ DX,R10
  // ---- r1 += 19*in3*in3 (square term, so 19 rather than 38) ----
  68. MOVQ 24(SI),DX
  69. IMUL3Q $19,DX,AX
  70. MULQ 24(SI)
  71. ADDQ AX,R9
  72. ADCQ DX,R10
  // ---- r2 += 38*in3*in4 ----
  73. MOVQ 24(SI),DX
  74. IMUL3Q $38,DX,AX
  75. MULQ 32(SI)
  76. ADDQ AX,R11
  77. ADCQ DX,R12
  // ---- r3 += 19*in4*in4 ----
  78. MOVQ 32(SI),DX
  79. IMUL3Q $19,DX,AX
  80. MULQ 32(SI)
  81. ADDQ AX,R13
  82. ADCQ DX,R14
  // ---- Split each 128-bit accumulator at bit 51 ----
  // The input pointer in SI is no longer needed and is overwritten by the mask.
  // NOTE(review): ·REDMASK51 is defined outside this view; presumably 2^51-1.
  // NOTE(review): "SHLQ $13,hi:lo" is taken to be the double-register shift
  // (hi = hi<<13 | lo>>51), which matches how hi and lo are used afterwards.
  83. MOVQ ·REDMASK51(SB),SI
  84. SHLQ $13,R8:CX // R8 = bits 51 and up of r0
  85. ANDQ SI,CX // CX = r0 masked with REDMASK51
  86. SHLQ $13,R10:R9 // R10 = bits 51 and up of r1
  87. ANDQ SI,R9
  88. ADDQ R8,R9 // fold the upper part of r0 into limb 1
  89. SHLQ $13,R12:R11
  90. ANDQ SI,R11
  91. ADDQ R10,R11 // fold the upper part of r1 into limb 2
  92. SHLQ $13,R14:R13
  93. ANDQ SI,R13
  94. ADDQ R12,R13 // fold the upper part of r2 into limb 3
  95. SHLQ $13,BX:R15
  96. ANDQ SI,R15
  97. ADDQ R14,R15 // fold the upper part of r3 into limb 4
  98. IMUL3Q $19,BX,DX // the upper part of r4 wraps to limb 0 scaled by 19
  99. ADDQ DX,CX
  // ---- Carry chain: push bits above 51 of each limb into the next one ----
  100. MOVQ CX,DX
  101. SHRQ $51,DX // DX = carry out of limb 0
  102. ADDQ R9,DX // DX = limb 1 + carry
  103. ANDQ SI,CX // CX = limb 0 masked with REDMASK51
  104. MOVQ DX,R8 // R8 will hold output limb 1
  105. SHRQ $51,DX
  106. ADDQ R11,DX // DX = limb 2 + carry
  107. ANDQ SI,R8
  108. MOVQ DX,R9 // R9 will hold output limb 2
  109. SHRQ $51,DX
  110. ADDQ R13,DX // DX = limb 3 + carry
  111. ANDQ SI,R9
  112. MOVQ DX,AX // AX will hold output limb 3
  113. SHRQ $51,DX
  114. ADDQ R15,DX // DX = limb 4 + carry
  115. ANDQ SI,AX
  116. MOVQ DX,R10 // R10 will hold output limb 4
  117. SHRQ $51,DX
  118. IMUL3Q $19,DX,DX // carry out of limb 4 wraps to limb 0 scaled by 19
  119. ADDQ DX,CX // limb 0 is not masked again after this add
  120. ANDQ SI,R10
  // ---- Store the five result limbs ----
  121. MOVQ CX,0(DI)
  122. MOVQ R8,8(DI)
  123. MOVQ R9,16(DI)
  124. MOVQ AX,24(DI)
  125. MOVQ R10,32(DI)
  126. RET