crc32_amd64p32.s 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build gc
  // Symbol attribute flags used in the TEXT directives of this file.
  // NOTE(review): the values look like local copies of the ones in Go's
  // textflag.h; confirm they stay in sync with the toolchain in use.
  #define NOSPLIT 4   // do not insert the stack-split preamble
  #define RODATA  8   // place data in a read-only section (not referenced by the code visible here)
  // func castagnoliSSE42(crc uint32, p []byte) uint32
  //
  // Folds the bytes of p into crc with the SSE4.2 CRC32 instruction and
  // returns the updated value. The running value is bit-inverted once on
  // entry and once on exit, so an empty p returns crc unchanged.
  //
  // Frame slots as accessed below (4-byte pointer and length):
  //   crc+0(FP)    incoming CRC
  //   p+4(FP)      address of the first byte of p
  //   p_len+8(FP)  len(p)
  //   ret+16(FP)   result
  //
  // Register roles: AX = running CRC, SI = read cursor, CX = bytes left.
  TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
      MOVL    crc+0(FP), AX
      MOVL    p+4(FP), SI
      MOVL    p_len+8(FP), CX     // MOVL zero-extends, so CX is non-negative as a 64-bit value
      NOTL    AX

      // Inputs shorter than 8 bytes go straight to the byte-at-a-time loop.
      CMPQ    CX, $8
      JLT     tail

  head:
      // Consume single bytes until SI is a multiple of 8. CX is at least 8
      // when this loop is entered and at most 7 bytes are consumed here,
      // so CX cannot drop below zero.
      TESTQ   $7, SI
      JEQ     words
      CRC32B  (SI), AX
      INCQ    SI
      DECQ    CX
      JMP     head

  words:
      // SI is 8-byte aligned: consume 8 bytes per step while enough remain.
      CMPQ    CX, $8
      JLT     tail
      CRC32Q  (SI), AX
      ADDQ    $8, SI
      SUBQ    $8, CX
      JMP     words

  tail:
      // Zero to seven bytes are left (or the whole short input).
      TESTQ   CX, CX
      JEQ     finish

  tail_byte:
      CRC32B  (SI), AX
      INCQ    SI
      DECQ    CX                  // sets ZF when the last byte has been consumed
      JNE     tail_byte

  finish:
      NOTL    AX
      MOVL    AX, ret+16(FP)
      RET
  // func haveSSE42() bool
  //
  // Executes CPUID with AX = 1 and returns bit 20 of the resulting CX as
  // a bool (that bit is the SSE4.2 feature flag in the CPUID leaf 1 layout).
  // CPUID overwrites AX, BX, CX and DX.
  TEXT ·haveSSE42(SB), NOSPLIT, $0
      MOVL    $1, AX              // select CPUID leaf 1
      CPUID
      SHRL    $20, CX             // move bit 20 down to bit 0
      ANDL    $1, CX              // keep only that bit
      MOVB    CX, ret+0(FP)
      RET