git: 47d997021fbc - main - libcrypto: Switch back to the generated assembly in sys/crypto/openssl
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 29 Aug 2023 21:47:25 UTC
The branch main has been updated by jhb:
URL: https://cgit.FreeBSD.org/src/commit/?id=47d997021fbc7b662e9507deec1897d514d1224c
commit 47d997021fbc7b662e9507deec1897d514d1224c
Author: John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-08-29 21:46:44 +0000
Commit: John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-08-29 21:46:44 +0000
libcrypto: Switch back to the generated assembly in sys/crypto/openssl
Reviewed by: markj
Differential Revision: https://reviews.freebsd.org/D41569
---
secure/lib/libcrypto/Makefile | 4 +-
.../lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S | 6390 ---------
secure/lib/libcrypto/arch/aarch64/aesv8-armx.S | 3181 -----
secure/lib/libcrypto/arch/aarch64/arm64cpuid.S | 130 -
secure/lib/libcrypto/arch/aarch64/armv8-mont.S | 2125 ---
secure/lib/libcrypto/arch/aarch64/chacha-armv8.S | 2035 ---
.../libcrypto/arch/aarch64/ecp_nistz256-armv8.S | 4243 ------
secure/lib/libcrypto/arch/aarch64/ghashv8-armx.S | 553 -
.../lib/libcrypto/arch/aarch64/keccak1600-armv8.S | 1010 --
secure/lib/libcrypto/arch/aarch64/poly1305-armv8.S | 864 --
secure/lib/libcrypto/arch/aarch64/sha1-armv8.S | 1212 --
secure/lib/libcrypto/arch/aarch64/sha256-armv8.S | 2052 ---
secure/lib/libcrypto/arch/aarch64/sha512-armv8.S | 1607 ---
secure/lib/libcrypto/arch/aarch64/vpaes-armv8.S | 1197 --
secure/lib/libcrypto/arch/amd64/aes-x86_64.S | 2680 ----
secure/lib/libcrypto/arch/amd64/aesni-gcm-x86_64.S | 811 --
secure/lib/libcrypto/arch/amd64/aesni-mb-x86_64.S | 1610 ---
.../lib/libcrypto/arch/amd64/aesni-sha1-x86_64.S | 3057 -----
.../lib/libcrypto/arch/amd64/aesni-sha256-x86_64.S | 4457 ------
secure/lib/libcrypto/arch/amd64/aesni-x86_64.S | 4507 ------
secure/lib/libcrypto/arch/amd64/bsaes-x86_64.S | 2619 ----
secure/lib/libcrypto/arch/amd64/chacha-x86_64.S | 2215 ---
secure/lib/libcrypto/arch/amd64/cmll-x86_64.S | 1947 ---
secure/lib/libcrypto/arch/amd64/e_padlock-x86_64.S | 1059 --
.../lib/libcrypto/arch/amd64/ecp_nistz256-x86_64.S | 7365 ----------
secure/lib/libcrypto/arch/amd64/ghash-x86_64.S | 1875 ---
.../lib/libcrypto/arch/amd64/keccak1600-x86_64.S | 546 -
secure/lib/libcrypto/arch/amd64/md5-x86_64.S | 705 -
secure/lib/libcrypto/arch/amd64/poly1305-x86_64.S | 2090 ---
secure/lib/libcrypto/arch/amd64/rc4-md5-x86_64.S | 1303 --
secure/lib/libcrypto/arch/amd64/rc4-x86_64.S | 657 -
secure/lib/libcrypto/arch/amd64/rsaz-avx2.S | 1766 ---
secure/lib/libcrypto/arch/amd64/rsaz-avx512.S | 902 --
secure/lib/libcrypto/arch/amd64/rsaz-x86_64.S | 2037 ---
secure/lib/libcrypto/arch/amd64/sha1-mb-x86_64.S | 7325 ----------
secure/lib/libcrypto/arch/amd64/sha1-x86_64.S | 5472 --------
secure/lib/libcrypto/arch/amd64/sha256-mb-x86_64.S | 8006 -----------
secure/lib/libcrypto/arch/amd64/sha256-x86_64.S | 5478 --------
secure/lib/libcrypto/arch/amd64/sha512-x86_64.S | 5483 --------
secure/lib/libcrypto/arch/amd64/vpaes-x86_64.S | 880 --
secure/lib/libcrypto/arch/amd64/wp-x86_64.S | 901 --
secure/lib/libcrypto/arch/amd64/x25519-x86_64.S | 824 --
secure/lib/libcrypto/arch/amd64/x86_64-gf2m.S | 333 -
secure/lib/libcrypto/arch/amd64/x86_64-mont.S | 1261 --
secure/lib/libcrypto/arch/amd64/x86_64-mont5.S | 3625 -----
secure/lib/libcrypto/arch/amd64/x86_64cpuid.S | 513 -
secure/lib/libcrypto/arch/arm/aes-armv4.S | 1198 --
secure/lib/libcrypto/arch/arm/aesv8-armx.S | 1088 --
secure/lib/libcrypto/arch/arm/armv4-gf2m.S | 236 -
secure/lib/libcrypto/arch/arm/armv4-mont.S | 961 --
secure/lib/libcrypto/arch/arm/armv4cpuid.S | 273 -
secure/lib/libcrypto/arch/arm/bsaes-armv7.S | 2561 ----
secure/lib/libcrypto/arch/arm/chacha-armv4.S | 1478 --
secure/lib/libcrypto/arch/arm/ecp_nistz256-armv4.S | 4430 ------
secure/lib/libcrypto/arch/arm/ghash-armv4.S | 565 -
secure/lib/libcrypto/arch/arm/ghashv8-armx.S | 244 -
secure/lib/libcrypto/arch/arm/keccak1600-armv4.S | 2694 ----
secure/lib/libcrypto/arch/arm/poly1305-armv4.S | 1169 --
secure/lib/libcrypto/arch/arm/sha1-armv4-large.S | 1499 --
secure/lib/libcrypto/arch/arm/sha256-armv4.S | 2823 ----
secure/lib/libcrypto/arch/arm/sha512-armv4.S | 1877 ---
secure/lib/libcrypto/arch/i386/aes-586.S | 6644 ---------
secure/lib/libcrypto/arch/i386/aesni-x86.S | 6732 ---------
secure/lib/libcrypto/arch/i386/bf-586.S | 1928 ---
secure/lib/libcrypto/arch/i386/bn-586.S | 3157 -----
secure/lib/libcrypto/arch/i386/cast-586.S | 2002 ---
secure/lib/libcrypto/arch/i386/chacha-x86.S | 2084 ---
secure/lib/libcrypto/arch/i386/cmll-x86.S | 4896 -------
secure/lib/libcrypto/arch/i386/co-586.S | 2584 ----
secure/lib/libcrypto/arch/i386/crypt586.S | 1800 ---
secure/lib/libcrypto/arch/i386/des-586.S | 3932 ------
secure/lib/libcrypto/arch/i386/e_padlock-x86.S | 2300 ----
secure/lib/libcrypto/arch/i386/ecp_nistz256-x86.S | 10584 --------------
secure/lib/libcrypto/arch/i386/ghash-x86.S | 2636 ----
secure/lib/libcrypto/arch/i386/md5-586.S | 1404 --
secure/lib/libcrypto/arch/i386/poly1305-x86.S | 3938 ------
secure/lib/libcrypto/arch/i386/rc4-586.S | 819 --
secure/lib/libcrypto/arch/i386/rc5-586.S | 1264 --
secure/lib/libcrypto/arch/i386/rmd-586.S | 3976 ------
secure/lib/libcrypto/arch/i386/sha1-586.S | 8016 -----------
secure/lib/libcrypto/arch/i386/sha256-586.S | 13612 -------------------
secure/lib/libcrypto/arch/i386/sha512-586.S | 5704 --------
secure/lib/libcrypto/arch/i386/vpaes-x86.S | 1488 --
secure/lib/libcrypto/arch/i386/wp-mmx.S | 2260 ---
secure/lib/libcrypto/arch/i386/x86-gf2m.S | 755 -
secure/lib/libcrypto/arch/i386/x86-mont.S | 995 --
secure/lib/libcrypto/arch/i386/x86cpuid.S | 1217 --
secure/lib/libcrypto/arch/powerpc/aes-ppc.S | 1561 ---
secure/lib/libcrypto/arch/powerpc/aesp8-ppc.S | 3642 -----
secure/lib/libcrypto/arch/powerpc/bn-ppc.S | 1855 ---
secure/lib/libcrypto/arch/powerpc/chacha-ppc.S | 1492 --
secure/lib/libcrypto/arch/powerpc/ghashp8-ppc.S | 569 -
secure/lib/libcrypto/arch/powerpc/poly1305-ppc.S | 1301 --
secure/lib/libcrypto/arch/powerpc/poly1305-ppcfp.S | 586 -
secure/lib/libcrypto/arch/powerpc/ppc-mont.S | 1787 ---
secure/lib/libcrypto/arch/powerpc/ppc.S | 1855 ---
secure/lib/libcrypto/arch/powerpc/ppccpuid.S | 356 -
secure/lib/libcrypto/arch/powerpc/sha1-ppc.S | 1118 --
secure/lib/libcrypto/arch/powerpc/sha256-ppc.S | 1321 --
secure/lib/libcrypto/arch/powerpc/sha256p8-ppc.S | 735 -
secure/lib/libcrypto/arch/powerpc/sha512-ppc.S | 3071 -----
secure/lib/libcrypto/arch/powerpc/sha512p8-ppc.S | 833 --
secure/lib/libcrypto/arch/powerpc/vpaes-ppc.S | 1468 --
secure/lib/libcrypto/arch/powerpc64/aes-ppc.S | 1533 ---
secure/lib/libcrypto/arch/powerpc64/aesp8-ppc.S | 3659 -----
secure/lib/libcrypto/arch/powerpc64/bn-ppc.S | 1876 ---
secure/lib/libcrypto/arch/powerpc64/chacha-ppc.S | 1499 --
.../libcrypto/arch/powerpc64/ecp_nistp521-ppc64.S | 354 -
.../libcrypto/arch/powerpc64/ecp_nistz256-ppc64.S | 4854 -------
secure/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S | 576 -
.../libcrypto/arch/powerpc64/keccak1600-ppc64.S | 670 -
secure/lib/libcrypto/arch/powerpc64/poly1305-ppc.S | 1142 --
.../lib/libcrypto/arch/powerpc64/poly1305-ppcfp.S | 596 -
secure/lib/libcrypto/arch/powerpc64/ppc-mont.S | 1790 ---
secure/lib/libcrypto/arch/powerpc64/ppc.S | 1876 ---
secure/lib/libcrypto/arch/powerpc64/ppccpuid.S | 387 -
secure/lib/libcrypto/arch/powerpc64/sha1-ppc.S | 1121 --
secure/lib/libcrypto/arch/powerpc64/sha256-ppc.S | 1324 --
secure/lib/libcrypto/arch/powerpc64/sha256p8-ppc.S | 738 -
secure/lib/libcrypto/arch/powerpc64/sha512-ppc.S | 1420 --
secure/lib/libcrypto/arch/powerpc64/sha512p8-ppc.S | 836 --
secure/lib/libcrypto/arch/powerpc64/vpaes-ppc.S | 1479 --
secure/lib/libcrypto/arch/powerpc64/x25519-ppc64.S | 349 -
secure/lib/libcrypto/arch/powerpc64le/aes-ppc.S | 1581 ---
secure/lib/libcrypto/arch/powerpc64le/aesp8-ppc.S | 3659 -----
secure/lib/libcrypto/arch/powerpc64le/bn-ppc.S | 1876 ---
secure/lib/libcrypto/arch/powerpc64le/chacha-ppc.S | 1371 --
.../arch/powerpc64le/ecp_nistp521-ppc64.S | 354 -
.../arch/powerpc64le/ecp_nistz256-ppc64.S | 4854 -------
.../lib/libcrypto/arch/powerpc64le/ghashp8-ppc.S | 576 -
.../libcrypto/arch/powerpc64le/keccak1600-ppc64.S | 670 -
.../lib/libcrypto/arch/powerpc64le/poly1305-ppc.S | 1128 --
.../libcrypto/arch/powerpc64le/poly1305-ppcfp.S | 591 -
secure/lib/libcrypto/arch/powerpc64le/ppc-mont.S | 1790 ---
secure/lib/libcrypto/arch/powerpc64le/ppc.S | 1876 ---
secure/lib/libcrypto/arch/powerpc64le/ppccpuid.S | 387 -
secure/lib/libcrypto/arch/powerpc64le/sha1-ppc.S | 1169 --
secure/lib/libcrypto/arch/powerpc64le/sha256-ppc.S | 1372 --
.../lib/libcrypto/arch/powerpc64le/sha256p8-ppc.S | 746 -
secure/lib/libcrypto/arch/powerpc64le/sha512-ppc.S | 1516 ---
.../lib/libcrypto/arch/powerpc64le/sha512p8-ppc.S | 848 --
secure/lib/libcrypto/arch/powerpc64le/vpaes-ppc.S | 1479 --
.../lib/libcrypto/arch/powerpc64le/x25519-ppc64.S | 349 -
secure/lib/libcrypto/engines/padlock/Makefile | 2 +-
secure/lib/libcrypto/modules/fips/Makefile | 4 +-
145 files changed, 5 insertions(+), 310557 deletions(-)
diff --git a/secure/lib/libcrypto/Makefile b/secure/lib/libcrypto/Makefile
index ab9044ad67f9..585e89861815 100644
--- a/secure/lib/libcrypto/Makefile
+++ b/secure/lib/libcrypto/Makefile
@@ -618,12 +618,12 @@ buildasm cleanasm:
PICFLAG+= -DOPENSSL_PIC
.if defined(ASM_${MACHINE_CPUARCH})
-.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_CPUARCH}
+.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_CPUARCH}
.if defined(ASM_amd64)
.PATH: ${LCRYPTO_SRC}/crypto/bn/asm
.endif
.elif defined(ASM_${MACHINE_ARCH})
-.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_ARCH}
+.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_ARCH}
.endif
.PATH: ${LCRYPTO_SRC}/crypto \
diff --git a/secure/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S b/secure/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S
deleted file mode 100644
index eb85dbc9f996..000000000000
--- a/secure/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S
+++ /dev/null
@@ -1,6390 +0,0 @@
-/* Do not modify. This file is auto-generated from aes-gcm-armv8_64.pl. */
-#include "arm_arch.h"
-
-#if __ARM_MAX_ARCH__>=8
-.arch armv8-a+crypto
-.text
-.globl aes_gcm_enc_128_kernel
-.type aes_gcm_enc_128_kernel,%function
-.align 4
-aes_gcm_enc_128_kernel:
- cbz x1, .L128_enc_ret
- stp x19, x20, [sp, #-112]!
- mov x16, x4
- mov x8, x5
- stp x21, x22, [sp, #16]
- stp x23, x24, [sp, #32]
- stp d8, d9, [sp, #48]
- stp d10, d11, [sp, #64]
- stp d12, d13, [sp, #80]
- stp d14, d15, [sp, #96]
-
- ldp x10, x11, [x16] //ctr96_b64, ctr96_t32
-#ifdef __AARCH64EB__
- rev x10, x10
- rev x11, x11
-#endif
- ldp x13, x14, [x8, #160] //load rk10
-#ifdef __AARCH64EB__
- ror x13, x13, #32
- ror x14, x14, #32
-#endif
- ld1 {v11.16b}, [x3]
- ext v11.16b, v11.16b, v11.16b, #8
- rev64 v11.16b, v11.16b
- lsr x5, x1, #3 //byte_len
- mov x15, x5
-
- ld1 {v18.4s}, [x8], #16 //load rk0
- add x4, x0, x1, lsr #3 //end_input_ptr
- sub x5, x5, #1 //byte_len - 1
-
- lsr x12, x11, #32
- ldr q15, [x3, #112] //load h4l | h4h
-#ifndef __AARCH64EB__
- ext v15.16b, v15.16b, v15.16b, #8
-#endif
- fmov d1, x10 //CTR block 1
- rev w12, w12 //rev_ctr32
-
- add w12, w12, #1 //increment rev_ctr32
- orr w11, w11, w11
- ld1 {v19.4s}, [x8], #16 //load rk1
-
- rev w9, w12 //CTR block 1
- add w12, w12, #1 //CTR block 1
- fmov d3, x10 //CTR block 3
-
- orr x9, x11, x9, lsl #32 //CTR block 1
- ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible
-
- fmov v1.d[1], x9 //CTR block 1
- rev w9, w12 //CTR block 2
-
- fmov d2, x10 //CTR block 2
- orr x9, x11, x9, lsl #32 //CTR block 2
- add w12, w12, #1 //CTR block 2
-
- fmov v2.d[1], x9 //CTR block 2
- rev w9, w12 //CTR block 3
-
- orr x9, x11, x9, lsl #32 //CTR block 3
- ld1 {v20.4s}, [x8], #16 //load rk2
-
- add w12, w12, #1 //CTR block 3
- fmov v3.d[1], x9 //CTR block 3
-
- ldr q14, [x3, #80] //load h3l | h3h
-#ifndef __AARCH64EB__
- ext v14.16b, v14.16b, v14.16b, #8
-#endif
- aese v1.16b, v18.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 0
- ld1 {v21.4s}, [x8], #16 //load rk3
-
- aese v2.16b, v18.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 0
- ldr q12, [x3, #32] //load h1l | h1h
-#ifndef __AARCH64EB__
- ext v12.16b, v12.16b, v12.16b, #8
-#endif
-
- aese v0.16b, v18.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 0
- ld1 {v22.4s}, [x8], #16 //load rk4
-
- aese v3.16b, v18.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 0
- ld1 {v23.4s}, [x8], #16 //load rk5
-
- aese v2.16b, v19.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 1
- trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
-
- aese v0.16b, v19.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 1
- ld1 {v24.4s}, [x8], #16 //load rk6
-
- aese v1.16b, v19.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 1
- ld1 {v25.4s}, [x8], #16 //load rk7
-
- aese v3.16b, v19.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 1
- trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
-
- aese v0.16b, v20.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 2
- ld1 {v26.4s}, [x8], #16 //load rk8
-
- aese v1.16b, v20.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 2
- ldr q13, [x3, #64] //load h2l | h2h
-#ifndef __AARCH64EB__
- ext v13.16b, v13.16b, v13.16b, #8
-#endif
-
- aese v3.16b, v20.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 2
-
- aese v2.16b, v20.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 2
- eor v17.16b, v17.16b, v9.16b //h4k | h3k
-
- aese v0.16b, v21.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 3
-
- aese v1.16b, v21.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 3
-
- aese v2.16b, v21.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 3
- ld1 {v27.4s}, [x8], #16 //load rk9
-
- aese v3.16b, v21.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 3
-
- and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
- trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
-
- aese v3.16b, v22.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 4
- add x5, x5, x0
-
- aese v2.16b, v22.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 4
- cmp x0, x5 //check if we have <= 4 blocks
-
- aese v0.16b, v22.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 4
-
- aese v3.16b, v23.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 5
-
- aese v2.16b, v23.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 5
-
- aese v0.16b, v23.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 5
-
- aese v3.16b, v24.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 6
-
- aese v1.16b, v22.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 4
-
- aese v2.16b, v24.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 6
- trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
-
- aese v0.16b, v24.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 6
-
- aese v1.16b, v23.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 5
-
- aese v3.16b, v25.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 7
-
- aese v0.16b, v25.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 7
-
- aese v1.16b, v24.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 6
-
- aese v2.16b, v25.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 7
-
- aese v0.16b, v26.16b
- aesmc v0.16b, v0.16b //AES block 0 - round 8
-
- aese v1.16b, v25.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 7
-
- aese v2.16b, v26.16b
- aesmc v2.16b, v2.16b //AES block 2 - round 8
-
- aese v3.16b, v26.16b
- aesmc v3.16b, v3.16b //AES block 3 - round 8
-
- aese v1.16b, v26.16b
- aesmc v1.16b, v1.16b //AES block 1 - round 8
-
- aese v2.16b, v27.16b //AES block 2 - round 9
-
- aese v0.16b, v27.16b //AES block 0 - round 9
-
- eor v16.16b, v16.16b, v8.16b //h2k | h1k
-
- aese v1.16b, v27.16b //AES block 1 - round 9
-
- aese v3.16b, v27.16b //AES block 3 - round 9
- b.ge .L128_enc_tail //handle tail
-
- ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
-#ifdef __AARCH64EB__
- rev x6, x6
- rev x7, x7
-#endif
- ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
-#ifdef __AARCH64EB__
- rev x21, x21
- rev x22, x22
-#endif
- ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
-#ifdef __AARCH64EB__
- rev x19, x19
- rev x20, x20
-#endif
- ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
-#ifdef __AARCH64EB__
- rev x23, x23
- rev x24, x24
-#endif
- eor x6, x6, x13 //AES block 0 - round 10 low
- eor x7, x7, x14 //AES block 0 - round 10 high
-
- eor x21, x21, x13 //AES block 2 - round 10 low
- fmov d4, x6 //AES block 0 - mov low
-
- eor x19, x19, x13 //AES block 1 - round 10 low
- eor x22, x22, x14 //AES block 2 - round 10 high
- fmov v4.d[1], x7 //AES block 0 - mov high
-
- fmov d5, x19 //AES block 1 - mov low
- eor x20, x20, x14 //AES block 1 - round 10 high
-
- eor x23, x23, x13 //AES block 3 - round 10 low
- fmov v5.d[1], x20 //AES block 1 - mov high
-
- fmov d6, x21 //AES block 2 - mov low
- eor x24, x24, x14 //AES block 3 - round 10 high
- rev w9, w12 //CTR block 4
-
- fmov v6.d[1], x22 //AES block 2 - mov high
- orr x9, x11, x9, lsl #32 //CTR block 4
-
- eor v4.16b, v4.16b, v0.16b //AES block 0 - result
- fmov d0, x10 //CTR block 4
- add w12, w12, #1 //CTR block 4
-
- fmov v0.d[1], x9 //CTR block 4
- rev w9, w12 //CTR block 5
-
- eor v5.16b, v5.16b, v1.16b //AES block 1 - result
- fmov d1, x10 //CTR block 5
- orr x9, x11, x9, lsl #32 //CTR block 5
-
- add w12, w12, #1 //CTR block 5
- add x0, x0, #64 //AES input_ptr update
- fmov v1.d[1], x9 //CTR block 5
-
- fmov d7, x23 //AES block 3 - mov low
- rev w9, w12 //CTR block 6
- st1 { v4.16b}, [x2], #16 //AES block 0 - store result
-
- fmov v7.d[1], x24 //AES block 3 - mov high
- orr x9, x11, x9, lsl #32 //CTR block 6
-
- add w12, w12, #1 //CTR block 6
- eor v6.16b, v6.16b, v2.16b //AES block 2 - result
- st1 { v5.16b}, [x2], #16 //AES block 1 - store result
-
- fmov d2, x10 //CTR block 6
- cmp x0, x5 //check if we have <= 8 blocks
-
- fmov v2.d[1], x9 //CTR block 6
- rev w9, w12 //CTR block 7
- st1 { v6.16b}, [x2], #16 //AES block 2 - store result
-
- orr x9, x11, x9, lsl #32 //CTR block 7
-
- eor v7.16b, v7.16b, v3.16b //AES block 3 - result
- st1 { v7.16b}, [x2], #16 //AES block 3 - store result
- b.ge .L128_enc_prepretail //do prepretail
-
-.L128_enc_main_loop: //main loop start
- ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext
-#ifdef __AARCH64EB__
- rev x23, x23
- rev x24, x24
-#endif
- rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
- rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
-
- aese v2.16b, v18.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
- fmov d3, x10 //CTR block 4k+3
-
- ext v11.16b, v11.16b, v11.16b, #8 //PRE 0
- rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
-
- aese v1.16b, v18.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
- add w12, w12, #1 //CTR block 4k+3
- fmov v3.d[1], x9 //CTR block 4k+3
-
- aese v0.16b, v18.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
- mov d31, v6.d[1] //GHASH block 4k+2 - mid
-
- aese v2.16b, v19.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
- mov d30, v5.d[1] //GHASH block 4k+1 - mid
-
- aese v1.16b, v19.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
- eor v4.16b, v4.16b, v11.16b //PRE 1
-
- aese v3.16b, v18.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
- eor x24, x24, x14 //AES block 4k+3 - round 10 high
-
- pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
- eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
- ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
-#ifdef __AARCH64EB__
- rev x6, x6
- rev x7, x7
-#endif
- aese v0.16b, v19.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
- rev w9, w12 //CTR block 4k+8
-
- eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
- mov d8, v4.d[1] //GHASH block 4k - mid
- orr x9, x11, x9, lsl #32 //CTR block 4k+8
-
- pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
- add w12, w12, #1 //CTR block 4k+8
- mov d10, v17.d[1] //GHASH block 4k - mid
-
- aese v0.16b, v20.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
-
- pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
- eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
-
- aese v1.16b, v20.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
-
- aese v0.16b, v21.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
- eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
-
- pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
-
- pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
- rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
-
- pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
-
- pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
- ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
-
- pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
- eor x7, x7, x14 //AES block 4k+4 - round 10 high
-
- eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
- mov d30, v7.d[1] //GHASH block 4k+3 - mid
-
- aese v3.16b, v19.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
- eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
-
- aese v2.16b, v20.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
- eor x6, x6, x13 //AES block 4k+4 - round 10 low
-
- aese v1.16b, v21.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
- eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
-
- pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
-
- aese v2.16b, v21.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
- eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
-
- pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
-
- pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
- movi v8.8b, #0xc2
-
- pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
- eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
-
- aese v1.16b, v22.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
-
- aese v3.16b, v20.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
- shl d8, d8, #56 //mod_constant
-
- aese v0.16b, v22.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
- eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
-
- aese v1.16b, v23.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
- ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
-#ifdef __AARCH64EB__
- rev x19, x19
- rev x20, x20
-#endif
- aese v3.16b, v21.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
- eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
-
- aese v0.16b, v23.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
- ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
-#ifdef __AARCH64EB__
- rev x21, x21
- rev x22, x22
-#endif
- pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
- eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
-
- aese v2.16b, v22.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
- eor x19, x19, x13 //AES block 4k+5 - round 10 low
-
- aese v3.16b, v22.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
- eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
-
- aese v1.16b, v24.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
- eor x23, x23, x13 //AES block 4k+3 - round 10 low
-
- aese v2.16b, v23.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
- eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
-
- fmov d4, x6 //AES block 4k+4 - mov low
- aese v0.16b, v24.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
- fmov v4.d[1], x7 //AES block 4k+4 - mov high
-
- add x0, x0, #64 //AES input_ptr update
- fmov d7, x23 //AES block 4k+3 - mov low
- ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
-
- aese v3.16b, v23.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
- fmov d5, x19 //AES block 4k+5 - mov low
-
- aese v0.16b, v25.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
- eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
-
- aese v2.16b, v24.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
- eor x20, x20, x14 //AES block 4k+5 - round 10 high
-
- aese v1.16b, v25.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
- fmov v5.d[1], x20 //AES block 4k+5 - mov high
-
- aese v0.16b, v26.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
- fmov v7.d[1], x24 //AES block 4k+3 - mov high
-
- aese v3.16b, v24.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
- cmp x0, x5 //.LOOP CONTROL
-
- aese v1.16b, v26.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
- eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
-
- aese v0.16b, v27.16b //AES block 4k+4 - round 9
- eor x21, x21, x13 //AES block 4k+6 - round 10 low
- eor x22, x22, x14 //AES block 4k+6 - round 10 high
-
- aese v3.16b, v25.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
- fmov d6, x21 //AES block 4k+6 - mov low
-
- aese v1.16b, v27.16b //AES block 4k+5 - round 9
- fmov v6.d[1], x22 //AES block 4k+6 - mov high
-
- aese v2.16b, v25.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
- eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
-
- fmov d0, x10 //CTR block 4k+8
- aese v3.16b, v26.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
-
- fmov v0.d[1], x9 //CTR block 4k+8
- rev w9, w12 //CTR block 4k+9
- eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
-
- aese v2.16b, v26.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
- eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
-
- add w12, w12, #1 //CTR block 4k+9
- orr x9, x11, x9, lsl #32 //CTR block 4k+9
- fmov d1, x10 //CTR block 4k+9
-
- pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
- fmov v1.d[1], x9 //CTR block 4k+9
- rev w9, w12 //CTR block 4k+10
-
- aese v2.16b, v27.16b //AES block 4k+6 - round 9
- st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
- eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
- orr x9, x11, x9, lsl #32 //CTR block 4k+10
-
- aese v3.16b, v27.16b //AES block 4k+7 - round 9
- add w12, w12, #1 //CTR block 4k+10
- ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
- fmov d2, x10 //CTR block 4k+10
-
- eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
- st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
-
- fmov v2.d[1], x9 //CTR block 4k+10
- st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
- rev w9, w12 //CTR block 4k+11
-
- orr x9, x11, x9, lsl #32 //CTR block 4k+11
- eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result
-
- eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
- st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result
- b.lt .L128_enc_main_loop
-
-.L128_enc_prepretail: //PREPRETAIL
- rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
- fmov d3, x10 //CTR block 4k+3
- rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
-
- ext v11.16b, v11.16b, v11.16b, #8 //PRE 0
- add w12, w12, #1 //CTR block 4k+3
- fmov v3.d[1], x9 //CTR block 4k+3
-
- aese v1.16b, v18.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
- rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
-
- pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
-
- rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
- eor v4.16b, v4.16b, v11.16b //PRE 1
-
- pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
-
- aese v3.16b, v18.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
- mov d30, v5.d[1] //GHASH block 4k+1 - mid
-
- pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
- mov d8, v4.d[1] //GHASH block 4k - mid
-
- mov d31, v6.d[1] //GHASH block 4k+2 - mid
- mov d10, v17.d[1] //GHASH block 4k - mid
-
- aese v1.16b, v19.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
- eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
-
- eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
-
- pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
- eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
-
- aese v3.16b, v19.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
-
- pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
- eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
-
- pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
-
- aese v0.16b, v18.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
- ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
-
- aese v2.16b, v18.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
-
- eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
- mov d30, v7.d[1] //GHASH block 4k+3 - mid
-
- aese v0.16b, v19.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
- eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
-
- pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
-
- pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
- eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
-
- pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
-
- pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
-
- aese v2.16b, v19.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
- eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
-
- aese v0.16b, v20.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
-
- pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
- movi v8.8b, #0xc2
-
- aese v2.16b, v20.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
- eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
-
- aese v3.16b, v20.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
-
- pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
- eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
-
- aese v2.16b, v21.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
-
- aese v1.16b, v20.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
- eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
-
- aese v0.16b, v21.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
-
- eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
- shl d8, d8, #56 //mod_constant
-
- aese v1.16b, v21.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
- eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
-
- aese v0.16b, v22.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
-
- pmull v28.1q, v9.1d, v8.1d
- eor v10.16b, v10.16b, v9.16b //karatsuba tidy up
-
- aese v1.16b, v22.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
-
- aese v0.16b, v23.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
- ext v9.16b, v9.16b, v9.16b, #8
-
- aese v3.16b, v21.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
-
- aese v2.16b, v22.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
- eor v10.16b, v10.16b, v11.16b
-
- aese v0.16b, v24.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
-
- aese v3.16b, v22.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
-
- aese v1.16b, v23.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
-
- aese v2.16b, v23.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
- eor v10.16b, v10.16b, v28.16b
-
- aese v3.16b, v23.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
-
- aese v1.16b, v24.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
-
- aese v2.16b, v24.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
-
- aese v3.16b, v24.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
- eor v10.16b, v10.16b, v9.16b
-
- aese v0.16b, v25.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
-
- aese v2.16b, v25.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
-
- aese v3.16b, v25.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
-
- pmull v28.1q, v10.1d, v8.1d
-
- aese v1.16b, v25.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
- ext v10.16b, v10.16b, v10.16b, #8
-
- aese v3.16b, v26.16b
- aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
-
- aese v0.16b, v26.16b
- aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
- eor v11.16b, v11.16b, v28.16b
-
- aese v1.16b, v26.16b
- aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
-
- aese v3.16b, v27.16b //AES block 4k+7 - round 9
-
- aese v2.16b, v26.16b
- aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
-
- aese v0.16b, v27.16b //AES block 4k+4 - round 9
-
- aese v1.16b, v27.16b //AES block 4k+5 - round 9
- eor v11.16b, v11.16b, v10.16b
-
- aese v2.16b, v27.16b //AES block 4k+6 - round 9
-.L128_enc_tail: //TAIL
-
- sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process
- ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
-#ifdef __AARCH64EB__
- rev x6, x6
- rev x7, x7
-#endif
- cmp x5, #48
-
- ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag
- eor x6, x6, x13 //AES block 4k+4 - round 10 low
- eor x7, x7, x14 //AES block 4k+4 - round 10 high
-
- fmov d4, x6 //AES block 4k+4 - mov low
-
- fmov v4.d[1], x7 //AES block 4k+4 - mov high
-
- eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
-
- b.gt .L128_enc_blocks_more_than_3
-
- sub w12, w12, #1
- movi v11.8b, #0
- mov v3.16b, v2.16b
-
- cmp x5, #32
- mov v2.16b, v1.16b
- movi v9.8b, #0
-
- movi v10.8b, #0
- b.gt .L128_enc_blocks_more_than_2
-
- mov v3.16b, v1.16b
- cmp x5, #16
-
- sub w12, w12, #1
- b.gt .L128_enc_blocks_more_than_1
-
- sub w12, w12, #1
- b .L128_enc_blocks_less_than_1
-.L128_enc_blocks_more_than_3: //blocks left > 3
- st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
-
- ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
-#ifdef __AARCH64EB__
- rev x6, x6
- rev x7, x7
-#endif
- rev64 v4.16b, v5.16b //GHASH final-3 block
*** 310630 LINES SKIPPED ***