/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
.text
#define IV_OFFSET
256
/*
* Warning: the length values used in this module are "unsigned int"
* in C, which is 32-bit. When they're passed in registers, use only
* the low 32 bits, because the top half is unspecified.
*
* This is called from C code, so the contents of those bits can
* depend on the C compiler's optimization decisions. This means that
* mistakes might not be obvious in testing if those bits happen to be
* zero in your build.
*
* Exception: 32-bit lea instructions use a 64-bit address because the
* address size doesn't affect the result, and that form is more
* compactly encoded and preferred by compilers over a 32-bit address.
*/
/*
 * intel_aes_encrypt_init_128: expand a 16-byte key into 11 round keys.
 *
 * in %rdi : the key (16 bytes, may be unaligned)
 * in %rsi : buffer for expanded key (11 * 16 = 176 bytes are written)
 *
 * Round key 0 is the key itself.  Each aeskeygenassist/key_expansion128
 * pair below derives the next round key from %xmm1, stores it at (%rsi)
 * and advances %rsi by 16.
 *
 * Clobbers %eax (zeroed; key_expansion128 reads it), %rsi,
 * %xmm1-%xmm3 and flags.
 *
 * AES-NI instructions are emitted as raw .byte sequences; the comment
 * beside each one gives the equivalent mnemonic.
 */
	.type intel_aes_encrypt_init_128,@function
	.globl intel_aes_encrypt_init_128
	.align	16
intel_aes_encrypt_init_128:
	movups	(%rdi), %xmm1
	movups	%xmm1, (%rsi)			/* round key 0 = raw key */
	leaq	16(%rsi), %rsi
	xorl	%eax, %eax			/* key_expansion128 seeds %xmm3 from %eax */

	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128

	ret
	.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
/*
 * intel_aes_decrypt_init_128: expand a 16-byte key into the 11 round
 * keys used by the aesdec-based decrypt routines.
 *
 * in %rdi : the key (16 bytes, may be unaligned)
 * in %rsi : buffer for expanded key (11 * 16 = 176 bytes are written)
 *
 * The expansion is the same as in intel_aes_encrypt_init_128, except
 * that after each of the first nine expansion steps the round key just
 * stored (at -16(%rsi)) is overwritten with its aesimc transform.
 * %xmm1 keeps the untransformed key so the expansion can continue.
 * Round key 0 and round key 10 are stored untransformed.
 *
 * Clobbers %eax (zeroed; key_expansion128 reads it), %rsi,
 * %xmm1-%xmm3 and flags.
 */
	.type intel_aes_decrypt_init_128,@function
	.globl intel_aes_decrypt_init_128
	.align	16
intel_aes_decrypt_init_128:
	movups	(%rdi), %xmm1
	movups	%xmm1, (%rsi)			/* round key 0 = raw key */
	leaq	16(%rsi), %rsi
	xorl	%eax, %eax			/* key_expansion128 seeds %xmm3 from %eax */

	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 1 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 2 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 3 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 4 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 5 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 6 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 7 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 8 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)		/* replace round key 9 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128			/* round key 10: no aesimc */

	ret
	.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
/*
 * key_expansion128: file-local helper (not .globl) that performs one
 * step of the 128-bit key expansion.
 *
 * in  %xmm1 : previous round key
 * in  %xmm2 : aeskeygenassist result for the previous round key
 * in  %eax  : seed for %xmm3; both 128-bit init routines zero it
 * in  %rsi  : where to store the new round key
 *
 * out %xmm1 : new round key (also stored at the incoming %rsi)
 * out %rsi  : advanced by 16
 *
 * Clobbers %xmm2, %xmm3 and flags.  Uses no stack.
 */
	.type key_expansion128,@function
	.align	16
key_expansion128:
	movd	%eax, %xmm3
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of the assist result */
	shufps	$0x10, %xmm1, %xmm3
	pxor	%xmm3, %xmm1
	shufps	$0x8c, %xmm1, %xmm3
	pxor	%xmm2, %xmm1
	pxor	%xmm3, %xmm1
	movdqu	%xmm1, (%rsi)
	addq	$16, %rsi
	ret
	.size key_expansion128, .-key_expansion128
/*
 * intel_aes_encrypt_ecb_128: ECB encryption with an 11-entry round-key
 * schedule, eight blocks at a time where possible.
 *
 * in %rdi : cx - context; round keys 0..10 are read from offsets 0..160
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; not referenced here)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not referenced here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer in bytes (low 32 bits only)
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.
 * Clobbers %r11 (only when inputLen >= 128), %xmm1-%xmm12 and flags.
 *
 * inputLen must be a multiple of 16: the single-block loop exits only
 * when the byte index in %eax equals inputLen.
 *
 * %eax is the running byte offset into input and output.  It is only
 * ever written as a 32-bit register, so %rax is zero-extended and safe
 * to use as an index.
 */
	.type intel_aes_encrypt_ecb_128,@function
	.globl intel_aes_encrypt_ecb_128
	.align	16
intel_aes_encrypt_ecb_128:
	movdqu	(%rdi), %xmm2			/* round key 0 */
	movdqu	160(%rdi), %xmm12		/* round key 10 (last) */
	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d		/* last offset at which 8 blocks still fit */

	/* Eight-block loop. */
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10

	/* Rounds are fully unrolled; round keys are loaded two at a time. */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	144(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */

	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc	/* aesenclast %xmm12, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4	/* aesenclast %xmm12, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xec	/* aesenclast %xmm12, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4	/* aesenclast %xmm12, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc	/* aesenclast %xmm12, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4	/* aesenclast %xmm12, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4	/* aesenclast %xmm12, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addl	$128, %eax			/* 8 blocks * 16 bytes */
	cmpl	%r11d, %eax
	jbe	2b

	/* Fewer than eight blocks left: finish one block at a time. */
1:	cmpl	%eax, %r9d
	je	5f

	movdqu	16(%rdi), %xmm3			/* round keys 1..9 stay in %xmm3..%xmm11 */
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11

4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm2, %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b

5:	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
/*
 * intel_aes_decrypt_ecb_128: ECB decryption with an 11-entry round-key
 * schedule, eight blocks at a time where possible.
 *
 * in %rdi : cx - context; round keys 0..10 are read from offsets 0..160
 *           and applied in reverse order (10 first, 0 last)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; not referenced here)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not referenced here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer in bytes (low 32 bits only)
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.
 * Clobbers %r11 (only when inputLen >= 128), %xmm1-%xmm12 and flags.
 *
 * inputLen must be a multiple of 16: the single-block loop exits only
 * when the byte index in %eax equals inputLen.
 *
 * NOTE(review): presumably the schedule must come from
 * intel_aes_decrypt_init_128 (aesimc-transformed keys) - confirm
 * against the caller.
 */
	.type intel_aes_decrypt_ecb_128,@function
	.globl intel_aes_decrypt_ecb_128
	.align	16
intel_aes_decrypt_ecb_128:
	movdqu	(%rdi), %xmm2			/* round key 0 (used last) */
	movdqu	160(%rdi), %xmm12		/* round key 10 (used first) */
	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d		/* last offset at which 8 blocks still fit */

	/* Eight-block loop. */
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm12, %xmm3
	pxor	%xmm12, %xmm4
	pxor	%xmm12, %xmm5
	pxor	%xmm12, %xmm6
	pxor	%xmm12, %xmm7
	pxor	%xmm12, %xmm8
	pxor	%xmm12, %xmm9
	pxor	%xmm12, %xmm10

	/* Rounds are fully unrolled; round keys are loaded two at a time. */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addl	$128, %eax			/* 8 blocks * 16 bytes */
	cmpl	%r11d, %eax
	jbe	2b

	/* Fewer than eight blocks left: finish one block at a time. */
1:	cmpl	%eax, %r9d
	je	5f

	movdqu	16(%rdi), %xmm3			/* round keys 1..9 stay in %xmm3..%xmm11 */
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11

4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b

5:	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
/*
 * intel_aes_encrypt_cbc_128: CBC encryption with an 11-entry round-key
 * schedule, one block at a time (each block depends on the previous
 * ciphertext block).
 *
 * in %rdi : cx - context; round keys 0..10 are read from offsets 0..160
 *           and the IV from offset 256
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller); the incoming value is not
 *           read, and %rdx is overwritten with the IV address
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not referenced here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer in bytes (low 32 bits only)
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  When inputLen is non-zero the last ciphertext
 * block is written back to cx+256 as the next IV; when it is zero the
 * context is not touched.
 * Clobbers %rdx, %xmm0-%xmm12 and flags.
 *
 * inputLen must be a multiple of 16: the loop exits only when the byte
 * index in %eax equals inputLen.
 */
	.type intel_aes_encrypt_cbc_128,@function
	.globl intel_aes_encrypt_cbc_128
	.align	16
intel_aes_encrypt_cbc_128:
	testl	%r9d, %r9d
	je	2f				/* nothing to do for empty input */

	leaq	256(%rdi), %rdx			/* IV_OFFSET(%rdi) */
	movdqu	(%rdx), %xmm0			/* iv / previous ciphertext block */
	movdqu	(%rdi), %xmm2			/* round keys 0..10 in %xmm2..%xmm12 */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12

	xorl	%eax, %eax
1:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1			/* chain in previous ciphertext / iv */
	pxor	%xmm2, %xmm1			/* round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm1, %xmm0			/* becomes the chaining value */
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	1b

	movdqu	%xmm0, (%rdx)			/* save last ciphertext block as next iv */

2:	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
/*
 * intel_aes_decrypt_cbc_128: CBC decryption with an 11-entry round-key
 * schedule, eight blocks at a time where possible.
 *
 * in %rdi : cx - context; round keys 0..10 are read from offsets 0..160
 *           (applied in reverse order) and the IV from offset 256
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller); the incoming value is not
 *           read, and %rdx is overwritten with the IV address
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not referenced here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer in bytes (low 32 bits only)
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  The last ciphertext block consumed (or the
 * unchanged IV when inputLen is 0) is written back to cx+256.
 * Clobbers %rdx, %r11 (only when inputLen >= 128), %xmm0-%xmm13 and
 * flags.
 *
 * inputLen must be a multiple of 16: the single-block loop exits only
 * when the byte index in %eax equals inputLen.
 *
 * In both loops every ciphertext block needed for chaining is re-read
 * or copied before the corresponding output is stored.
 *
 * NOTE(review): presumably the schedule must come from
 * intel_aes_decrypt_init_128 (aesimc-transformed keys) - confirm
 * against the caller.
 */
	.type intel_aes_decrypt_cbc_128,@function
	.globl intel_aes_decrypt_cbc_128
	.align	16
intel_aes_decrypt_cbc_128:
	leaq	256(%rdi), %rdx			/* IV_OFFSET(%rdi) */
	movdqu	(%rdx), %xmm0			/* iv */
	movdqu	(%rdi), %xmm2			/* first key block */
	movdqu	160(%rdi), %xmm12		/* last key block */
	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d		/* last offset at which 8 blocks still fit */

	/* Eight-block loop. */
2:	movdqu	(%r8, %rax), %xmm3		/* 1st data block */
	movdqu	16(%r8, %rax), %xmm4		/* 2nd data block */
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm12, %xmm3
	pxor	%xmm12, %xmm4
	pxor	%xmm12, %xmm5
	pxor	%xmm12, %xmm6
	pxor	%xmm12, %xmm7
	pxor	%xmm12, %xmm8
	pxor	%xmm12, %xmm9
	pxor	%xmm12, %xmm10

	/* Rounds are fully unrolled; round keys are loaded two at a time. */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	/* Undo the chaining: block i is XORed with ciphertext block i-1
	 * (the iv for the first block).  %xmm0 ends up holding the 8th
	 * ciphertext block, ready for the next iteration. */
	pxor	%xmm0, %xmm3
	movdqu	(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm4
	movdqu	16(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm5
	movdqu	32(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm6
	movdqu	48(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm7
	movdqu	64(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm8
	movdqu	80(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm9
	movdqu	96(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm10
	movdqu	112(%r8, %rax), %xmm0

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addl	$128, %eax
	cmpl	%r11d, %eax
	jbe	2b

	/* Fewer than eight blocks left: finish one block at a time. */
1:	cmpl	%eax, %r9d
	je	5f

	movdqu	16(%rdi), %xmm3			/* round keys 1..9 stay in %xmm3..%xmm11 */
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11

4:	movdqu	(%r8, %rax), %xmm1
	movdqa	%xmm1, %xmm13			/* keep ciphertext for chaining */
	pxor	%xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	pxor	%xmm0, %xmm1			/* XOR with previous ciphertext / iv */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm13, %xmm0
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b

5:	movdqu	%xmm0, (%rdx)			/* save chaining value as next iv */
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
/*
 * intel_aes_encrypt_init_192 - build the AES-192 encryption key schedule.
 *
 *	in  %rdi : the key (24 bytes are read)
 *	in  %rsi : buffer for expanded key
 *
 * The 24 key bytes are copied to the start of the buffer.  Each of the
 * eight key_expansion192 calls below then appends 24 more bytes
 * (16 + 8), so 24 + 8 * 24 = 216 bytes are written in total.
 *
 * Register use: %xmm1 holds the first 16 key bytes and %xmm3 the last 8
 * (low quadword); both are updated in place by key_expansion192.  %xmm2
 * receives each aeskeygenassist result.  %rsi is advanced past the
 * bytes written; key_expansion192 also zeroes %eax and uses
 * %xmm4/%xmm5 as scratch.
 */
	.type intel_aes_encrypt_init_192,@function
	.globl intel_aes_encrypt_init_192
	.align 16
intel_aes_encrypt_init_192:
	movdqu	(%rdi), %xmm1			/* key bytes  0..15 */
	movq	16(%rdi), %xmm3			/* key bytes 16..23 */
	movdqu	%xmm1, (%rsi)
	movq	%xmm3, 16(%rsi)
	leaq	24(%rsi), %rsi			/* next free slot in the schedule */

	/* One expansion step per immediate: 0x01, 0x02, ... 0x80. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call	key_expansion192

	ret
	.size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
/*
 * intel_aes_decrypt_init_192 - build the AES-192 decryption key schedule.
 *
 *	in  %rdi : the key (24 bytes are read)
 *	in  %rsi : buffer for expanded key
 *
 * Runs the same eight key_expansion192 steps as the encrypt variant and,
 * as soon as a 16-byte round key is complete, rewrites it in place with
 * aesimc.  Each step appends 24 bytes, so complete round keys become
 * available two at a time after odd-numbered steps and one at a time
 * after even-numbered steps.  The 16 bytes at buffer offset 0 and the
 * 16 bytes at offset 192 are never passed through aesimc.
 *
 * Register use: %xmm1/%xmm3 carry the running key material, %xmm2 and
 * %xmm4 are scratch, %rsi is advanced past the bytes written, and
 * key_expansion192 zeroes %eax and uses %xmm5 as well.
 */
	.type intel_aes_decrypt_init_192,@function
	.globl intel_aes_decrypt_init_192
	.align 16
intel_aes_decrypt_init_192:
	movdqu	(%rdi), %xmm1			/* key bytes  0..15 */
	movq	16(%rdi), %xmm3			/* key bytes 16..23 */
	movdqu	%xmm1, (%rsi)
	movq	%xmm3, 16(%rsi)
	leaq	24(%rsi), %rsi			/* next free slot in the schedule */

	/* Step 1: then transform the round keys at offsets 16 and 32. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion192
	movups	-32(%rsi), %xmm2
	movups	-16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups	%xmm2, -32(%rsi)
	movups	%xmm4, -16(%rsi)

	/* Step 2: %xmm1 is the key just stored at offset 48. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -24(%rsi)

	/* Step 3: transform the round keys at offsets 64 and 80. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion192
	movups	-32(%rsi), %xmm2
	movups	-16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups	%xmm2, -32(%rsi)
	movups	%xmm4, -16(%rsi)

	/* Step 4: transform the round key at offset 96. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -24(%rsi)

	/* Step 5: transform the round keys at offsets 112 and 128. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion192
	movups	-32(%rsi), %xmm2
	movups	-16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups	%xmm2, -32(%rsi)
	movups	%xmm4, -16(%rsi)

	/* Step 6: transform the round key at offset 144. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -24(%rsi)

	/* Step 7: transform the round keys at offsets 160 and 176. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call	key_expansion192
	movups	-32(%rsi), %xmm2
	movups	-16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups	%xmm2, -32(%rsi)
	movups	%xmm4, -16(%rsi)

	/* Step 8: the key stored at offset 192 is kept as generated. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call	key_expansion192

	ret
	.size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
/*
 * key_expansion192 - file-local helper: append 24 bytes of key schedule.
 *
 * Not a C-callable function; it uses a private register convention:
 *
 *	in/out %xmm1 : four schedule words; replaced by the next four
 *	in     %xmm2 : aeskeygenassist result (only dword 1 is used)
 *	in/out %xmm3 : two schedule words in the low quadword; replaced
 *	               by the next two
 *	in/out %rsi  : write cursor; advanced by 24
 *
 * Clobbers %eax (left at zero), %xmm2, %xmm4, %xmm5 and the flags.
 * The stack is not touched apart from the return address.
 */
	.type key_expansion192,@function
	.align 16
key_expansion192:
	pshufd	$0x55, %xmm2, %xmm2		/* broadcast dword 1 of %xmm2 */
	xorl	%eax, %eax
	movd	%eax, %xmm4			/* %xmm4 = 0, shuffle scratch */

	/*
	 * The two shufps/pxor pairs turn the dwords of %xmm1 into their
	 * running XOR (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3); the broadcast
	 * value in %xmm2 is folded into every lane.
	 */
	shufps	$0x10, %xmm1, %xmm4
	pxor	%xmm4, %xmm1
	shufps	$0x8c, %xmm1, %xmm4
	pxor	%xmm2, %xmm1
	pxor	%xmm4, %xmm1
	movdqu	%xmm1, (%rsi)			/* emit 16 bytes */
	addq	$16, %rsi

	/*
	 * Low two dwords of %xmm3: running XOR again, seeded with the
	 * last dword of the block just written.
	 */
	pshufd	$0xff, %xmm1, %xmm4		/* broadcast dword 3 of %xmm1 */
	movd	%eax, %xmm5			/* %xmm5 = 0, shuffle scratch */
	shufps	$0x00, %xmm3, %xmm5
	shufps	$0x08, %xmm3, %xmm5
	pxor	%xmm4, %xmm3
	pxor	%xmm5, %xmm3
	movq	%xmm3, (%rsi)			/* emit 8 bytes */
	addq	$8, %rsi
	ret
	.size key_expansion192, .-key_expansion192
/*
 * intel_aes_encrypt_ecb_192 - AES-192 ECB encryption.
 *
 *	in  %rdi : cx - context
 *	in  %rsi : output - pointer to output buffer
 *	in  %rdx : outputLen - pointer to variable for length of output
 *	           (already filled in by caller)
 *	in  %ecx : maxOutputLen - length of output buffer
 *	           (already checked by caller)
 *	in  %r8  : input - pointer to input buffer
 *	in  %r9d : inputLen - length of input buffer
 *	on stack: blocksize - AES blocksize (always 16, unused)
 *
 *	out %eax : always 0
 *
 * Round keys are read from offsets 0, 16, ... 192 of the context.
 * %rdx and %ecx are not referenced.  While at least 128 bytes remain,
 * eight blocks are processed per iteration in %xmm3-%xmm10; the
 * remainder is handled one block at a time.  The single-block loop
 * exits only when the byte counter equals inputLen exactly, so
 * inputLen is assumed to be a multiple of 16.
 *
 * %eax is the running byte offset into both buffers; %r11d holds
 * inputLen - 128, the last offset at which a full 8-block group fits.
 */
	.type intel_aes_encrypt_ecb_192,@function
	.globl intel_aes_encrypt_ecb_192
	.align 16
intel_aes_encrypt_ecb_192:
	movdqu	(%rdi), %xmm2			/* round key 0 */
	movdqu	192(%rdi), %xmm14		/* final round key */
	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 8 * 16 */
	jb	.Lenc_ecb192_tail
	leal	-128(%r9), %r11d		/* inputLen - 8 * 16 */

.Lenc_ecb192_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10

	/* complete loop unrolling; round keys are fetched two at a time */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	144(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	176(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */

	.byte 0x66,0x41,0x0f,0x38,0xdd,0xde	/* aesenclast %xmm14, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe6	/* aesenclast %xmm14, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xee	/* aesenclast %xmm14, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf6	/* aesenclast %xmm14, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfe	/* aesenclast %xmm14, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc6	/* aesenclast %xmm14, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd6	/* aesenclast %xmm14, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addl	$128, %eax			/* 8 * 16 */
	cmpl	%r11d, %eax
	jbe	.Lenc_ecb192_bulk		/* another full group still fits */

.Lenc_ecb192_tail:
	cmpl	%eax, %r9d
	je	.Lenc_ecb192_done

	/* Keep round keys 1..11 resident for the one-block-at-a-time loop. */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13

.Lenc_ecb192_single:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm2, %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	.Lenc_ecb192_single

.Lenc_ecb192_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
/*
 * intel_aes_decrypt_ecb_192 - AES-192 ECB decryption.
 *
 *	in  %rdi : cx - context
 *	in  %rsi : output - pointer to output buffer
 *	in  %rdx : outputLen - pointer to variable for length of output
 *	           (already filled in by caller)
 *	in  %ecx : maxOutputLen - length of output buffer
 *	           (already checked by caller)
 *	in  %r8  : input - pointer to input buffer
 *	in  %r9d : inputLen - length of input buffer
 *	on stack: blocksize - AES blocksize (always 16, unused)
 *
 *	out %eax : always 0
 *
 * Round keys are read from offsets 0, 16, ... 192 of the context and
 * applied in descending order: the key at 192 is XORed in first, the
 * keys at 176 down to 16 feed aesdec, and the key at 0 feeds
 * aesdeclast.  %rdx and %ecx are not referenced.  While at least 128
 * bytes remain, eight blocks are processed per iteration in
 * %xmm3-%xmm10; the remainder is handled one block at a time.  The
 * single-block loop exits only when the byte counter equals inputLen
 * exactly, so inputLen is assumed to be a multiple of 16.
 *
 * %eax is the running byte offset into both buffers; %r11d holds
 * inputLen - 128, the last offset at which a full 8-block group fits.
 */
	.type intel_aes_decrypt_ecb_192,@function
	.globl intel_aes_decrypt_ecb_192
	.align 16
intel_aes_decrypt_ecb_192:
	movdqu	(%rdi), %xmm2			/* key for aesdeclast */
	movdqu	192(%rdi), %xmm14		/* key for the initial XOR */
	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 8 * 16 */
	jb	.Ldec_ecb192_tail
	leal	-128(%r9), %r11d		/* inputLen - 8 * 16 */

.Ldec_ecb192_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm14, %xmm3
	pxor	%xmm14, %xmm4
	pxor	%xmm14, %xmm5
	pxor	%xmm14, %xmm6
	pxor	%xmm14, %xmm7
	pxor	%xmm14, %xmm8
	pxor	%xmm14, %xmm9
	pxor	%xmm14, %xmm10

	/* complete loop unrolling; round keys are fetched two at a time */
	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addl	$128, %eax			/* 8 * 16 */
	cmpl	%r11d, %eax
	jbe	.Ldec_ecb192_bulk		/* another full group still fits */

.Ldec_ecb192_tail:
	cmpl	%eax, %r9d
	je	.Ldec_ecb192_done

	/* Keep round keys 1..11 resident for the one-block-at-a-time loop. */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13

.Ldec_ecb192_single:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	.Ldec_ecb192_single

.Ldec_ecb192_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
/*
 * intel_aes_encrypt_cbc_192 - AES-192 CBC encryption.
 *
 *	in  %rdi : cx - context
 *	in  %rsi : output - pointer to output buffer
 *	in  %rdx : outputLen - pointer to variable for length of output
 *	           (already filled in by caller)
 *	in  %ecx : maxOutputLen - length of output buffer
 *	           (already checked by caller)
 *	in  %r8  : input - pointer to input buffer
 *	in  %r9d : inputLen - length of input buffer
 *	on stack: blocksize - AES blocksize (always 16, unused)
 *
 *	out %eax : always 0
 *
 * The incoming %rdx value is not read; %rdx is reused to hold the
 * address of the 16-byte chaining value at offset 256 of the context.
 * That value is loaded before the first block and replaced with the
 * last ciphertext block on exit.  With inputLen == 0 the context is
 * left untouched.  The loop exits only when the byte counter equals
 * inputLen exactly, so inputLen is assumed to be a multiple of 16.
 *
 * All thirteen round keys (context offsets 0..192) stay resident in
 * %xmm2-%xmm14; %xmm0 carries the chaining value and %eax the byte
 * offset into both buffers.
 */
	.type intel_aes_encrypt_cbc_192,@function
	.globl intel_aes_encrypt_cbc_192
	.align 16
intel_aes_encrypt_cbc_192:
	testl	%r9d, %r9d
	je	.Lenc_cbc192_done		/* nothing to encrypt */

	leaq	256(%rdi), %rdx			/* IV_OFFSET(%rdi) */
	movdqu	(%rdx), %xmm0			/* current chaining value */
	movdqu	(%rdi), %xmm2
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
	movdqu	192(%rdi), %xmm14
	xorl	%eax, %eax

.Lenc_cbc192_block:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1			/* chain in previous ciphertext */
	pxor	%xmm2, %xmm1			/* round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm1, %xmm0			/* becomes the next chaining value */
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	.Lenc_cbc192_block

	movdqu	%xmm0, (%rdx)			/* save chaining value for the next call */

.Lenc_cbc192_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(already filled in by caller)
in %ecx : maxOutputLen - length of output buffer
(already checked by caller)
in %r8 : input - pointer to input buffer
in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_192,@function
.globl intel_aes_decrypt_cbc_192
.
align 16
intel_aes_decrypt_cbc_192:
// leaq IV_OFFSET(%rdi), %rdx
leaq
256 (%rdi), %rdx
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm2
movdqu
192 (%rdi), %xmm14
xorl %eax, %eax
cmpl $
128 , %r9d
jb
1 f
leal -
128 (%r9), %r11d
2 : movdqu (%r8, %rax), %xmm3
movdqu
16 (%r8, %rax), %xmm4
movdqu
32 (%r8, %rax), %xmm5
movdqu
48 (%r8, %rax), %xmm6
movdqu
64 (%r8, %rax), %xmm7
movdqu
80 (%r8, %rax), %xmm8
movdqu
96 (%r8, %rax), %xmm9
movdqu
112 (%r8, %rax), %xmm10
pxor %xmm14, %xmm3
pxor %xmm14, %xmm4
pxor %xmm14, %xmm5
pxor %xmm14, %xmm6
pxor %xmm14, %xmm7
pxor %xmm14, %xmm8
pxor %xmm14, %xmm9
pxor %xmm14, %xmm10
// complete loop unrolling
movdqu
176 (%rdi), %xmm1
movdqu
160 (%rdi), %xmm11
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xdb
/* aesdec %xmm11, %xmm3 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xe3
/* aesdec %xmm11, %xmm4 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xeb
/* aesdec %xmm11, %xmm5 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xf3
/* aesdec %xmm11, %xmm6 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xfb
/* aesdec %xmm11, %xmm7 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xc3
/* aesdec %xmm11, %xmm8 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm9 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xd3
/* aesdec %xmm11, %xmm10 */
movdqu
144 (%rdi), %xmm1
movdqu
128 (%rdi), %xmm11
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xdb
/* aesdec %xmm11, %xmm3 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xe3
/* aesdec %xmm11, %xmm4 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xeb
/* aesdec %xmm11, %xmm5 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xf3
/* aesdec %xmm11, %xmm6 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xfb
/* aesdec %xmm11, %xmm7 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xc3
/* aesdec %xmm11, %xmm8 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm9 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xd3
/* aesdec %xmm11, %xmm10 */
movdqu
112 (%rdi), %xmm1
movdqu
96 (%rdi), %xmm11
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xdb
/* aesdec %xmm11, %xmm3 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xe3
/* aesdec %xmm11, %xmm4 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xeb
/* aesdec %xmm11, %xmm5 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xf3
/* aesdec %xmm11, %xmm6 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xfb
/* aesdec %xmm11, %xmm7 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xc3
/* aesdec %xmm11, %xmm8 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm9 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xd3
/* aesdec %xmm11, %xmm10 */
movdqu
80 (%rdi), %xmm1
movdqu
64 (%rdi), %xmm11
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xdb
/* aesdec %xmm11, %xmm3 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xe3
/* aesdec %xmm11, %xmm4 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xeb
/* aesdec %xmm11, %xmm5 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xf3
/* aesdec %xmm11, %xmm6 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xfb
/* aesdec %xmm11, %xmm7 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xc3
/* aesdec %xmm11, %xmm8 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm9 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xd3
/* aesdec %xmm11, %xmm10 */
movdqu
48 (%rdi), %xmm1
movdqu
32 (%rdi), %xmm11
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xdb
/* aesdec %xmm11, %xmm3 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xe3
/* aesdec %xmm11, %xmm4 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xeb
/* aesdec %xmm11, %xmm5 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xf3
/* aesdec %xmm11, %xmm6 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xfb
/* aesdec %xmm11, %xmm7 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xc3
/* aesdec %xmm11, %xmm8 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm9 */
.byte
0 x66,
0 x45,
0 x0f,
0 x38,
0 xde,
0 xd3
/* aesdec %xmm11, %xmm10 */
movdqu
16 (%rdi), %xmm1
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xd9
/* aesdec %xmm1, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe1
/* aesdec %xmm1, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xe9
/* aesdec %xmm1, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf1
/* aesdec %xmm1, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xf9
/* aesdec %xmm1, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc1
/* aesdec %xmm1, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm1, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xde,
0 xd1
/* aesdec %xmm1, %xmm10 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xda
/* aesdeclast %xmm2, %xmm3 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xe2
/* aesdeclast %xmm2, %xmm4 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xea
/* aesdeclast %xmm2, %xmm5 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xf2
/* aesdeclast %xmm2, %xmm6 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xfa
/* aesdeclast %xmm2, %xmm7 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xdf,
0 xc2
/* aesdeclast %xmm2, %xmm8 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xdf,
0 xca
/* aesdeclast %xmm2, %xmm9 */
.byte
0 x66,
0 x44,
0 x0f,
0 x38,
0 xdf,
0 xd2
/* aesdeclast %xmm2, %xmm10 */
pxor %xmm0, %xmm3
movdqu (%r8, %rax), %xmm0
pxor %xmm0, %xmm4
movdqu
16 (%r8, %rax), %xmm0
pxor %xmm0, %xmm5
movdqu
32 (%r8, %rax), %xmm0
pxor %xmm0, %xmm6
movdqu
48 (%r8, %rax), %xmm0
pxor %xmm0, %xmm7
movdqu
64 (%r8, %rax), %xmm0
pxor %xmm0, %xmm8
movdqu
80 (%r8, %rax), %xmm0
pxor %xmm0, %xmm9
movdqu
96 (%r8, %rax), %xmm0
pxor %xmm0, %xmm10
movdqu
112 (%r8, %rax), %xmm0
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4,
16 (%rsi, %rax)
movdqu %xmm5,
32 (%rsi, %rax)
movdqu %xmm6,
48 (%rsi, %rax)
movdqu %xmm7,
64 (%rsi, %rax)
movdqu %xmm8,
80 (%rsi, %rax)
movdqu %xmm9,
96 (%rsi, %rax)
movdqu %xmm10,
112 (%rsi, %rax)
addl $
128 , %eax
cmpl %r11d, %eax
jbe
2 b
1 : cmpl %eax, %r9d
je
5 f
movdqu
16 (%rdi), %xmm3
movdqu
32 (%rdi), %xmm4
movdqu
48 (%rdi), %xmm5
movdqu
64 (%rdi), %xmm6
movdqu
80 (%rdi), %xmm7
movdqu
96 (%rdi), %xmm8
movdqu
112 (%rdi), %xmm9
movdqu
128 (%rdi), %xmm10
movdqu
144 (%rdi), %xmm11
movdqu
160 (%rdi), %xmm12
movdqu
176 (%rdi), %xmm13
4 : movdqu (%r8, %rax), %xmm1
movdqa %xmm1, %xmm15
pxor %xmm14, %xmm1
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xcd
/* aesdec %xmm13, %xmm1 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xcc
/* aesdec %xmm12, %xmm1 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm11, %xmm1 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xca
/* aesdec %xmm10, %xmm1 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xc9
/* aesdec %xmm9, %xmm1 */
.byte
0 x66,
0 x41,
0 x0f,
0 x38,
0 xde,
0 xc8
/* aesdec %xmm8, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xcf
/* aesdec %xmm7, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xce
/* aesdec %xmm6, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xcd
/* aesdec %xmm5, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xcc
/* aesdec %xmm4, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xde,
0 xcb
/* aesdec %xmm3, %xmm1 */
.byte
0 x66,
0 x0f,
0 x38,
0 xdf,
0 xca
/* aesdeclast %xmm2, %xmm1 */
pxor %xmm0, %xmm1
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm15, %xmm0
addl $
16 , %eax
cmpl %eax, %r9d
jne
4 b
5 : movdqu %xmm0, (%rdx)
xor %eax, %eax
ret
.
size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
/*
 * intel_aes_encrypt_init_256
 *
 * Expands a 32-byte key into 15 round keys (240 bytes) for encryption.
 *
 * in %rdi : the key
 * in %rsi : buffer for expanded key
 *
 * out %eax: 0
 *
 * The two halves of the key are copied out as round keys 0 and 1.  Each
 * aeskeygenassist + key_expansion256 pair then appends two more round
 * keys and advances %rsi by 32.  The last round key needs only the first
 * half of that step, so it is computed inline at the end.
 * %eax must be 0 across the calls because key_expansion256 copies it into
 * %xmm6.
 */
	.type	intel_aes_encrypt_init_256,@function
	.globl	intel_aes_encrypt_init_256
	.align	16
intel_aes_encrypt_init_256:
	movdqu	(%rdi), %xmm1			/* xmm1 = key bytes 0..15 */
	movdqu	16(%rdi), %xmm3			/* xmm3 = key bytes 16..31 */
	movdqu	%xmm1, (%rsi)
	movdqu	%xmm3, 16(%rsi)
	leaq	32(%rsi), %rsi			/* rsi -> slot for round key 2 */
	xor	%eax, %eax

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256

	/* Final round key: same steps as the first half of key_expansion256. */
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pxor	%xmm6, %xmm6
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of xmm2 */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm2, %xmm1
	pxor	%xmm6, %xmm1
	movdqu	%xmm1, (%rsi)
	ret
	.size	intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
/*
 * intel_aes_decrypt_init_256
 *
 * Expands a 32-byte key into 15 round keys (240 bytes) laid out for the
 * aesdec-based decryption routines.
 *
 * in %rdi : the key
 * in %rsi : buffer for expanded key
 *
 * out %eax: 0
 *
 * The expansion is the same as in the encrypt variant, but round keys
 * 1..13 are stored after passing through aesimc.  Round keys 0 and 14 are
 * stored unmodified.  key_expansion256 first writes the plain keys and
 * advances %rsi by 32; the two slots it just filled, -32(%rsi) and
 * -16(%rsi), are then overwritten with the aesimc results.  %xmm1 and
 * %xmm3 keep the plain keys so that the expansion can continue.
 */
	.type	intel_aes_decrypt_init_256,@function
	.globl	intel_aes_decrypt_init_256
	.align	16
intel_aes_decrypt_init_256:
	movdqu	(%rdi), %xmm1			/* xmm1 = key bytes 0..15 */
	movdqu	16(%rdi), %xmm3			/* xmm3 = key bytes 16..31 */
	movdqu	%xmm1, (%rsi)			/* round key 0 is stored as is */
	.byte	0x66,0x0f,0x38,0xdb,0xe3	/* aesimc %xmm3, %xmm4 */
	movdqu	%xmm4, 16(%rsi)
	leaq	32(%rsi), %rsi			/* rsi -> slot for round key 2 */
	xor	%eax, %eax			/* key_expansion256 copies eax into xmm6 */

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256
	.byte	0x66,0x0f,0x38,0xdb,0xe1	/* aesimc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdb,0xeb	/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm4, -32(%rsi)
	movdqu	%xmm5, -16(%rsi)

	/* Final round key: computed inline and stored without aesimc. */
	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pxor	%xmm6, %xmm6
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of xmm2 */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm2, %xmm1
	pxor	%xmm6, %xmm1
	movdqu	%xmm1, (%rsi)
	ret
	.size	intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
/*
 * key_expansion256 -- file-local helper for the 256-bit key schedules.
 *
 * Produces the next two round keys and appends them to the output buffer.
 *
 * in  %xmm1 : round key n-2
 * in  %xmm3 : round key n-1
 * in  %xmm2 : result of aeskeygenassist on %xmm3 (set up by the caller)
 * in  %eax  : copied into dword 0 of %xmm6; both callers in this file
 *             zero it first
 * in  %rsi  : where to store the new round keys
 *
 * out %xmm1 : round key n    (also stored at the incoming %rsi)
 * out %xmm3 : round key n+1  (also stored at the incoming %rsi + 16)
 * out %rsi  : advanced by 32
 *
 * Clobbers %xmm2, %xmm4, %xmm6 and the flags.  Does not touch the stack
 * or %eax.
 */
	.type	key_expansion256,@function
	.align	16
key_expansion256:
	movd	%eax, %xmm6			/* dword 0 of xmm6 = eax */
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of xmm2 */
	/*
	 * With dword 0 of xmm6 equal to 0, the shufps/pxor pairs below leave
	 * dword i of xmm1 holding the XOR of its original dwords 0..i; the
	 * broadcast value is then XORed into every lane.
	 */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm2, %xmm1
	pxor	%xmm6, %xmm1
	movdqu	%xmm1, (%rsi)
	addq	$16, %rsi

	.byte	0x66,0x0f,0x3a,0xdf,0xe1,0x00	/* aeskeygenassist $0, %xmm1, %xmm4 */
	pshufd	$0xaa, %xmm4, %xmm4		/* broadcast dword 2 of xmm4 */
	/*
	 * Same pattern applied to xmm3.  xmm6 is not reset here: both shufps
	 * forms keep dword 0 of the destination, so it is still the value
	 * copied from eax above.
	 */
	shufps	$0x10, %xmm3, %xmm6
	pxor	%xmm6, %xmm3
	shufps	$0x8c, %xmm3, %xmm6
	pxor	%xmm4, %xmm3
	pxor	%xmm6, %xmm3
	movdqu	%xmm3, (%rsi)
	addq	$16, %rsi
	ret
	.size	key_expansion256, .-key_expansion256
/*
 * intel_aes_encrypt_ecb_256
 *
 * ECB encryption built from aesenc/aesenclast, using the 15 round keys
 * stored at offsets 0, 16, ..., 224 of the context.
 *
 * in %rdi : cx - context
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * out %eax: always 0
 *
 * Input is processed eight blocks at a time while at least 128 bytes
 * remain, then one block at a time.  Both loops stop only when the
 * running offset equals inputLen exactly; inputLen is presumably a
 * multiple of 16 (checked by the caller).
 *
 * Uses only caller-saved registers (%eax, %r11d, %xmm0-%xmm15) and no
 * stack.  AES-NI instructions are emitted as raw .byte sequences; the
 * mnemonic is given in the comment next to each one.
 */
	.type	intel_aes_encrypt_ecb_256,@function
	.globl	intel_aes_encrypt_ecb_256
	.align	16
intel_aes_encrypt_ecb_256:
	movdqu	(%rdi), %xmm2			/* xmm2 = round key 0 */
	movdqu	224(%rdi), %xmm15		/* xmm15 = round key 14, used by aesenclast */
	xorl	%eax, %eax			/* eax = byte offset; 32-bit write clears rax[63:32] */
	cmpl	$128, %r9d			/* 128 = 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d		/* r11d = largest offset with 8 blocks left */

	/* ---- eight blocks per iteration ---- */
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10

	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10

	/* Fully unrolled rounds; xmm1 and xmm11 alternate as round-key scratch. */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	144(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	176(%rdi), %xmm1
	movdqu	192(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	movdqu	208(%rdi), %xmm1
	.byte	0x66,0x0f,0x38,0xdc,0xd9	/* aesenc %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0xe1	/* aesenc %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0xe9	/* aesenc %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0xf1	/* aesenc %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0xf9	/* aesenc %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */

	.byte	0x66,0x41,0x0f,0x38,0xdd,0xdf	/* aesenclast %xmm15, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe7	/* aesenclast %xmm15, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xef	/* aesenclast %xmm15, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf7	/* aesenclast %xmm15, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xff	/* aesenclast %xmm15, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xdd,0xc7	/* aesenclast %xmm15, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xdd,0xd7	/* aesenclast %xmm15, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)

	addl	$128, %eax			/* 8 blocks * 16 bytes */
	cmpl	%r11d, %eax
	jbe	2b				/* unsigned: repeat while offset <= inputLen - 128 */

	/* ---- remaining blocks, one at a time ---- */
1:	cmpl	%eax, %r9d
	je	5f				/* nothing left */

	/*
	 * Load every round key once, before the loop.  %xmm0 is not used
	 * anywhere else in this function, so it holds round key 0 and %xmm8
	 * can keep round key 7 for the whole loop; neither has to be
	 * reloaded from the context on each iteration.  %xmm15 still holds
	 * round key 14 from function entry.
	 */
	movdqu	(%rdi), %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1
	.byte	0x66,0x0f,0x38,0xdc,0xca	/* aesenc %xmm2, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm3, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm4, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm5, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xce	/* aesenc %xmm6, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcf	/* aesenc %xmm7, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b

5:	xor	%eax, %eax
	ret
	.size	intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
/*
 * intel_aes_decrypt_ecb_256
 *
 * ECB decryption built from aesdec/aesdeclast, using the 15 round keys
 * stored at offsets 0, 16, ..., 224 of the context.  The keys are
 * consumed from offset 224 down to offset 0.
 *
 * in %rdi : cx - context
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * out %eax: always 0
 *
 * Input is processed eight blocks at a time while at least 128 bytes
 * remain, then one block at a time.  Both loops stop only when the
 * running offset equals inputLen exactly; inputLen is presumably a
 * multiple of 16 (checked by the caller).
 *
 * Uses only caller-saved registers (%eax, %r11d, %xmm0-%xmm15) and no
 * stack.  AES-NI instructions are emitted as raw .byte sequences; the
 * mnemonic is given in the comment next to each one.
 */
	.type	intel_aes_decrypt_ecb_256,@function
	.globl	intel_aes_decrypt_ecb_256
	.align	16
intel_aes_decrypt_ecb_256:
	movdqu	(%rdi), %xmm2			/* xmm2 = round key 0, used by aesdeclast */
	movdqu	224(%rdi), %xmm15		/* xmm15 = round key 14, XORed in first */
	xorl	%eax, %eax			/* eax = byte offset; 32-bit write clears rax[63:32] */
	cmpl	$128, %r9d			/* 128 = 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d		/* r11d = largest offset with 8 blocks left */

	/* ---- eight blocks per iteration ---- */
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10

	pxor	%xmm15, %xmm3
	pxor	%xmm15, %xmm4
	pxor	%xmm15, %xmm5
	pxor	%xmm15, %xmm6
	pxor	%xmm15, %xmm7
	pxor	%xmm15, %xmm8
	pxor	%xmm15, %xmm9
	pxor	%xmm15, %xmm10

	/* Fully unrolled rounds; xmm1 and xmm11 alternate as round-key scratch. */
	movdqu	208(%rdi), %xmm1
	movdqu	192(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	movdqu	16(%rdi), %xmm1
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */

	.byte	0x66,0x0f,0x38,0xdf,0xda	/* aesdeclast %xmm2, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdf,0xe2	/* aesdeclast %xmm2, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdf,0xea	/* aesdeclast %xmm2, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdf,0xf2	/* aesdeclast %xmm2, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdf,0xfa	/* aesdeclast %xmm2, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)

	addl	$128, %eax			/* 8 blocks * 16 bytes */
	cmpl	%r11d, %eax
	jbe	2b				/* unsigned: repeat while offset <= inputLen - 128 */

	/* ---- remaining blocks, one at a time ---- */
1:	cmpl	%eax, %r9d
	je	5f				/* nothing left */

	/*
	 * Load every round key once, before the loop.  %xmm0 is not used
	 * anywhere else in this function, so it holds round key 0 (for
	 * aesdeclast) and %xmm8 can keep round key 7 for the whole loop;
	 * neither has to be reloaded from the context on each iteration.
	 * %xmm15 still holds round key 14 from function entry.
	 */
	movdqu	(%rdi), %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm15, %xmm1
	.byte	0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec %xmm14, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcf	/* aesdec %xmm7, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xce	/* aesdec %xmm6, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcd	/* aesdec %xmm5, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcc	/* aesdec %xmm4, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcb	/* aesdec %xmm3, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xca	/* aesdec %xmm2, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdf,0xc8	/* aesdeclast %xmm0, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b

5:	xor	%eax, %eax
	ret
	.size	intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
/*
 * intel_aes_encrypt_cbc_256 -- AES-256 CBC encryption, one block at a time.
 *
 * in %rdi : cx - context
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * out %eax: always 0
 *
 * Context accesses made here: 16-byte round keys are read at offsets
 * 0, 16, ..., 224 and the chaining value (IV) is read from, and written
 * back to, offset 256.
 *
 * Register roles inside the loop:
 *   %eax        byte offset of the current block (low 32 bits only)
 *   %rdx        address of the IV slot; the incoming outputLen pointer is
 *               overwritten and never dereferenced
 *   %xmm0       chaining value (IV, then the previous ciphertext block)
 *   %xmm1       block being encrypted
 *   %xmm2-7     round keys at offsets 16..96
 *   %xmm8       shared by the keys at offsets 0 and 112: together with
 *               %xmm0/%xmm1, fifteen keys do not fit in sixteen registers,
 *               so this one is reloaded twice per block
 *   %xmm9-15    round keys at offsets 128..224
 *
 * If inputLen is 0 the context is left untouched.
 * NOTE(review): the loop exits only when the offset equals inputLen, so
 * inputLen is assumed to be a multiple of 16 -- confirm with the caller.
 */
	.type	intel_aes_encrypt_cbc_256,@function
	.globl	intel_aes_encrypt_cbc_256
	.align	16
intel_aes_encrypt_cbc_256:
	testl	%r9d, %r9d
	je	.Lencrypt_cbc_256_done

	/* Same as "leaq IV_OFFSET(%rdi), %rdx" with IV_OFFSET = 256. */
	leaq	256(%rdi), %rdx

	movdqu	(%rdx), %xmm0
	movdqu	(%rdi), %xmm8
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14
	movdqu	224(%rdi), %xmm15

	xorl	%eax, %eax
.Lencrypt_cbc_256_loop:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1			/* CBC chaining: xor with previous block */
	pxor	%xmm8, %xmm1			/* initial AddRoundKey (key at offset 0) */
	movdqu	112(%rdi), %xmm8		/* %xmm8 now holds the key at offset 112 */
	.byte	0x66,0x0f,0x38,0xdc,0xca	/* aesenc %xmm2, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm3, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm4, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm5, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xce	/* aesenc %xmm6, %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0xcf	/* aesenc %xmm7, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	movdqu	(%rdi), %xmm8			/* restore key 0 for the next block */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm1, %xmm0			/* ciphertext chains into the next block */
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	.Lencrypt_cbc_256_loop

	movdqu	%xmm0, (%rdx)			/* save last ciphertext block as the new IV */

.Lencrypt_cbc_256_done:
	xor	%eax, %eax
	ret
	.size	intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
/*
 * intel_aes_decrypt_cbc_256 -- AES-256 CBC decryption.
 *
 * in %rdi : cx - context
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * out %eax: always 0
 *
 * Context accesses made here: 16-byte round keys are read at offsets
 * 224 down to 0 and the chaining value (IV) is read from, and written
 * back to, offset 256.
 * NOTE(review): the keys are consumed from offset 224 down to 0 with
 * aesdec/aesdeclast, so the context presumably holds a schedule prepared
 * for decryption -- confirm against the matching init routine.
 *
 * Structure:
 *   1. While at least 128 bytes (8 blocks) remain, decrypt 8 blocks per
 *      iteration with the rounds interleaved across %xmm3-%xmm10.
 *   2. Decrypt any remaining blocks one at a time.
 *   3. Store the chaining value (%xmm0) back to the IV slot.
 *
 * Register roles:
 *   %eax        byte offset of the current block (low 32 bits only)
 *   %r11d       inputLen - 128, the last offset at which 8 blocks still fit
 *   %rdx        address of the IV slot; the incoming outputLen pointer is
 *               overwritten and never dereferenced
 *   %xmm0       chaining value (IV, then the previous ciphertext block)
 *   %xmm15      round key at offset 224 (initial xor)
 *
 * Each ciphertext block is reloaded from the input for chaining before the
 * corresponding plaintext is stored, so input and output may be the same
 * buffer.
 * NOTE(review): the single-block loop exits only when the offset equals
 * inputLen, so inputLen is assumed to be a multiple of 16 -- confirm with
 * the caller.
 */
	.type	intel_aes_decrypt_cbc_256,@function
	.globl	intel_aes_decrypt_cbc_256
	.align	16
intel_aes_decrypt_cbc_256:
	/* Same as "leaq IV_OFFSET(%rdi), %rdx" with IV_OFFSET = 256. */
	leaq	256(%rdi), %rdx

	movdqu	(%rdx), %xmm0
	movdqu	(%rdi), %xmm2			/* key 0, used by aesdeclast below */
	movdqu	224(%rdi), %xmm15

	xorl	%eax, %eax
	cmpl	$128, %r9d			/* 128 = 8 blocks * 16 bytes */
	jb	.Ldecrypt_cbc_256_tail
	leal	-128(%r9), %r11d		/* r11d = inputLen - 8*16 */

.Ldecrypt_cbc_256_loop8:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm15, %xmm3
	pxor	%xmm15, %xmm4
	pxor	%xmm15, %xmm5
	pxor	%xmm15, %xmm6
	pxor	%xmm15, %xmm7
	pxor	%xmm15, %xmm8
	pxor	%xmm15, %xmm9
	pxor	%xmm15, %xmm10

	/*
	 * Rounds are completely unrolled.  Two round keys are fetched at a
	 * time into %xmm1 and %xmm11 and each is applied to all 8 blocks.
	 */

	/* round keys at offsets 208 and 192 */
	movdqu	208(%rdi), %xmm1
	movdqu	192(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys at offsets 176 and 160 */
	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys at offsets 144 and 128 */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys at offsets 112 and 96 */
	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys at offsets 80 and 64 */
	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys at offsets 48 and 32 */
	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte	0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round key at offset 16 */
	movdqu	16(%rdi), %xmm1
	.byte	0x66,0x0f,0x38,0xde,0xd9	/* aesdec %xmm1, %xmm3 */
	.byte	0x66,0x0f,0x38,0xde,0xe1	/* aesdec %xmm1, %xmm4 */
	.byte	0x66,0x0f,0x38,0xde,0xe9	/* aesdec %xmm1, %xmm5 */
	.byte	0x66,0x0f,0x38,0xde,0xf1	/* aesdec %xmm1, %xmm6 */
	.byte	0x66,0x0f,0x38,0xde,0xf9	/* aesdec %xmm1, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */

	/* final round with the key at offset 0 (held in %xmm2) */
	.byte	0x66,0x0f,0x38,0xdf,0xda	/* aesdeclast %xmm2, %xmm3 */
	.byte	0x66,0x0f,0x38,0xdf,0xe2	/* aesdeclast %xmm2, %xmm4 */
	.byte	0x66,0x0f,0x38,0xdf,0xea	/* aesdeclast %xmm2, %xmm5 */
	.byte	0x66,0x0f,0x38,0xdf,0xf2	/* aesdeclast %xmm2, %xmm6 */
	.byte	0x66,0x0f,0x38,0xdf,0xfa	/* aesdeclast %xmm2, %xmm7 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte	0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	/*
	 * CBC chaining: xor each decrypted block with the preceding ciphertext
	 * block.  The ciphertext is re-read from the input here, before any
	 * output is stored, so that input == output still works.  %xmm0 ends
	 * up holding the last ciphertext block of this group.
	 */
	pxor	%xmm0, %xmm3
	movdqu	(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm4
	movdqu	16(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm5
	movdqu	32(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm6
	movdqu	48(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm7
	movdqu	64(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm8
	movdqu	80(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm9
	movdqu	96(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm10
	movdqu	112(%r8, %rax), %xmm0

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)

	addl	$128, %eax			/* advance by 8*16 bytes */
	cmpl	%r11d, %eax
	jbe	.Ldecrypt_cbc_256_loop8		/* another 8 blocks still fit */

.Ldecrypt_cbc_256_tail:
	cmpl	%eax, %r9d
	je	.Ldecrypt_cbc_256_done

	/*
	 * Single-block loop.  Keys at offsets 16..208 live in %xmm2-%xmm14;
	 * %xmm8 is shared by the keys at offsets 112 and 0 and is reloaded
	 * twice per block.
	 */
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Ldecrypt_cbc_256_loop1:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm15, %xmm1
	.byte	0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec %xmm14, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	movdqu	(%rdi), %xmm8			/* %xmm8 now holds the key at offset 0 */
	.byte	0x66,0x0f,0x38,0xde,0xcf	/* aesdec %xmm7, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xce	/* aesdec %xmm6, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcd	/* aesdec %xmm5, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcc	/* aesdec %xmm4, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xcb	/* aesdec %xmm3, %xmm1 */
	.byte	0x66,0x0f,0x38,0xde,0xca	/* aesdec %xmm2, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdf,0xc8	/* aesdeclast %xmm8, %xmm1 */
	movdqu	112(%rdi), %xmm8		/* restore the key at offset 112 */
	pxor	%xmm0, %xmm1
	movdqu	(%r8, %rax), %xmm0		/* fetch the IV before we store the block */
	movdqu	%xmm1, (%rsi, %rax)		/* in case input buf = output buf */
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	.Ldecrypt_cbc_256_loop1

.Ldecrypt_cbc_256_done:
	movdqu	%xmm0, (%rdx)			/* save the chaining value as the new IV */
	xor	%eax, %eax
	ret
	.size	intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
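/*
 * Residue from the web page this listing was extracted from; it is not
 * part of the assembly source:
 *   Measurement V0.5 in percent: C=91 H=86 G=88
 *   Processing time: 0.49 seconds (preprocessed on 2026-06-06)
 *   (c) Formatika GbR, Germany
 */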