Quelle aes-neon.S

Sprache: ARM64-Assembler (AArch64, GNU as)

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
*
* Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Wrappers around the function-definition macros: the name passed in gets
 * the prefix "neon_" pasted in front of it before it is handed on to
 * SYM_FUNC_START / SYM_FUNC_END.
 * NOTE(review): nothing in this file uses these wrappers directly;
 * presumably the included aes-modes.S defines its entry points through
 * them - confirm there.
 */
#define AES_FUNC_START(func)  SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)  SYM_FUNC_END(neon_ ## func)

/*
 * Symbolic names for fixed vector registers. xtsmask and cbciv both name
 * v7; vctr names v4. The macros defined below do not reference them.
 * NOTE(review): presumably consumed by the included aes-modes.S - confirm
 * there, including that the two users of v7 are never live at the same
 * time.
 */
xtsmask  .req v7
cbciv  .req v7
vctr  .req v4

/*
 * xts_reload_mask - forwards to xts_load_mask.
 *
 *   tmp: handed through unchanged as the only argument of xts_load_mask
 *
 * xts_load_mask is not defined in this file.
 * NOTE(review): presumably provided by aes-modes.S - confirm.
 */
.macro  xts_reload_mask, tmp
xts_load_mask \tmp
.endm

/*
 * special case for the neon-bs driver calling into this one for CTS
 *
 *   reg: general-purpose register whose bit 1 is tested
 *   lbl: branch target taken when that bit is set
 *
 * Expands to a single test-and-branch; execution falls through when
 * bit 1 of reg is clear.
 */
.macro  xts_cts_skip_tw, reg, lbl
tbnz  \reg, #1, \lbl
.endm

/*
 * multiply by polynomial 'x' in GF(2^8)
 *
 *   out:   destination vector operand (arrangement supplied by the caller)
 *   in:    source vector operand
 *   temp:  scratch vector operand, overwritten
 *   const: vector operand holding the reduction constant in every lane;
 *          the caller in this file passes v12, which the prepare macro
 *          fills with 0x1b
 *
 * temp is written before in is read for the second time and before out is
 * written, so temp must be a different register from both in and out.
 */
.macro  mul_by_x, out, in, temp, const
sshr  \temp, \in, #7  /* arithmetic shift: lane becomes all-ones if its top bit was set */
shl  \out, \in, #1  /* multiply by x, dropping the bit shifted out */
and  \temp, \temp, \const  /* keep the reduction constant only for lanes that overflowed */
eor  \out, \out, \temp  /* fold the reduction back in */
.endm

/*
 * multiply by polynomial 'x^2' in GF(2^8)
 *
 *   out:   destination vector operand (arrangement supplied by the caller)
 *   in:    source vector operand
 *   temp:  scratch vector operand, overwritten
 *   const: vector operand holding the reduction constant in every lane;
 *          the caller in this file passes v12, which the prepare macro
 *          fills with 0x1b
 *
 * Two bits are shifted out of each byte here, so the reduction term is the
 * polynomial (carry-less) product of those two bits with the constant
 * rather than a simple mask.
 *
 * temp is written before in is read for the second time and before out is
 * written, so temp must be a different register from both in and out.
 */
.macro  mul_by_x2, out, in, temp, const
ushr  \temp, \in, #6  /* isolate the two bits that the shift below drops */
shl  \out, \in, #2  /* multiply by x^2 without reduction */
pmul  \temp, \temp, \const  /* polynomial multiply: reduction term for the dropped bits */
eor  \out, \out, \temp  /* fold the reduction back in */
.endm

/*
 * preload the entire Sbox
 *
 *   sbox:      symbol of the 256-byte substitution table to load
 *   shiftrows: symbol of the 16-byte ShiftRows permutation to load
 *   temp:      general-purpose scratch register, overwritten
 *
 * Register state established for the other macros in this file:
 *   v12     - 0x1b in every byte (constant passed to the mul_by_x* macros)
 *   v13     - ShiftRows permutation (tbl index vector used by do_block*)
 *   v14     - .Lror32by8 permutation (tbl index vector used by mix_columns*)
 *   v16-v31 - the 256 table bytes, 64 per group of four registers
 *
 * ldr_l and adr_l are not defined in this file.
 * NOTE(review): presumably the symbol-addressing helpers from
 * <asm/assembler.h> - confirm.
 */
.macro  prepare, sbox, shiftrows, temp
movi  v12.16b, #0x1b
ldr_l  q13, \shiftrows, \temp
ldr_l  q14, .Lror32by8, \temp
adr_l  \temp, \sbox
/* four 64-byte loads; the first three post-increment temp by 64 */
ld1  {v16.16b-v19.16b}, [\temp], #64
ld1  {v20.16b-v23.16b}, [\temp], #64
ld1  {v24.16b-v27.16b}, [\temp], #64
ld1  {v28.16b-v31.16b}, [\temp]
.endm

/*
 * do preload for encryption
 *
 *   ignore0, ignore1: accepted but never referenced
 *   temp:             general-purpose scratch register, passed to prepare
 *
 * Runs prepare with the table crypto_aes_sbox and the permutation
 * .LForward_ShiftRows.
 */
.macro  enc_prepare, ignore0, ignore1, temp
prepare  crypto_aes_sbox, .LForward_ShiftRows, \temp
.endm

/*
 * enc_switch_key - expands to no instructions; none of the three
 * parameters is referenced.
 * NOTE(review): presumably kept so that code shared with other AES
 * back-ends can invoke it unconditionally - confirm against the users in
 * aes-modes.S.
 */
.macro  enc_switch_key, ignore0, ignore1, temp
/* do nothing */
.endm

/*
 * do preload for decryption
 *
 *   ignore0, ignore1: accepted but never referenced
 *   temp:             general-purpose scratch register, passed to prepare
 *
 * Runs prepare with the table crypto_aes_inv_sbox and the permutation
 * .LReverse_ShiftRows.
 */
.macro  dec_prepare, ignore0, ignore1, temp
prepare  crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
.endm

/*
 * apply SubBytes transformation using the preloaded Sbox
 *
 *   in: vector register name without arrangement suffix (.16b is appended
 *       here); holds the 16 state bytes on entry and their substitutes on
 *       exit
 *
 * The 256 table bytes sit in v16-v31 (loaded by the prepare macro), but one
 * tbl/tbx lookup spans only four registers, i.e. 64 entries. The lookup is
 * therefore done in four passes, with v15 subtracted from the indices
 * between passes. tbl writes zero for an out-of-range index and tbx leaves
 * the destination byte untouched, so every byte is filled by exactly one of
 * the four passes.
 *
 * Expects v15 to hold 0x40 in every byte (do_block sets this up before
 * invoking this macro). Clobbers v9, v10 and v11.
 */
.macro  sub_bytes, in
sub  v9.16b, \in\().16b, v15.16b  /* indices rebased for table bytes 64..127 */
tbl  \in\().16b, {v16.16b-v19.16b}, \in\().16b  /* pass 1: table bytes 0..63 */
sub  v10.16b, v9.16b, v15.16b  /* indices rebased for table bytes 128..191 */
tbx  \in\().16b, {v20.16b-v23.16b}, v9.16b  /* pass 2: table bytes 64..127 */
sub  v11.16b, v10.16b, v15.16b  /* indices rebased for table bytes 192..255 */
tbx  \in\().16b, {v24.16b-v27.16b}, v10.16b  /* pass 3: table bytes 128..191 */
tbx  \in\().16b, {v28.16b-v31.16b}, v11.16b  /* pass 4: table bytes 192..255 */
.endm

/*
 * apply MixColumns transformation
 *
 *   in:  vector register name without arrangement suffix; transformed in
 *        place
 *   enc: assemble-time constant; when it equals 0 an extra pre-multiply
 *        step is emitted in front of the common code (decrypt_block passes
 *        0, encrypt_block passes 1)
 *
 * Expects v12 = 0x1b in every byte and v14 = the .Lror32by8 permutation
 * (both set up by the prepare macro). Clobbers v8 and v9.
 */
.macro  mix_columns, in, enc
.if  \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
eor  \in\().16b, \in\().16b, v8.16b  /* in = 5 * in */
rev32  v8.8h, v8.8h  /* swap the 16-bit halves of each 32-bit word of 4 * in */
eor  \in\().16b, \in\().16b, v8.16b  /* add 4 * (byte two positions along in the same word) */
.endif

mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b  /* v9 = 2 * in, v8 is scratch */
rev32  v8.8h, \in\().8h  /* v8 = in with the 16-bit halves of each word swapped */
eor  v8.16b, v8.16b, v9.16b
eor  \in\().16b, \in\().16b, v8.16b
tbl  \in\().16b, {\in\().16b}, v14.16b  /* rotate each 32-bit word by 8 bits */
eor  \in\().16b, \in\().16b, v8.16b
.endm

/*
 * do_block - run the full sequence of AES rounds on a single 16-byte state.
 *
 *   enc:    assemble-time constant handed to mix_columns (1 from
 *           encrypt_block, 0 from decrypt_block)
 *   in:     vector register name without arrangement suffix; holds the
 *           state, transformed in place
 *   rounds: register holding the number of rounds; only read
 *   rk:     register pointing at the first round key; only read
 *   rkp:    scratch register, walks over the remaining round keys
 *   i:      scratch register, counts the remaining rounds
 *
 * One 16-byte round key is loaded before the loop and one more per
 * iteration, so rounds + 1 keys are read starting at rk. The last
 * iteration leaves the loop before mix_columns and only adds the final
 * round key.
 *
 * Expects the register state set up by the prepare macro (v12-v14,
 * v16-v31). Clobbers v15 directly and v8-v11 through sub_bytes and
 * mix_columns.
 */
.macro  do_block, enc, in, rounds, rk, rkp, i
ld1  {v15.4s}, [\rk]
add  \rkp, \rk, #16
mov  \i, \rounds
.La\@: eor  \in\().16b, \in\().16b, v15.16b  /* ^round key */
movi  v15.16b, #0x40  /* v15 is reused as the index step that sub_bytes expects */
tbl  \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
sub  \i, \i, #1
ld1  {v15.4s}, [\rkp], #16  /* next round key; rkp advances by 16 */
cbz  \i, .Lb\@  /* last round: skip mix_columns */
mix_columns \in, \enc
b  .La\@
.Lb\@: eor  \in\().16b, \in\().16b, v15.16b  /* ^round key */
.endm

/*
 * encrypt_block - do_block with enc fixed to 1.
 * in, rounds, rk, rkp and i are forwarded unchanged; see do_block for
 * their meaning and for the registers that get clobbered.
 */
.macro  encrypt_block, in, rounds, rk, rkp, i
do_block 1, \in, \rounds, \rk, \rkp, \i
.endm

/*
 * decrypt_block - do_block with enc fixed to 0, which makes mix_columns
 * emit its additional pre-multiply step.
 * in, rounds, rk, rkp and i are forwarded unchanged; see do_block for
 * their meaning and for the registers that get clobbered.
 */
.macro  decrypt_block, in, rounds, rk, rkp, i
do_block 0, \in, \rounds, \rk, \rkp, \i
.endm

/*
* Interleaved versions: functionally equivalent to the
* ones above, but applied to AES states in parallel.
*/

/*
 * sub_bytes_4x - table substitution on four states at once.
 *
 *   in0-in3: vector register names without arrangement suffix; each holds
 *            16 state bytes on entry and their substitutes on exit
 *
 * Same four-pass lookup as sub_bytes (64 table bytes per pass, indices
 * reduced by v15 between passes), with the instructions of the four states
 * interleaved. v8-v11 carry the running index vectors of in0-in3.
 *
 * Expects v15 to hold 0x40 in every byte (do_block_4x sets this up) and
 * the table in v16-v31. Clobbers v8, v9, v10 and v11.
 */
.macro  sub_bytes_4x, in0, in1, in2, in3
/* pass 1 (table bytes 0..63), while preparing the indices for pass 2 */
sub  v8.16b, \in0\().16b, v15.16b
tbl  \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub  v9.16b, \in1\().16b, v15.16b
tbl  \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub  v10.16b, \in2\().16b, v15.16b
tbl  \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
sub  v11.16b, \in3\().16b, v15.16b
tbl  \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
/* pass 2 (table bytes 64..127), interleaved with the index update for pass 3 */
tbx  \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx  \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub  v8.16b, v8.16b, v15.16b
tbx  \in2\().16b, {v20.16b-v23.16b}, v10.16b
sub  v9.16b, v9.16b, v15.16b
tbx  \in3\().16b, {v20.16b-v23.16b}, v11.16b
sub  v10.16b, v10.16b, v15.16b
/* pass 3 (table bytes 128..191), interleaved with the index update for pass 4 */
tbx  \in0\().16b, {v24.16b-v27.16b}, v8.16b
sub  v11.16b, v11.16b, v15.16b
tbx  \in1\().16b, {v24.16b-v27.16b}, v9.16b
sub  v8.16b, v8.16b, v15.16b
tbx  \in2\().16b, {v24.16b-v27.16b}, v10.16b
sub  v9.16b, v9.16b, v15.16b
tbx  \in3\().16b, {v24.16b-v27.16b}, v11.16b
sub  v10.16b, v10.16b, v15.16b
/* pass 4 (table bytes 192..255) */
tbx  \in0\().16b, {v28.16b-v31.16b}, v8.16b
sub  v11.16b, v11.16b, v15.16b
tbx  \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx  \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx  \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm

/*
 * mul_by_x_2x - the mul_by_x computation on two vectors, interleaved.
 *
 *   out0, out1: destination register names
 *   in0, in1:   source register names
 *   tmp0, tmp1: scratch register names, overwritten
 *   const:      register name holding the reduction constant in every byte
 *               (mix_columns_2x passes v12)
 *
 * All arguments are register names without arrangement suffix; .16b is
 * appended here. Each tmp is written before the matching in is read again
 * and before the matching out is written, so tmpN must differ from inN and
 * outN.
 */
.macro  mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
sshr  \tmp0\().16b, \in0\().16b, #7  /* all-ones for bytes whose top bit is set */
shl  \out0\().16b, \in0\().16b, #1
sshr  \tmp1\().16b, \in1\().16b, #7
and  \tmp0\().16b, \tmp0\().16b, \const\().16b  /* reduction constant for overflowing bytes only */
shl  \out1\().16b, \in1\().16b, #1
and  \tmp1\().16b, \tmp1\().16b, \const\().16b
eor  \out0\().16b, \out0\().16b, \tmp0\().16b  /* fold the reduction back in */
eor  \out1\().16b, \out1\().16b, \tmp1\().16b
.endm

/*
 * mul_by_x2_2x - the mul_by_x2 computation on two vectors, interleaved.
 *
 *   out0, out1: destination register names
 *   in0, in1:   source register names
 *   tmp0, tmp1: scratch register names, overwritten
 *   const:      register name holding the reduction constant in every byte
 *               (mix_columns_2x passes v12)
 *
 * All arguments are register names without arrangement suffix; .16b is
 * appended here. Each tmp is written before the matching in is read again
 * and before the matching out is written, so tmpN must differ from inN and
 * outN.
 */
.macro  mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
ushr  \tmp0\().16b, \in0\().16b, #6  /* the two bits that the shift by 2 drops */
shl  \out0\().16b, \in0\().16b, #2
ushr  \tmp1\().16b, \in1\().16b, #6
pmul  \tmp0\().16b, \tmp0\().16b, \const\().16b  /* polynomial multiply: reduction term */
shl  \out1\().16b, \in1\().16b, #2
pmul  \tmp1\().16b, \tmp1\().16b, \const\().16b
eor  \out0\().16b, \out0\().16b, \tmp0\().16b  /* fold the reduction back in */
eor  \out1\().16b, \out1\().16b, \tmp1\().16b
.endm

/*
 * mix_columns_2x - the mix_columns computation on two states, interleaved.
 *
 *   in0, in1: vector register names without arrangement suffix;
 *             transformed in place
 *   enc:      assemble-time constant; when it equals 0 the extra
 *             pre-multiply step is emitted in front of the common code
 *
 * Expects v12 = 0x1b in every byte and v14 = the .Lror32by8 permutation
 * (both set up by the prepare macro). Clobbers v8, v9, v10 and v11.
 */
.macro  mix_columns_2x, in0, in1, enc
.if  \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
eor  \in0\().16b, \in0\().16b, v8.16b
rev32  v8.8h, v8.8h  /* swap the 16-bit halves of each 32-bit word */
eor  \in1\().16b, \in1\().16b, v9.16b
rev32  v9.8h, v9.8h
eor  \in0\().16b, \in0\().16b, v8.16b
eor  \in1\().16b, \in1\().16b, v9.16b
.endif

mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12  /* v8/v9 = 2 * in0/in1, v10/v11 are scratch */
rev32  v10.8h, \in0\().8h
rev32  v11.8h, \in1\().8h
eor  v10.16b, v10.16b, v8.16b
eor  v11.16b, v11.16b, v9.16b
eor  \in0\().16b, \in0\().16b, v10.16b
eor  \in1\().16b, \in1\().16b, v11.16b
tbl  \in0\().16b, {\in0\().16b}, v14.16b  /* rotate each 32-bit word by 8 bits */
tbl  \in1\().16b, {\in1\().16b}, v14.16b
eor  \in0\().16b, \in0\().16b, v10.16b
eor  \in1\().16b, \in1\().16b, v11.16b
.endm

/*
 * do_block_4x - run the full sequence of AES rounds on four 16-byte states
 * with one shared key schedule.
 *
 *   enc:     assemble-time constant handed to mix_columns_2x (1 from
 *            encrypt_block4x, 0 from decrypt_block4x)
 *   in0-in3: vector register names without arrangement suffix; hold the
 *            four states, transformed in place
 *   rounds:  register holding the number of rounds; only read
 *   rk:      register pointing at the first round key; only read
 *   rkp:     scratch register, walks over the remaining round keys
 *   i:       scratch register, counts the remaining rounds
 *
 * One 16-byte round key is loaded before the loop and one more per
 * iteration, so rounds + 1 keys are read starting at rk. The last
 * iteration leaves the loop before the mix_columns_2x steps and only adds
 * the final round key.
 *
 * Expects the register state set up by the prepare macro (v12-v14,
 * v16-v31). Clobbers v15 directly and v8-v11 through sub_bytes_4x and
 * mix_columns_2x.
 */
.macro  do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1  {v15.4s}, [\rk]
add  \rkp, \rk, #16
mov  \i, \rounds
.La\@: eor  \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor  \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor  \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor  \in3\().16b, \in3\().16b, v15.16b /* ^round key */
movi  v15.16b, #0x40  /* v15 is reused as the index step that sub_bytes_4x expects */
tbl  \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl  \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl  \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl  \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
sub_bytes_4x \in0, \in1, \in2, \in3
sub  \i, \i, #1
ld1  {v15.4s}, [\rkp], #16  /* next round key; rkp advances by 16 */
cbz  \i, .Lb\@  /* last round: skip the mix_columns_2x steps */
mix_columns_2x \in0, \in1, \enc
mix_columns_2x \in2, \in3, \enc
b  .La\@
.Lb\@: eor  \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor  \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor  \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor  \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm

/*
 * encrypt_block4x - do_block_4x with enc fixed to 1.
 * All other arguments are forwarded unchanged; see do_block_4x for their
 * meaning and for the registers that get clobbered.
 */
.macro  encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm

/*
 * decrypt_block4x - do_block_4x with enc fixed to 0, which makes
 * mix_columns_2x emit its additional pre-multiply step.
 * All other arguments are forwarded unchanged; see do_block_4x for their
 * meaning and for the registers that get clobbered.
 */
.macro  decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm

#include "aes-modes.S"

/*
 * Index vectors for the tbl instruction, stored as 128-bit constants in
 * read-only data. Once loaded into a q register, the least significant
 * byte of a constant is the source index for result byte 0, the next byte
 * the index for result byte 1, and so on.
 */
.section ".rodata", "a"
.align  4
/* ShiftRows permutation loaded into v13 by enc_prepare */
.LForward_ShiftRows:
.octa  0x0b06010c07020d08030e09040f0a0500

/* ShiftRows permutation loaded into v13 by dec_prepare */
.LReverse_ShiftRows:
.octa  0x0306090c0f0205080b0e0104070a0d00

/*
 * Loaded into v14 by prepare: result bytes 0..3 of every 32-bit word come
 * from source bytes 1, 2, 3, 0, i.e. each word is rotated right by 8 bits.
 */
.Lror32by8:
.octa  0x0c0f0e0d080b0a090407060500030201

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.8 Sekunden (vorverarbeitet am 2026-06-05) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.