/* AesOpt.c -- Intel's AES
2017-06-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if (_MSC_VER > 1500 ) || (_MSC_FULL_VER >= 150030729 )
#define USE_INTEL_AES
#endif
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
/*
  AesCbc_Encode_Intel: CBC-mode encryption of 16-byte blocks, in place,
  using the AES-NI encryption intrinsics.

  p         - state array read by this function as follows:
                p[0]            chaining value (IV); updated on return
                p[1]            low 32 bits hold a count N ("numRounds2")
                p[2]            key XORed into the block before the rounds
                p[3..2*N + 2]   keys consumed by the aesenc / aesenclast steps
  data      - blocks to encrypt; each block is overwritten with its result
  numBlocks - number of 16-byte blocks in data (0 leaves data untouched)

  Each block is XORed with the previous result (or the initial p[0]) before
  being encrypted, so blocks are processed strictly one after another.

  NOTE(review): the round loop is a do/while over (N - 1), so N is expected
  to be at least 2; N == 1 would make the counter wrap.
*/
void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
  __m128i state = p[0];

  while (numBlocks != 0)
  {
    const __m128i *rk = p + 3;
    UInt32 pairsLeft = *(const UInt32 *)(p + 1) - 1;

    /* Chain with the incoming block, then apply the initial key. */
    state = _mm_xor_si128(_mm_xor_si128(state, data[0]), p[2]);

    /* Two rounds per iteration. */
    do
    {
      state = _mm_aesenc_si128(_mm_aesenc_si128(state, rk[0]), rk[1]);
      rk += 2;
      pairsLeft--;
    }
    while (pairsLeft != 0);

    /* Final pair: one regular round followed by the last round. */
    state = _mm_aesenclast_si128(_mm_aesenc_si128(state, rk[0]), rk[1]);

    data[0] = state;
    data++;
    numBlocks--;
  }

  /* Hand the last result back as the chaining value for the next call. */
  p[0] = state;
}
/* Number of blocks processed together by the interleaved loops below. */
#define NUM_WAYS 3
/*
  AES_OP_W(op, n): applies the intrinsic `op` with key w[n] to the three
  states m0, m1 and m2. It expands to a braced block and is used WITHOUT a
  trailing semicolon. It relies on locals named w, m0, m1 and m2 being in
  scope at the point of use, and declares its own temporary `t`.
*/
#define AES_OP_W(op, n) { \
const __m128i t = w[n]; \
m0 = op(m0, t); \
m1 = op(m1, t); \
m2 = op(m2, t); \
}
/* Shorthands binding AES_OP_W to one decryption / encryption intrinsic. */
#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
/*
  AesCbc_Decode_Intel: CBC-mode decryption of 16-byte blocks, in place,
  using the AES-NI decryption intrinsics.

  p         - state array read by this function as follows:
                p[0]            chaining value (IV); updated on return
                p[1]            low 32 bits hold a count N ("numRounds2")
                p[2..2*N + 2]   keys, consumed from the highest index down
  data      - blocks to decrypt; each block is overwritten with its result
  numBlocks - number of 16-byte blocks in data (0 leaves data untouched)

  Blocks are handled NUM_WAYS at a time while enough remain, then one at a
  time. The result of each block is XORed with the previous input block
  (or the initial p[0]); the last input block is stored back into p[0].

  NOTE(review): the round loops are do/while over (N - 1), so N is expected
  to be at least 2; N == 1 would make the counter wrap.
*/
void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
  __m128i iv = p[0];

  /* Interleaved path: NUM_WAYS independent blocks per iteration.
     The names w, m0, m1, m2 are required by the AES_DEC* macros. */
  while (numBlocks >= NUM_WAYS)
  {
    UInt32 pairs = *(const UInt32 *)(p + 1);
    const __m128i *w = p + pairs * 2;
    __m128i m0, m1, m2;
    {
      /* Start with the key at the top of the schedule. */
      const __m128i topKey = w[2];
      m0 = _mm_xor_si128(topKey, data[0]);
      m1 = _mm_xor_si128(topKey, data[1]);
      m2 = _mm_xor_si128(topKey, data[2]);
    }
    pairs--;
    do
    {
      AES_DEC(1)
      AES_DEC(0)
      w -= 2;
    }
    while (--pairs != 0);
    AES_DEC(1)
    AES_DEC_LAST(0)
    {
      /* Snapshot the inputs before overwriting them: each one is the
         chaining value for the block that follows it. */
      const __m128i c0 = data[0];
      const __m128i c1 = data[1];
      const __m128i c2 = data[2];
      data[0] = _mm_xor_si128(m0, iv);
      data[1] = _mm_xor_si128(m1, c0);
      data[2] = _mm_xor_si128(m2, c1);
      iv = c2;
    }
    data += NUM_WAYS;
    numBlocks -= NUM_WAYS;
  }

  /* Tail path: remaining blocks, one at a time. */
  while (numBlocks != 0)
  {
    UInt32 pairs = *(const UInt32 *)(p + 1);
    const __m128i *rk = p + pairs * 2;
    const __m128i cipher = *data;
    __m128i m = _mm_xor_si128(rk[2], cipher);
    pairs--;
    do
    {
      m = _mm_aesdec_si128(m, rk[1]);
      m = _mm_aesdec_si128(m, rk[0]);
      rk -= 2;
    }
    while (--pairs != 0);
    m = _mm_aesdec_si128(m, rk[1]);
    m = _mm_aesdeclast_si128(m, rk[0]);
    *data = _mm_xor_si128(m, iv);
    iv = cipher;
    data++;
    numBlocks--;
  }

  p[0] = iv;
}
/*
  AesCtr_Code_Intel: CTR-mode processing of 16-byte blocks, in place, using
  the AES-NI encryption intrinsics. Each block of data is XORed with the
  encrypted counter value.

  p         - state array read by this function as follows:
                p[0]            counter; updated on return
                p[1]            low 32 bits hold a count N ("numRounds2")
                p[2]            key XORed into the counter before the rounds
                p[3..2*N + 2]   keys consumed by the aesenc / aesenclast steps
  data      - blocks to process; each block is overwritten with its result
  numBlocks - number of 16-byte blocks in data (0 leaves data untouched)

  The counter is incremented BEFORE it is used for a block, and the
  increment is a 64-bit lane add (_mm_add_epi64), so only the low 64 bits
  count; there is no carry into the high 64 bits.

  Blocks are handled NUM_WAYS at a time while enough remain, then one at a
  time.

  NOTE(review): the round loops are do/while over (N - 1), so N is expected
  to be at least 2; N == 1 would make the counter wrap.
*/
void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
  __m128i ctr = *p;
  /* 1 in the low lane, zero elsewhere. Built with an SSE2 intrinsic instead
     of the m128i_u64 union member, which exists only in MSVC's own
     definition of __m128i and does not compile with other headers that
     define __m128i as a vector type (e.g. clang-cl). */
  const __m128i one = _mm_cvtsi32_si128(1);

  /* Interleaved path: NUM_WAYS counter values per iteration.
     The names w, m0, m1, m2 are required by the AES_ENC* macros. */
  for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
  {
    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
    const __m128i *w = p;
    __m128i m0, m1, m2;
    {
      const __m128i t = w[2];
      ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
      ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
      ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
    }
    w += 3;
    do
    {
      AES_ENC(0)
      AES_ENC(1)
      w += 2;
    }
    while (--numRounds2 != 0);
    AES_ENC(0)
    AES_ENC_LAST(1)
    /* Combine the encrypted counters with the data. */
    data[0] = _mm_xor_si128(data[0], m0);
    data[1] = _mm_xor_si128(data[1], m1);
    data[2] = _mm_xor_si128(data[2], m2);
  }

  /* Tail path: remaining blocks, one at a time. */
  for (; numBlocks != 0; numBlocks--, data++)
  {
    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
    const __m128i *w = p;
    __m128i m;
    ctr = _mm_add_epi64(ctr, one);
    m = _mm_xor_si128(ctr, p[2]);
    w += 3;
    do
    {
      m = _mm_aesenc_si128(m, w[0]);
      m = _mm_aesenc_si128(m, w[1]);
      w += 2;
    }
    while (--numRounds2 != 0);
    m = _mm_aesenc_si128(m, w[0]);
    m = _mm_aesenclast_si128(m, w[1]);
    *data = _mm_xor_si128(*data, m);
  }

  /* Store the advanced counter for the next call. */
  *p = ctr;
}
#else
/*
  Prototypes of the functions that the *_Intel wrappers below forward to
  when USE_INTEL_AES is not defined. They are only declared here; their
  definitions live outside this file.
*/
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
/*
  AesCbc_Encode_Intel (fallback build, USE_INTEL_AES not defined):
  forwards all three arguments unchanged to AesCbc_Encode.
*/
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCbc_Encode(p, data, numBlocks);
}
/*
  AesCbc_Decode_Intel (fallback build, USE_INTEL_AES not defined):
  forwards all three arguments unchanged to AesCbc_Decode.
*/
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCbc_Decode(p, data, numBlocks);
}
/*
  AesCtr_Code_Intel (fallback build, USE_INTEL_AES not defined):
  forwards all three arguments unchanged to AesCtr_Code.
*/
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCtr_Code(p, data, numBlocks);
}
#endif
/*
  Page footer carried over from the source listing (not part of the program):
  Measurement V0.5 in percent: C=93 H=90 G=91
  Processing time: 0.9 seconds
  (preprocessed on 2026-06-06)
  (c) Formatika GbR, Germany
*/