// Language detection for: .rs, guessed language: Unknown {[0] [0] [0]} [method: centroid computation, simple weights, six dimensions]
// Implementation adapted from mbedtls.
use core::arch::{aarch64::*, asm};
use crate::consts::K64;
cpufeatures::new!(sha3_hwcap, "sha3");
/// Compresses every 128-byte block of `blocks` into `state`.
///
/// Dispatches to the hardware-accelerated `sha512_compress` when
/// `sha3_hwcap::get()` reports the `sha3` CPU feature, and to the software
/// implementation in `super::soft` otherwise.
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
    // after stabilization
    if sha3_hwcap::get() {
        // SAFETY: `sha3_hwcap::get()` returned `true`, so the `sha3` target
        // feature that `sha512_compress` is compiled for is available.
        unsafe { sha512_compress(state, blocks) }
    } else {
        super::soft::compress(state, blocks);
    }
}
/// Processes `blocks` with the Armv8 SHA-512 instructions, updating `state`
/// in place.
///
/// The eight state words are held as four two-lane vectors (`ab`, `cd`, `ef`,
/// `gh`) and the sixteen message words of a block as eight two-lane vectors
/// (`s0`..`s7`), so every step below performs two rounds at once.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `sha3` target feature.
#[target_feature(enable = "sha3")]
unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // SAFETY: Requires the sha3 feature.

    // Performs one pair of rounds.
    //
    // `$w` holds the two message words, `$k` is the index of the first of the
    // two round constants in `K64`, and `$a, $b, $c, $d` are the four state
    // vectors in their current rotation. Only `$d` and `$b` are written.
    macro_rules! round_pair {
        ($w:expr, $k:expr, $a:ident, $b:ident, $c:ident, $d:ident) => {{
            let initial_sum = vaddq_u64($w, vld1q_u64(&K64[$k]));
            let sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), $d);
            let intermed = vsha512hq_u64(sum, vextq_u64($c, $d, 1), vextq_u64($b, $c, 1));
            $d = vsha512h2q_u64(intermed, $b, $a);
            $b = vaddq_u64($b, intermed);
        }};
    }

    // Load state into vectors.
    let mut ab = vld1q_u64(state[0..2].as_ptr());
    let mut cd = vld1q_u64(state[2..4].as_ptr());
    let mut ef = vld1q_u64(state[4..6].as_ptr());
    let mut gh = vld1q_u64(state[6..8].as_ptr());

    // Iterate through the message blocks.
    for block in blocks {
        // Keep original state values.
        let ab_orig = ab;
        let cd_orig = cd;
        let ef_orig = ef;
        let gh_orig = gh;

        // Load the message block into vectors, assuming little endianness:
        // the bytes of every 64-bit lane are reversed after the load.
        let mut s0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[0..16].as_ptr())));
        let mut s1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[16..32].as_ptr())));
        let mut s2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[32..48].as_ptr())));
        let mut s3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[48..64].as_ptr())));
        let mut s4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[64..80].as_ptr())));
        let mut s5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[80..96].as_ptr())));
        let mut s6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[96..112].as_ptr())));
        let mut s7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[112..128].as_ptr())));

        // Rounds 0 and 1
        round_pair!(s0, 0, ab, cd, ef, gh);
        // Rounds 2 and 3
        round_pair!(s1, 2, gh, ab, cd, ef);
        // Rounds 4 and 5
        round_pair!(s2, 4, ef, gh, ab, cd);
        // Rounds 6 and 7
        round_pair!(s3, 6, cd, ef, gh, ab);
        // Rounds 8 and 9
        round_pair!(s4, 8, ab, cd, ef, gh);
        // Rounds 10 and 11
        round_pair!(s5, 10, gh, ab, cd, ef);
        // Rounds 12 and 13
        round_pair!(s6, 12, ef, gh, ab, cd);
        // Rounds 14 and 15
        round_pair!(s7, 14, cd, ef, gh, ab);

        // Remaining rounds: each step first extends the message schedule in
        // place for the vector it is about to consume.
        for t in (16..80).step_by(16) {
            // Rounds t and t + 1
            s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1));
            round_pair!(s0, t, ab, cd, ef, gh);

            // Rounds t + 2 and t + 3
            s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1));
            round_pair!(s1, t + 2, gh, ab, cd, ef);

            // Rounds t + 4 and t + 5
            s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1));
            round_pair!(s2, t + 4, ef, gh, ab, cd);

            // Rounds t + 6 and t + 7
            s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1));
            round_pair!(s3, t + 6, cd, ef, gh, ab);

            // Rounds t + 8 and t + 9
            s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1));
            round_pair!(s4, t + 8, ab, cd, ef, gh);

            // Rounds t + 10 and t + 11
            s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1));
            round_pair!(s5, t + 10, gh, ab, cd, ef);

            // Rounds t + 12 and t + 13
            s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1));
            round_pair!(s6, t + 12, ef, gh, ab, cd);

            // Rounds t + 14 and t + 15
            s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1));
            round_pair!(s7, t + 14, cd, ef, gh, ab);
        }

        // Add the block-specific state to the original state.
        ab = vaddq_u64(ab, ab_orig);
        cd = vaddq_u64(cd, cd_orig);
        ef = vaddq_u64(ef, ef_orig);
        gh = vaddq_u64(gh, gh_orig);
    }

    // Store vectors into state.
    vst1q_u64(state[0..2].as_mut_ptr(), ab);
    vst1q_u64(state[2..4].as_mut_ptr(), cd);
    vst1q_u64(state[4..6].as_mut_ptr(), ef);
    vst1q_u64(state[6..8].as_mut_ptr(), gh);
}
// TODO remove these polyfills once SHA3 intrinsics land
/// Polyfill that emits the `SHA512H` instruction through inline assembly.
///
/// `hash_ed` is both the first source operand and the destination; the
/// updated value is returned.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `sha3` target feature,
/// otherwise executing the instruction is undefined behaviour.
#[inline(always)]
unsafe fn vsha512hq_u64(
    mut hash_ed: uint64x2_t,
    hash_gf: uint64x2_t,
    kwh_kwh2: uint64x2_t,
) -> uint64x2_t {
    // The template must stay on one line: the `.2D` arrangement specifier
    // belongs directly after the vector register operand.
    asm!(
        "SHA512H {:q}, {:q}, {:v}.2D",
        inout(vreg) hash_ed, in(vreg) hash_gf, in(vreg) kwh_kwh2,
        options(pure, nomem, nostack, preserves_flags)
    );
    hash_ed
}
/// Polyfill that emits the `SHA512H2` instruction through inline assembly.
///
/// `sum_ab` is both the first source operand and the destination; the
/// updated value is returned.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `sha3` target feature,
/// otherwise executing the instruction is undefined behaviour.
#[inline(always)]
unsafe fn vsha512h2q_u64(
    mut sum_ab: uint64x2_t,
    hash_c_: uint64x2_t,
    hash_ab: uint64x2_t,
) -> uint64x2_t {
    // The template must stay on one line: the `.2D` arrangement specifier
    // belongs directly after the vector register operand.
    asm!(
        "SHA512H2 {:q}, {:q}, {:v}.2D",
        inout(vreg) sum_ab, in(vreg) hash_c_, in(vreg) hash_ab,
        options(pure, nomem, nostack, preserves_flags)
    );
    sum_ab
}
/// Polyfill that emits the `SHA512SU0` instruction through inline assembly.
///
/// `w0_1` is both the first source operand and the destination; the updated
/// value is returned.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `sha3` target feature,
/// otherwise executing the instruction is undefined behaviour.
#[inline(always)]
unsafe fn vsha512su0q_u64(mut w0_1: uint64x2_t, w2_: uint64x2_t) -> uint64x2_t {
    // The template must stay on one line: each `.2D` arrangement specifier
    // belongs directly after its vector register operand.
    asm!(
        "SHA512SU0 {:v}.2D, {:v}.2D",
        inout(vreg) w0_1, in(vreg) w2_,
        options(pure, nomem, nostack, preserves_flags)
    );
    w0_1
}
/// Polyfill that emits the `SHA512SU1` instruction through inline assembly.
///
/// `s01_s02` is both the first source operand and the destination; the
/// updated value is returned.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `sha3` target feature,
/// otherwise executing the instruction is undefined behaviour.
#[inline(always)]
unsafe fn vsha512su1q_u64(
    mut s01_s02: uint64x2_t,
    w14_15: uint64x2_t,
    w9_10: uint64x2_t,
) -> uint64x2_t {
    // The template must stay on one line: each `.2D` arrangement specifier
    // belongs directly after its vector register operand.
    asm!(
        "SHA512SU1 {:v}.2D, {:v}.2D, {:v}.2D",
        inout(vreg) s01_s02, in(vreg) w14_15, in(vreg) w9_10,
        options(pure, nomem, nostack, preserves_flags)
    );
    s01_s02
}