/*
 * Use carryless multiply version of crc32c when buffer size is >= 512 to
 * account for FPU state save/restore overhead.
 *
 * This is the minimum length, in bytes, that the caller's length must reach
 * before the PCLMULQDQ-based path (which is bracketed by kernel_fpu_begin()
 * and kernel_fpu_end()) is considered; shorter inputs use the inline
 * single-stream CRC32 instruction loop instead.
 */
#define CRC32C_PCLMUL_BREAKEVEN 512
if (!static_branch_likely(&have_crc32)) return crc32c_base(crc, p, len);
if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { /* * Long length, the vector registers are usable, and the CPU is * 64-bit and supports both CRC32 and PCLMULQDQ instructions. * It is worthwhile to divide the data into multiple streams, * CRC them independently, and combine them using PCLMULQDQ. * crc32c_x86_3way() does this using 3 streams, which is the * most that x86_64 CPUs have traditionally been capable of. * * However, due to improved VPCLMULQDQ performance on newer * CPUs, use crc32_lsb_vpclmul_avx512() instead of * crc32c_x86_3way() when the CPU supports VPCLMULQDQ and has a * "good" implementation of AVX-512. * * Future work: the optimal strategy on Zen 3--5 is actually to * use both crc32q and VPCLMULQDQ in parallel. Unfortunately, * different numbers of streams and vector lengths are optimal * on each CPU microarchitecture, making it challenging to take * advantage of this. (Zen 5 even supports 7 parallel crc32q, a * major upgrade.) For now, just choose between * crc32c_x86_3way() and crc32_lsb_vpclmul_avx512(). The latter * is needed anyway for crc32_le(), so we just reuse it here.
*/
kernel_fpu_begin(); if (static_branch_likely(&have_vpclmul_avx512))
crc = crc32_lsb_vpclmul_avx512(crc, p, len,
crc32_lsb_0x82f63b78_consts.fold_across_128_bits_consts); else
crc = crc32c_x86_3way(crc, p, len);
kernel_fpu_end(); return crc;
}
/*
 * Short length, XMM registers unusable, or the CPU is 32-bit; but the
 * CPU supports CRC32 instructions.  Just issue a single stream of CRC32
 * instructions inline.  While this doesn't use the CPU's CRC32
 * throughput very well, it avoids the need to combine streams.  Stream
 * combination would be inefficient here.
 */
for (num_longs = len / sizeof(unsignedlong);
num_longs != 0; num_longs--, p += sizeof(unsignedlong)) asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsignedlong *)p));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.