/* crc32.c -- compute the CRC-32 of a data stream *Copyright(C)1995-2022MarkAdler *Forconditionsofdistributionanduse,seecopyrightnoticeinzlib.h * *ThisinterleavedimplementationofaCRCmakesuseofpipelinedmultiple *arithmetic-logicunits,commonlyfoundinmodernCPUcores.Itisdueto *KadatchandJenkins(2010).Seedoc/crc-doc.1.0.pdfinthisdistribution.
*/
/* DefineWandtheassociatedz_word_ttype.IfWisnotdefined,thena braidedcalculationisnotused,andtheassociatedtablesandcodearenot compiled.
*/ #ifdef Z_TESTW # if Z_TESTW-1 != -1 # define W Z_TESTW # endif #else # ifdef MAKECRCH # define W 8/* required for MAKECRCH */ # else # ifdefined(__x86_64__) || defined(__aarch64__) # define W 8 # else # define W 4 # endif # endif #endif #ifdef W # if W == 8 && defined(Z_U8) typedef Z_U8 z_word_t; # elif defined(Z_U4) # undef W # define W 4 typedef Z_U4 z_word_t; # else # undef W # endif #endif
/* If available, use the ARM processor CRC32 instruction. */ #ifdefined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 # define ARMCRC32 #endif
/* Local functions. */
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
local z_crc_t FAR crc_table[256];
local z_crc_t FAR x2n_table[32];
local void make_crc_table OF((void)); #ifdef W
local z_word_t FAR crc_big_table[256];
local z_crc_t FAR crc_braid_table[W][256];
local z_word_t FAR crc_braid_big_table[W][256];
local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); #endif #ifdef MAKECRCH
local void write_table OF((FILE *, const z_crc_t FAR *, int));
local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
local void write_table64 OF((FILE *, const z_word_t FAR *, int)); #endif/* MAKECRCH */
/* Definition of once functionality. */ typedefstruct once_s once_t;
local void once OF((once_t *, void (*)(void)));
/* Check for the availability of atomics. */ #ifdefined(__STDC__) && __STDC_VERSION__ >= 201112L && \
!defined(__STDC_NO_ATOMICS__)
#include <stdatomic.h>
/* Structure for once(), which must be initialized with ONCE_INIT. */ struct once_s {
atomic_flag begun;
atomic_int done;
}; #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
/* Runtheprovidedinit()functionexactlyonce,evenifmultiplethreads invokeonce()atthesametime.Thestatemustbeaonce_tinitializedwith ONCE_INIT.
*/
local void once(state, init)
once_t *state; void (*init)(void);
{ if (!atomic_load(&state->done)) { if (atomic_flag_test_and_set(&state->begun)) while (!atomic_load(&state->done))
; else {
init();
atomic_store(&state->done, 1);
}
}
}
#else/* no atomics */
/* Structure for once(), which must be initialized with ONCE_INIT. */ struct once_s { volatileint begun; volatileint done;
}; #define ONCE_INIT {0, 0}
/* Test and set. Alas, not atomic, but tries to minimize the period of
vulnerability. */
local int test_and_set OF((intvolatile *));
local int test_and_set(flag) intvolatile *flag;
{ int was;
was = *flag;
*flag = 1; return was;
}
/* Run the provided init() function once. This is not thread-safe. */
local void once(state, init)
once_t *state; void (*init)(void);
{ if (!state->done) { if (test_and_set(&state->begun)) while (!state->done)
; else {
init();
state->done = 1;
}
}
}
#endif
/* State for once(). */
local once_t made = ONCE_INIT;
local void make_crc_table()
{ unsigned i, j, n;
z_crc_t p;
/* initialize the CRC of bytes tables */ for (i = 0; i < 256; i++) {
p = i; for (j = 0; j < 8; j++)
p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
crc_table[i] = p; #ifdef W
crc_big_table[i] = byte_swap(p); #endif
}
/* initialize the x^2^n mod p(x) table */
p = (z_crc_t)1 << 30; /* x^1 */
x2n_table[0] = p; for (n = 1; n < 32; n++)
x2n_table[n] = p = multmodp(p, p);
#ifdef W /* initialize the braiding tables -- needs x2n_table[] */
braid(crc_braid_table, crc_braid_big_table, N, W); #endif
#ifdef MAKECRCH
{ /* Thecrc32.hheaderfilecontainstablesforboth32-bitand64-bit z_word_t's,andsorequiresa64-bittypebeavailable.Inthatcase, z_word_tmustbedefinedtobe64-bits.Thiscodethenalsogenerates andwritesoutthetablesforthecasethatz_word_tis32bits.
*/ #if !defined(W) || W != 8 # error Need a 64-bit integer type in order to generate crc32.h. #endif
FILE *out; int k, n;
z_crc_t ltl[8][256];
z_word_t big[8][256];
out = fopen("crc32.h", "w"); if (out == NULL) return;
/* write out little-endian CRC table to crc32.h */
fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n" " * Generated automatically by crc32.c\n */\n" "\n" "local const z_crc_t FAR crc_table[] = {\n" " ");
write_table(out, crc_table, 256);
fprintf(out, "};\n");
/* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
fprintf(out, "\n" "#ifdef W\n" "\n" "#if W == 8\n" "\n" "local const z_word_t FAR crc_big_table[] = {\n" " ");
write_table64(out, crc_big_table, 256);
fprintf(out, "};\n");
/* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
fprintf(out, "\n" "#else /* W == 4 */\n" "\n" "local const z_word_t FAR crc_big_table[] = {\n" " ");
write_table32hi(out, crc_big_table, 256);
fprintf(out, "};\n" "\n" "#endif\n");
/* write out braid tables for each value of N */ for (n = 1; n <= 6; n++) {
fprintf(out, "\n" "#if N == %d\n", n);
/* compute braid tables for this N and 64-bit word_t */
braid(ltl, big, n, 8);
/* write out braid tables for 64-bit z_word_t to crc32.h */
fprintf(out, "\n" "#if W == 8\n" "\n" "local const z_crc_t FAR crc_braid_table[][256] = {\n"); for (k = 0; k < 8; k++) {
fprintf(out, " {");
write_table(out, ltl[k], 256);
fprintf(out, "}%s", k < 7 ? ",\n" : "");
}
fprintf(out, "};\n" "\n" "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); for (k = 0; k < 8; k++) {
fprintf(out, " {");
write_table64(out, big[k], 256);
fprintf(out, "}%s", k < 7 ? ",\n" : "");
}
fprintf(out, "};\n");
/* compute braid tables for this N and 32-bit word_t */
braid(ltl, big, n, 4);
/* write out braid tables for 32-bit z_word_t to crc32.h */
fprintf(out, "\n" "#else /* W == 4 */\n" "\n" "local const z_crc_t FAR crc_braid_table[][256] = {\n"); for (k = 0; k < 4; k++) {
fprintf(out, " {");
write_table(out, ltl[k], 256);
fprintf(out, "}%s", k < 3 ? ",\n" : "");
}
fprintf(out, "};\n" "\n" "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); for (k = 0; k < 4; k++) {
fprintf(out, " {");
write_table32hi(out, big[k], 256);
fprintf(out, "}%s", k < 3 ? ",\n" : "");
}
fprintf(out, "};\n" "\n" "#endif\n" "\n" "#endif\n");
}
fprintf(out, "\n" "#endif\n");
/* Writethe32-bitvaluesintable[0..k-1]toout,fiveperlinein hexadecimalseparatedbycommas.
*/
local void write_table(out, table, k)
FILE *out; const z_crc_t FAR *table; int k;
{ int n;
for (n = 0; n < k; n++)
fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
(unsignedlong)(table[n]),
n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
}
/* Writethehigh32-bitsofeachvalueintable[0..k-1]toout,fiveperline inhexadecimalseparatedbycommas.
*/
local void write_table32hi(out, table, k)
FILE *out; const z_word_t FAR *table; int k;
{ int n;
for (n = 0; n < k; n++)
fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
(unsignedlong)(table[n] >> 32),
n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
}
/* Writethe64-bitvaluesintable[0..k-1]toout,threeperlinein hexadecimalseparatedbycommas.Thisassumesthatifthereisa64-bit type,thenthereisalsoalonglongintegertype,anditisatleast64 bits.Ifnot,thenthetypecastandformatstringcanbeadjusted accordingly.
*/
local void write_table64(out, table, k)
FILE *out; const z_word_t FAR *table; int k;
{ int n;
for (n = 0; n < k; n++)
fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
(unsignedlonglong)(table[n]),
n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
}
/* Actually do the deed. */ int main()
{
make_crc_table(); return0;
}
#endif/* MAKECRCH */
#ifdef W /* Generatethelittleandbig-endianbraidtablesforthegivennandz_word_t sizew.Eacharraymusthaveroomforwblocksof256elements.
*/
local void braid(ltl, big, n, w)
z_crc_t ltl[][256];
z_word_t big[][256]; int n; int w;
{ int k;
z_crc_t i, p, q; for (k = 0; k < w; k++) {
p = x2nmodp((n * w + 3 - k) << 3, 0);
ltl[k][0] = 0;
big[w - 1 - k][0] = 0; for (i = 1; i < 256; i++) {
ltl[k][i] = q = multmodp(i << 24, p);
big[w - 1 - k][i] = byte_swap(q);
}
}
} #endif
/* Returna(x)multipliedbyb(x)modulop(x),wherep(x)istheCRCpolynomial, reflected.Forspeed,thisrequiresthatanotbezero.
*/
local z_crc_t multmodp(a, b)
z_crc_t a;
z_crc_t b;
{
z_crc_t m, p;
m = (z_crc_t)1 << 31;
p = 0; for (;;) { if (a & m) {
p ^= b; if ((a & (m - 1)) == 0) break;
}
m >>= 1;
b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
} return p;
}
/* Constantsempiricallydeterminedtomaximizespeed.Thesevaluesarefrom measurementsonaCortex-A57.Yourmileagemayvary.
*/ #define Z_BATCH 3990/* number of words in a batch */ #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ #define Z_BATCH_MIN 800/* fewest words in a final batch */
/* Pre-condition the CRC */
crc = (~crc) & 0xffffffff;
/* Compute the CRC up to a word boundary. */ while (len && ((z_size_t)buf & 7) != 0) {
len--;
val = *buf++;
__asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
}
/* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
word = (z_word_t const *)buf;
num = len >> 3;
len &= 7;
/* Do three interleaved CRCs to realize the throughput of one crc32x instructionpercycle.EachCRCiscalculatedonZ_BATCHwords.The
three CRCs are combined into a single CRC after each set of batches. */ while (num >= 3 * Z_BATCH) {
crc1 = 0;
crc2 = 0; for (i = 0; i < Z_BATCH; i++) {
val0 = word[i];
val1 = word[i + Z_BATCH];
val2 = word[i + 2 * Z_BATCH];
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
}
word += 3 * Z_BATCH;
num -= 3 * Z_BATCH;
crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
}
/* Do one last smaller batch with the remaining words, if there are enough
to pay for the combination of CRCs. */
last = num / 3; if (last >= Z_BATCH_MIN) {
last2 = last << 1;
crc1 = 0;
crc2 = 0; for (i = 0; i < last; i++) {
val0 = word[i];
val1 = word[i + last];
val2 = word[i + last2];
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
}
word += 3 * last;
num -= 3 * last;
val = x2nmodp(last, 6);
crc = multmodp(val, crc) ^ crc1;
crc = multmodp(val, crc) ^ crc2;
}
/* Compute the CRC on any remaining words. */ for (i = 0; i < num; i++) {
val0 = word[i];
__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
}
word += num;
/* Complete the CRC on any remaining bytes. */
buf = (constunsignedchar FAR *)word; while (len) {
len--;
val = *buf++;
__asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
}
/* Pre-condition the CRC */
crc = (~crc) & 0xffffffff;
#ifdef W
/* If provided enough bytes, do a braided CRC calculation. */ if (len >= N * W + W - 1) {
z_size_t blks;
z_word_t const *words; unsigned endian; int k;
/* Compute the CRC up to a z_word_t boundary. */ while (len && ((z_size_t)buf & (W - 1)) != 0) {
len--;
crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
}
/* Compute the CRC on as many N z_word_t blocks as are available. */
blks = len / (N * W);
len -= blks * N * W;
words = (z_word_t const *)buf;
/* Do endian check at execution time instead of compile time, since ARM processorscanchangetheendianessatexecutiontime.Ifthe compilerknowswhattheendianesswillbe,itcanoptimizeoutthe
check and the unused branch. */
endian = 1; if (*(unsignedchar *)&endian) { /* Little endian. */
z_crc_t crc0;
z_word_t word0; #if N > 1
z_crc_t crc1;
z_word_t word1; #if N > 2
z_crc_t crc2;
z_word_t word2; #if N > 3
z_crc_t crc3;
z_word_t word3; #if N > 4
z_crc_t crc4;
z_word_t word4; #if N > 5
z_crc_t crc5;
z_word_t word5; #endif #endif #endif #endif #endif
/* Initialize the CRC for each braid. */
crc0 = crc; #if N > 1
crc1 = 0; #if N > 2
crc2 = 0; #if N > 3
crc3 = 0; #if N > 4
crc4 = 0; #if N > 5
crc5 = 0; #endif #endif #endif #endif #endif
/* Processthefirstblks-1blocks,computingtheCRCsoneachbraid independently.
*/ while (--blks) { /* Load the word for each braid into registers. */
word0 = crc0 ^ words[0]; #if N > 1
word1 = crc1 ^ words[1]; #if N > 2
word2 = crc2 ^ words[2]; #if N > 3
word3 = crc3 ^ words[3]; #if N > 4
word4 = crc4 ^ words[4]; #if N > 5
word5 = crc5 ^ words[5]; #endif #endif #endif #endif #endif
words += N;
/* Compute and update the CRC for each word. The loop should
get unrolled. */
crc0 = crc_braid_table[0][word0 & 0xff]; #if N > 1
crc1 = crc_braid_table[0][word1 & 0xff]; #if N > 2
crc2 = crc_braid_table[0][word2 & 0xff]; #if N > 3
crc3 = crc_braid_table[0][word3 & 0xff]; #if N > 4
crc4 = crc_braid_table[0][word4 & 0xff]; #if N > 5
crc5 = crc_braid_table[0][word5 & 0xff]; #endif #endif #endif #endif #endif for (k = 1; k < W; k++) {
crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; #if N > 1
crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; #if N > 2
crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; #if N > 3
crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; #if N > 4
crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; #if N > 5
crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; #endif #endif #endif #endif #endif
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.