// SPDX-License-Identifier: LGPL-2.1+ /* * Copyright 2016 Tom aan de Wiel * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper: * * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms, * R.D. Brown, 1977
*/
/*
 * Note: bit 0 of the header must always be 0. Otherwise it cannot
 * be guaranteed that the magic 8 byte sequence (see below) can
 * never occur in the rlc output.
 */
/* Bit 15 of a 16-bit word; presumably the P-block flag of the block header - confirm against rlc()/derlc(). */
#define PFRAME_BIT BIT(15)
/* Mask selecting bits 1..12; NOTE(review): its use is not visible in this part of the file. */
#define DUPS_MASK 0x1ffe
/*
 * rlc() - run-length code one 8x8 block of s16 coefficients
 * @in:        64 coefficients, read row by row
 * @output:    destination for the 16-bit code words, stored via htons()
 * @blocktype: PBLOCK sets PFRAME_BIT in the header word, any other
 *             value produces a header of 0
 *
 * The first word written is the block header. The coefficients are then
 * visited in zigzag[] order; every code word carries the number of zero
 * coefficients that precede it in its low 4 bits (at most 14) and the
 * coefficient itself in the remaining upper 12 bits. A trailing run of
 * more than 14 zeros is not coded coefficient by coefficient but replaced
 * by a single ALL_ZEROS word.
 *
 * At most 65 words are written: the header plus one word per coefficient.
 *
 * Returns the number of 16-bit words written to @output.
 *
 * noinline_for_stack to work around
 * https://llvm.org/pr38809
 */
static int noinline_for_stack
rlc(const s16 *in, __be16 *output, int blocktype)
{
	s16 block[8 * 8];
	s16 *wp = block;
	int i = 0;
	int x, y;
	int ret = 0;
	int lastzero_run = 0;
	int to_encode;

	/* read in block from framebuffer */
	for (y = 0; y < 8; y++) {
		for (x = 0; x < 8; x++) {
			*wp = in[x + y * 8];
			wp++;
		}
	}

	/* keep track of amount of trailing zeros */
	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
		lastzero_run++;

	/* header word: derlc() always consumes one before the code words */
	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
	ret++;

	/*
	 * Only a trailing run longer than 14 zeros is collapsed into the
	 * ALL_ZEROS marker below; shorter runs are coded like any other.
	 */
	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);

	i = 0;
	while (i < to_encode) {
		int cnt = 0;
		int tmp;

		/* count leading zeros */
		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
			cnt++;
			i++;
			if (i == to_encode) {
				/* the last zero is emitted as the coefficient itself */
				cnt--;
				break;
			}
		}
		/* 4 bits for run, 12 for coefficient (quantization by 4) */
		*output++ = htons((cnt | tmp << 4));
		i++;
		ret++;
	}
	if (lastzero_run > 14) {
		*output = htons(ALL_ZEROS | 0);
		ret++;
	}

	return ret;
}
/* * This function will worst-case increase rlc_in by 65*2 bytes: * one s16 value for the header and 8 * 8 coefficients of type s16.
*/ static noinline_for_stack u16
derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
{ /* header */ const __be16 *input = *rlc_in;
u16 stat; int dec_count = 0;
s16 block[8 * 8 + 16];
s16 *wp = block; int i;
if (input > end_of_input) return OVERFLOW_BIT;
stat = ntohs(*input++);
/*
 * Now de-compress: each 16-bit input word expands to up to 15
 * coefficients (or fills the remainder of the 64 coefficients
 * with zeroes if it is the last word to expand).
 *
 * So block has to hold 8 * 8 + 16 entries, the '+ 16' is to
 * allow for overflow if the incoming data was malformed.
*/ while (dec_count < 8 * 8) {
s16 in; int length; int coeff;
if (input > end_of_input) return OVERFLOW_BIT;
in = ntohs(*input++);
length = in & 0xf;
coeff = in >> 4;
/* fill remainder with zeros */ if (length == 15) { for (i = 0; i < 64 - dec_count; i++)
*wp++ = 0; break;
}
for (i = 0; i < length; i++)
*wp++ = 0;
*wp++ = coeff;
dec_count += length + 1;
}
wp = block;
for (i = 0; i < 64; i++) { int pos = zigzag[i]; int y = pos / 8; int x = pos % 8;
/* * Not the nicest way of doing it, but P-blocks get twice the range of * that of the I-blocks. Therefore we need a type bigger than 8 bits. * Furthermore values can be negative... This is just a version that * works with 16 signed data
*/ staticvoid noinline_for_stack
fwht16(const s16 *block, s16 *output_block, int stride, int intra)
{ /* we'll need more than 8 bits for the transformed coefficients */
s32 workspace1[8], workspace2[8]; const s16 *tmp = block;
s16 *out = output_block; int i;
for (i = 0; i < 8; i++, tmp += stride, out += 8) { /* stage 1 */
workspace1[0] = tmp[0] + tmp[1];
workspace1[1] = tmp[0] - tmp[1];
static noinline_for_stack void
ifwht(const s16 *block, s16 *output_block, int intra)
{ /* * we'll need more than 8 bits for the transformed coefficients * use native unit of cpu
*/ int workspace1[8], workspace2[8]; int inter = intra ? 0 : 1; const s16 *tmp = block;
s16 *out = output_block; int i;
for (i = 0; i < 8; i++, tmp += 8, out += 8) { /* stage 1 */
workspace1[0] = tmp[0] + tmp[1];
workspace1[1] = tmp[0] - tmp[1];
for (k = 0; k < 8; k++) { for (l = 0; l < 8; l++) {
*deltablock = *work - *reference;
deltablock++;
work++;
reference++;
}
}
deltablock -= 64;
vard = var_inter(old, tmp); return vari <= vard ? IBLOCK : PBLOCK;
}
/*
 * fill_decoder_block() - store one 8x8 block of samples, clamped to 0..255
 * @dst:      address of the first destination sample of the block
 * @input:    64 consecutive s16 samples, consumed row by row
 * @stride:   distance between the starts of two destination rows, in bytes
 * @dst_step: distance between two horizontally adjacent destination
 *            samples, in bytes
 *
 * Negative inputs are stored as 0 and inputs above 255 as 255, everything
 * else is stored unchanged.
 */
static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
			       unsigned int dst_step)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input++, dst += dst_step) {
			if (*input < 0)
				*dst = 0;
			else if (*input > 255)
				*dst = 255;
			else
				*dst = *input;
		}
		/* the inner loop advanced dst by 8 samples; move to the next row */
		dst += stride - (8 * dst_step);
	}
}
/*
 * add_deltas() - add reference samples to an 8x8 block of deltas, in place
 * @deltas:   64 consecutive s16 deltas; each is replaced by delta + reference,
 *            clamped to 0..255
 * @ref:      address of the first reference sample of the block
 * @stride:   distance between the starts of two reference rows, in bytes
 * @ref_step: distance between two horizontally adjacent reference samples,
 *            in bytes
 */
static void add_deltas(s16 *deltas, const u8 *ref, int stride,
		       unsigned int ref_step)
{
	int k, l;

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltas += *ref;
			ref += ref_step;
			/*
			 * Due to quantizing, it might be possible that the
			 * decoded coefficients are slightly out of range
			 */
			if (*deltas < 0)
				*deltas = 0;
			else if (*deltas > 255)
				*deltas = 255;
			deltas++;
		}
		/* the inner loop advanced ref by 8 samples; move to the next row */
		ref += stride - (8 * ref_step);
	}
}
input = input_start; /* * The compressed stream should never contain the magic * header, so when we copy the YUV data we replace 0xff * by 0xfe. Since YUV is limited range such values * shouldn't appear anyway.
*/ for (j = 0; j < height; j++) { for (i = 0, p = input; i < width; i++, p += input_step)
*out++ = (*p == 0xff) ? 0xfe : *p;
input += stride;
}
*rlco = (__be16 *)out;
encoding &= ~FWHT_FRAME_PCODED;
} return encoding;
}
if (end_of_rlco_buf + 1 < *rlco + width * height / 2) returnfalse; for (i = 0; i < height; i++) {
memcpy(dst, *rlco, width);
dst += dst_stride;
*rlco += width / 2;
} returntrue;
}
/* * When decoding each macroblock the rlco pointer will be increased * by 65 * 2 bytes worst-case. * To avoid overflow the buffer has to be 65/64th of the actual raw * image size, just in case someone feeds it malicious data.
*/ for (j = 0; j < height / 8; j++) { for (i = 0; i < width / 8; i++) { const u8 *refp = ref + j * 8 * ref_stride +
i * 8 * ref_step;
u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.