// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses.
*/
/* ZSTD_compressSubBlock_literal() : * Compresses literals section for a sub-block. * When we have to write the Huffman table we will sometimes choose a header * size larger than necessary. This is because we have to pick the header size * before we know the table size + compressed size, so we have a bound on the * table size. If we guessed incorrectly, we fall back to uncompressed literals. * * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded * in writing the header, otherwise it is set to 0. * * hufMetadata->hType has literals block type info. * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block * and the following sub-blocks' literals sections will be Treeless_Literals_Block. * @return : compressed size of literals section of a sub-block * Or 0 if unable to compress.
* Or error code */ static size_t
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, const ZSTD_hufCTablesMetadata_t* hufMetadata, const BYTE* literals, size_t litSize, void* dst, size_t dstSize, constint bmi2, int writeEntropy, int* entropyWritten)
{
size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart + lhSize;
U32 const singleStream = lhSize == 3;
SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
/* ZSTD_compressSubBlock_sequences() : * Compresses sequences section for a sub-block. * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have * symbol compression modes for the super-block. * The first successfully compressed block will have these in its header. * We set entropyWritten=1 when we succeed in compressing the sequences. * The following sub-blocks will always have repeat mode. * @return : compressed size of sequences section of a sub-block * Or 0 if it is unable to compress
* Or error code. */ static size_t
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, const ZSTD_fseCTablesMetadata_t* fseMetadata, const SeqDef* sequences, size_t nbSeq, const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, const ZSTD_CCtx_params* cctxParams, void* dst, size_t dstCapacity, constint bmi2, int writeEntropy, int* entropyWritten)
{ constint longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
BYTE* seqHead;
{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
sequences, nbSeq,
longOffsets, bmi2);
FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
op += bitstreamSize; /* zstd versions <= 1.3.4 mistakenly report corruption when * FSE_readNCount() receives a buffer < 4 bytes. * Fixed by https://github.com/facebook/zstd/pull/1146. * This can happen when the last set_compressed table present is 2 * bytes and the bitstream is only one byte. * In this exceedingly rare case, we will simply emit an uncompressed * block, since it isn't worth optimizing.
*/ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
assert(fseMetadata->lastCountSize + bitstreamSize == 3);
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return0;
} #endif
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
}
/* zstd versions <= 1.4.0 mistakenly report error when * sequences section body size is less than 3 bytes. * Fixed by https://github.com/facebook/zstd/pull/1664. * This can happen when the previous sequences section block is compressed * with rle mode and the current block's sequences section is compressed * with repeat mode where sequences section body size can be 1 byte.
*/ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (op-seqHead < 4) {
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " "an uncompressed block when sequences are < 4 bytes"); return0;
} #endif
/* first sequence => at least one sequence*/
budget += sp[0].litLength * avgLitCost + avgSeqCost; if (budget > targetBudget) return1;
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
/* loop over sequences */ for (n=1; n<nbSeqs; n++) {
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
budget += currentCost;
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH); /* stop when sub-block budget is reached */ if ( (budget > targetBudget) /* though continue to expand until the sub-block is deemed compressible */
&& (budget < inSize * BYTESCALE) ) break;
}
return n;
}
/* ZSTD_compressSubBlock_multi() : * Breaks super-block into multiple sub-blocks and compresses them. * Entropy will be written into the first block. * The following blocks use repeat_mode to compress. * Sub-blocks are all compressed, except the last one when beneficial. * @return : compressed size of the super block (which features multiple ZSTD blocks)
* or 0 if it failed to compress. */ static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr, const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock, const ZSTD_entropyCTablesMetadata_t* entropyMetadata, const ZSTD_CCtx_params* cctxParams, void* dst, size_t dstCapacity, constvoid* src, size_t srcSize, constint bmi2, U32 lastBlock, void* workspace, size_t wkspSize)
{ const SeqDef* const sstart = seqStorePtr->sequencesStart; const SeqDef* const send = seqStorePtr->sequences; const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
size_t const nbSeqs = (size_t)(send - sstart); const BYTE* const lstart = seqStorePtr->litStart; const BYTE* const lend = seqStorePtr->lit; const BYTE* lp = lstart;
size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart; const BYTE* llCodePtr = seqStorePtr->llCode; const BYTE* mlCodePtr = seqStorePtr->mlCode; const BYTE* ofCodePtr = seqStorePtr->ofCode;
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); int writeSeqEntropy = 1;
/* update pointers, the nb of literals borrowed from next sequence must be preserved */ if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount; /* Entropy only needs to be written once */ if (litEntropyWritten) {
writeLitEntropy = 0;
} if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
}
}
if (writeLitEntropy) {
DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
} if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { /* If we haven't written our entropy tables, then we've violated our contract and * must emit an uncompressed block.
*/
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block"); return0;
}
if (ip < iend) { /* some data left : last part of the block sent uncompressed */
size_t const rSize = (size_t)((iend - ip));
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize; /* We have to regenerate the repcodes because we've skipped some sequences */ if (sp < send) { const SeqDef* seq;
Repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
(unsigned)(op-ostart)); return (size_t)(op-ostart);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.