// Copyright 2017 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License.
// Value for the leave_nulls_escaped argument of CUnescapeInternal().
// Passing false means escape sequences that decode to NUL (e.g. "\0", "\x00",
// "\u0000") are written to the output as a real NUL byte instead of being
// copied through in their escaped form.
constexpr bool kUnescapeNulls = false;
// Returns true iff 'c' is one of the ASCII octal digits '0' through '7'.
inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); }
inlineunsignedint hex_digit_to_int(char c) {
static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, "Character set must be ASCII.");
assert(absl::ascii_isxdigit(static_cast<unsignedchar>(c))); unsignedint x = static_cast<unsignedchar>(c); if (x > '9') {
x += 9;
} return x & 0xf;
}
inlinebool IsSurrogate(char32_t c, absl::string_view src,
absl::Nullable<std::string*> error) { if (c >= 0xD800 && c <= 0xDFFF) { if (error) {
*error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
src);
} returntrue;
} returnfalse;
}
// ---------------------------------------------------------------------- // CUnescapeInternal() // Implements both CUnescape() and CUnescapeForNullTerminatedString(). // // Unescapes C escape sequences and is the reverse of CEscape(). // // If 'source' is valid, stores the unescaped string and its size in // 'dest' and 'dest_len' respectively, and returns true. Otherwise // returns false and optionally stores the error description in // 'error'. Set 'error' to nullptr to disable error reporting. // // 'dest' should point to a buffer that is at least as big as 'source'. // 'source' and 'dest' may be the same. // // NOTE: any changes to this function must also be reflected in the older // UnescapeCEscapeSequences(). // ---------------------------------------------------------------------- bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
absl::Nonnull<char*> dest,
absl::Nonnull<ptrdiff_t*> dest_len,
absl::Nullable<std::string*> error) { char* d = dest; constchar* p = source.data(); constchar* end = p + source.size(); constchar* last_byte = end - 1;
// Small optimization for case where source = dest and there's no escaping while (p == d && p < end && *p != '\\') p++, d++;
while (p < end) { if (*p != '\\') {
*d++ = *p++;
} else { if (++p > last_byte) { // skip past the '\\' if (error) *error = "String cannot end with \\"; returnfalse;
} switch (*p) { case'a': *d++ = '\a'; break; case'b': *d++ = '\b'; break; case'f': *d++ = '\f'; break; case'n': *d++ = '\n'; break; case'r': *d++ = '\r'; break; case't': *d++ = '\t'; break; case'v': *d++ = '\v'; break; case'\\': *d++ = '\\'; break; case'?': *d++ = '\?'; break; // \? Who knew? case'\'': *d++ = '\''; break; case'"': *d++ = '\"'; break; case'0': case'1': case'2': case'3': case'4': case'5': case'6': case'7': { // octal digit: 1 to 3 digits constchar* octal_start = p; unsignedint ch = static_cast<unsignedint>(*p - '0'); // digit 1 if (p < last_byte && is_octal_digit(p[1]))
ch = ch * 8 + static_cast<unsignedint>(*++p - '0'); // digit 2 if (p < last_byte && is_octal_digit(p[1]))
ch = ch * 8 + static_cast<unsignedint>(*++p - '0'); // digit 3 if (ch > 0xff) { if (error) {
*error = "Value of \\" +
std::string(octal_start, static_cast<size_t>(p + 1 - octal_start)) + " exceeds 0xff";
} returnfalse;
} if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character const size_t octal_size = static_cast<size_t>(p + 1 - octal_start);
*d++ = '\\';
memmove(d, octal_start, octal_size);
d += octal_size; break;
}
*d++ = static_cast<char>(ch); break;
} case'x': case'X': { if (p >= last_byte) { if (error) *error = "String cannot end with \\x"; returnfalse;
} elseif (!absl::ascii_isxdigit(static_cast<unsignedchar>(p[1]))) { if (error) *error = "\\x cannot be followed by a non-hex digit"; returnfalse;
} unsignedint ch = 0; constchar* hex_start = p; while (p < last_byte &&
absl::ascii_isxdigit(static_cast<unsignedchar>(p[1]))) // Arbitrarily many hex digits
ch = (ch << 4) + hex_digit_to_int(*++p); if (ch > 0xFF) { if (error) {
*error = "Value of \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start)) + " exceeds 0xff";
} returnfalse;
} if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character const size_t hex_size = static_cast<size_t>(p + 1 - hex_start);
*d++ = '\\';
memmove(d, hex_start, hex_size);
d += hex_size; break;
}
*d++ = static_cast<char>(ch); break;
} case'u': { // \uhhhh => convert 4 hex digits to UTF-8
char32_t rune = 0; constchar* hex_start = p; if (p + 4 >= end) { if (error) {
*error = "\\u must be followed by 4 hex digits: \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start));
} returnfalse;
} for (int i = 0; i < 4; ++i) { // Look one char ahead. if (absl::ascii_isxdigit(static_cast<unsignedchar>(p[1]))) {
rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
} else { if (error) {
*error = "\\u must be followed by 4 hex digits: \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start));
} returnfalse;
}
} if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character
*d++ = '\\';
memmove(d, hex_start, 5); // u0000
d += 5; break;
} if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) { returnfalse;
}
d += strings_internal::EncodeUTF8Char(d, rune); break;
} case'U': { // \Uhhhhhhhh => convert 8 hex digits to UTF-8
char32_t rune = 0; constchar* hex_start = p; if (p + 8 >= end) { if (error) {
*error = "\\U must be followed by 8 hex digits: \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start));
} returnfalse;
} for (int i = 0; i < 8; ++i) { // Look one char ahead. if (absl::ascii_isxdigit(static_cast<unsignedchar>(p[1]))) { // Don't change rune until we're sure this // is within the Unicode limit, but do advance p.
uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); if (newrune > 0x10FFFF) { if (error) {
*error = "Value of \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start)) + " exceeds Unicode limit (0x10FFFF)";
} returnfalse;
} else {
rune = newrune;
}
} else { if (error) {
*error = "\\U must be followed by 8 hex digits: \\" +
std::string(hex_start, static_cast<size_t>(p + 1 - hex_start));
} returnfalse;
}
} if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character
*d++ = '\\';
memmove(d, hex_start, 9); // U00000000
d += 9; break;
} if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) { returnfalse;
}
d += strings_internal::EncodeUTF8Char(d, rune); break;
} default: { if (error) *error = std::string("Unknown escape sequence: \\") + *p; returnfalse;
}
}
p++; // read past letter we escaped
}
}
*dest_len = d - dest; returntrue;
}
// NOTE(review): in this copy the signature below sits on the same physical
// line as the banner comment, and only the first statement of the body is
// present -- the remainder of the function (and its closing brace) is not
// visible here. Restore it from the upstream file before building.
// ---------------------------------------------------------------------- // CUnescapeInternal() // // Same as above but uses a std::string for output. 'source' and 'dest' // may be the same. // ---------------------------------------------------------------------- bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
absl::Nonnull<std::string*> dest,
absl::Nullable<std::string*> error) {
// Size 'dest' to source.size() bytes; the char* overload documents that its
// output buffer must be at least as large as its input.
strings_internal::STLStringResizeUninitialized(dest, source.size());
// ----------------------------------------------------------------------
// CEscape()
// CHexEscape()
// Utf8SafeCEscape()
// Utf8SafeCHexEscape()
//    Escapes 'src' using C-style escape sequences.  This is useful for
//    preparing query flags.  The 'Hex' version uses hexadecimal rather than
//    octal sequences.  The 'Utf8Safe' version does not touch UTF-8 bytes.
//
//    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
//
//    'use_hex' selects "\xNN" (true) or three-digit octal "\NNN" (false) for
//    bytes that have no dedicated escape.  'utf8_safe' leaves every byte
//    >= 0x80 unescaped.  Returns the escaped copy of 'src'.
// ----------------------------------------------------------------------
std::string CEscapeInternal(absl::string_view src, bool use_hex,
                            bool utf8_safe) {
  std::string dest;
  bool last_hex_escape = false;  // true if last output char was \xNN.

  for (char c : src) {
    bool is_hex_escape = false;
    switch (c) {
      case '\n': dest.append("\\" "n"); break;
      case '\r': dest.append("\\" "r"); break;
      case '\t': dest.append("\\" "t"); break;
      case '\"': dest.append("\\" "\""); break;
      case '\'': dest.append("\\" "'"); break;
      case '\\': dest.append("\\" "\\"); break;
      default: {
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        const unsigned char uc = static_cast<unsigned char>(c);
        if ((!utf8_safe || uc < 0x80) &&
            (!absl::ascii_isprint(uc) ||
             (last_hex_escape && absl::ascii_isxdigit(uc)))) {
          if (use_hex) {
            dest.append("\\" "x");
            dest.push_back(numbers_internal::kHexChar[uc / 16]);
            dest.push_back(numbers_internal::kHexChar[uc % 16]);
            is_hex_escape = true;
          } else {
            // Three octal digits; each index is in [0, 7], so the hex digit
            // table doubles as an octal digit table.
            dest.append("\\");
            dest.push_back(numbers_internal::kHexChar[uc / 64]);
            dest.push_back(numbers_internal::kHexChar[(uc % 64) / 8]);
            dest.push_back(numbers_internal::kHexChar[uc % 8]);
          }
        } else {
          dest.push_back(c);
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  return dest;
}
// Calculates the length of the C-style escaped version of 'src'. // Assumes that non-printable characters are escaped using octal sequences, and // that UTF-8 bytes are not handled specially. inline size_t CEscapedLength(absl::string_view src) {
size_t escaped_len = 0; // The maximum value of kCEscapedLen[x] is 4, so we can escape any string of // length size_t_max/4 without checking for overflow.
size_t unchecked_limit =
std::min<size_t>(src.size(), std::numeric_limits<size_t>::max() / 4);
size_t i = 0; while (i < unchecked_limit) { // Common case: No need to check for overflow.
escaped_len += kCEscapedLen[static_cast<unsignedchar>(src[i++])];
} while (i < src.size()) { // Beyond unchecked_limit we need to check for overflow before adding.
size_t char_len = kCEscapedLen[static_cast<unsignedchar>(src[i++])];
ABSL_INTERNAL_CHECK(
escaped_len <= std::numeric_limits<size_t>::max() - char_len, "escaped_len overflow");
escaped_len += char_len;
} return escaped_len;
}
// Base64UnescapeInternal()
//
// Decodes the base64 text in src_param[0, szsrc) using the lookup table
// 'unbase64', which is indexed by the input byte; a negative entry marks a
// byte that is not a base64 data character.
//
//   dest / szdest  Output buffer and its capacity. When 'dest' is non-null the
//                  visible write sites refuse to write past 'szdest' and
//                  return false instead. When 'dest' is null the visible
//                  write sites are skipped and only 'destidx' is advanced.
//   len            On success receives the number of decoded bytes.
//
// Returns false for an illegal input character, for a trailing group that
// carries only 6 bits (state == 1), for insufficient room in 'dest', or when
// the number of trailing pad characters is neither 0 nor the expected count.
// ASCII whitespace is skipped, and both '=' and '.' count as padding.
//
// NOTE(review): in this copy many statements share a physical line with a
// preceding "//" comment (including the signature below and the GET_INPUT
// macro definition), and the code that should follow the "temp has 24 bits"
// comment inside the fast loop is not visible. Compare with the upstream file
// before compiling.
// Reverses the mapping in Base64EscapeInternal; see that method's // documentation for details of the mapping. bool Base64UnescapeInternal(absl::Nullable<constchar*> src_param, size_t szsrc,
absl::Nullable<char*> dest, size_t szdest,
absl::Nonnull<constsignedchar*> unbase64,
absl::Nonnull<size_t*> len) { staticconstchar kPad64Equals = '='; staticconstchar kPad64Dot = '.';
size_t destidx = 0; int decode = 0; int state = 0; unsignedchar ch = 0; unsignedint temp = 0;
// If "char" is signed by default, using *src as an array index results in // accessing negative array elements. Treat the input as a pointer to // unsigned char to avoid this. constunsignedchar* src = reinterpret_cast<constunsignedchar*>(src_param);
// The GET_INPUT macro gets the next input character, skipping // over any whitespace, and stopping when we reach the end of the // string or when we read any non-data character. The arguments are // an arbitrary identifier (used as a label for goto) and the number // of data bytes that must remain in the input to avoid aborting the // loop. #define GET_INPUT(label, remain) \
label: \
--szsrc; \
ch = *src++; \
decode = unbase64[ch]; \ if (decode < 0) { \ if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
state = 4 - remain; \ break; \
}
// if dest is null, we're just checking to see if it's legal input // rather than producing output. (I suspect this could just be done // with a regexp...). We duplicate the loop so this test can be // outside it instead of in every iteration.
if (dest) { // This loop consumes 4 input bytes and produces 3 output bytes // per iteration. We can't know at the start that there is enough // data left in the string for a full iteration, so the loop may // break out in the middle; if so 'state' will be set to the // number of input bytes read.
while (szsrc >= 4) { // We'll start by optimistically assuming that the next four // bytes of the string (src[0..3]) are four good data bytes // (that is, no nulls, whitespace, padding chars, or illegal // chars). We need to test src[0..2] for nulls individually // before constructing temp to preserve the property that we // never read past a null in the string (no matter how long // szsrc claims the string is).
if (!src[0] || !src[1] || !src[2] ||
((temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) & 0x80000000)) { // Iff any of those four characters was bad (null, illegal, // whitespace, padding), then temp's high bit will be set // (because unbase64[] is -1 for all bad characters). // // We'll back up and resort to the slower decoder, which knows // how to handle those cases.
GET_INPUT(first, 4);
temp = static_cast<unsignedchar>(decode);
GET_INPUT(second, 3);
temp = (temp << 6) | static_cast<unsignedchar>(decode);
GET_INPUT(third, 2);
temp = (temp << 6) | static_cast<unsignedchar>(decode);
GET_INPUT(fourth, 1);
temp = (temp << 6) | static_cast<unsignedchar>(decode);
} else { // We really did have four good data bytes, so advance four // characters in the string.
szsrc -= 4;
src += 4;
}
// temp has 24 bits of input, so write that out as three bytes.
// NOTE(review): no statements follow the comment above in this copy; the
// three-byte write-out and the end of the fast loop are not visible here.
// if the loop terminated because we read a bad character, return // now. if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
!absl::ascii_isspace(ch)) returnfalse;
if (ch == kPad64Equals || ch == kPad64Dot) { // if we stopped by hitting an '=' or '.', un-read that character -- we'll // look at it again when we count to check for the proper number of // equals signs at the end.
++szsrc;
--src;
} else { // This loop consumes 1 input byte per iteration. It's used to // clean up the 0-3 input bytes remaining when the first, faster // loop finishes. 'temp' contains the data from 'state' input // characters read by the first loop. while (szsrc > 0) {
--szsrc;
ch = *src++;
decode = unbase64[ch]; if (decode < 0) { if (absl::ascii_isspace(ch)) { continue;
} elseif (ch == kPad64Equals || ch == kPad64Dot) { // back up one character; we'll read it again when we check // for the correct number of pad characters at the end.
++szsrc;
--src; break;
} else { returnfalse;
}
}
// Each input character gives us six bits of output.
temp = (temp << 6) | static_cast<unsignedchar>(decode);
++state; if (state == 4) { // If we've accumulated 24 bits of output, write that out as // three bytes. if (dest) { if (destidx + 3 > szdest) returnfalse;
dest[destidx + 2] = static_cast<char>(temp);
temp >>= 8;
dest[destidx + 1] = static_cast<char>(temp);
temp >>= 8;
dest[destidx] = static_cast<char>(temp);
}
destidx += 3;
state = 0;
temp = 0;
}
}
}
// 'state' is the number of 6-bit groups accumulated in 'temp' that have not
// been written out yet: 2 groups yield one more byte, 3 groups yield two.
// Process the leftover data contained in 'temp' at the end of the input. int expected_equals = 0; switch (state) { case0: // Nothing left over; output is a multiple of 3 bytes. break;
case1: // Bad input; we have 6 bits left over. returnfalse;
case2: // Produce one more output byte from the 12 input bits we have left. if (dest) { if (destidx + 1 > szdest) returnfalse;
temp >>= 4;
dest[destidx] = static_cast<char>(temp);
}
++destidx;
expected_equals = 2; break;
case3: // Produce two more output bytes from the 18 input bits we have left. if (dest) { if (destidx + 2 > szdest) returnfalse;
temp >>= 2;
dest[destidx + 1] = static_cast<char>(temp);
temp >>= 8;
dest[destidx] = static_cast<char>(temp);
}
destidx += 2;
expected_equals = 1; break;
default: // state should have no other values at this point.
ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
state);
}
// The remainder of the string should be all whitespace, mixed with // exactly 0 equals signs, or exactly 'expected_equals' equals // signs. (Always accepting 0 equals signs is an Abseil extension // not covered in the RFC, as is accepting dot as the pad character.)
int equals = 0; while (szsrc > 0) { if (*src == kPad64Equals || *src == kPad64Dot)
++equals; elseif (!absl::ascii_isspace(*src)) returnfalse;
--szsrc;
++src;
}
constbool ok = (equals == 0 || equals == expected_equals); if (ok) *len = destidx; return ok;
}
// NOTE(review): the statements below look like the tail of a wrapper that
// decodes base64 into a std::string ('dest'); its signature and the code that
// sizes 'dest' / computes 'dest_len' are not visible in this copy, and neither
// is its closing brace. Restore them from the upstream file.
//
// Visible behavior: decode straight into the string's storage, clear the
// string and fail if decoding fails, otherwise trim it to the decoded length.
// We are getting the destination buffer by getting the beginning of the // string and converting it into a char *.
size_t len; constbool ok =
Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); if (!ok) {
dest->clear(); returnfalse;
}
// could be shorter if there was padding
assert(len <= dest_len);
dest->erase(len);
// This is a templated function so that T can be either a char* // or a string. This works because we use the [] operator to access // individual characters at a time. template <typename T> void HexStringToBytesInternal(absl::Nullable<constchar*> from, T to,
size_t num) { for (size_t i = 0; i < num; i++) {
to[i] = static_cast<char>(kHexValueLenient[from[i * 2] & 0xFF] << 4) +
(kHexValueLenient[from[i * 2 + 1] & 0xFF]);
}
}
// This is a templated function so that T can be either a char* or a // std::string. template <typename T> void BytesToHexStringInternal(absl::Nullable<constunsignedchar*> src, T dest,
size_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { constchar* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
std::copy(hex_p, hex_p + 2, dest_ptr);
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.