/*
 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 *
 * @p: pointer to the first byte of the encoded character.
 *
 * The lead byte is classified by UTF8_COMPUTE and the remaining bytes
 * are folded in by UTF8_GET (both macros are defined outside this
 * function; their exact behaviour is not restated here).
 *
 * Returns the decoded character, or (uint32_t)-1 when UTF8_COMPUTE
 * reports a length of -1 for the lead byte.  If @p does not point to a
 * valid UTF-8 encoded character, results are otherwise undefined, so
 * callers are expected to validate the input first.
 */
static uint32_t
_utf8_get_char (const unsigned char *p)
{
    int i, mask = 0, len;
    uint32_t result;
    unsigned char c = *p;

    UTF8_COMPUTE (c, mask, len);
    if (len == -1)
        return (uint32_t)-1;

    UTF8_GET (result, p, i, mask, len);

    return result;
}
/*
 * Like _utf8_get_char, but takes a maximum length and reports
 * incomplete sequences.
 *
 * @p:       pointer to the first byte of the encoded character.
 * @max_len: number of bytes available at @p, or a negative value if
 *           the input is nul-terminated and no limit applies.
 *
 * Returns the decoded value on success.  Error results always have the
 * top bit set:
 *   (uint32_t)-1  the sequence is malformed (bad lead byte, a non-
 *                 continuation byte inside the sequence, or an overlong
 *                 encoding);
 *   (uint32_t)-2  the sequence is an incomplete trailing character: it
 *                 is cut short either by @max_len or by a nul byte.
 *
 * The classic 1..6 byte lead-byte scheme is accepted here; values
 * outside the Unicode range are not rejected by this function and are
 * left for the caller to filter.
 */
static uint32_t
_utf8_get_char_extended (const unsigned char *p,
                         long                 max_len)
{
    int i, len;
    uint32_t wc = *p;

    /* Classify the lead byte: determine the sequence length and strip
     * the length-prefix bits so that only payload bits remain in wc. */
    if (wc < 0x80) {
        return wc;
    } else if (wc < 0xc0) {
        /* A continuation byte cannot start a character. */
        return (uint32_t)-1;
    } else if (wc < 0xe0) {
        len = 2;
        wc &= 0x1f;
    } else if (wc < 0xf0) {
        len = 3;
        wc &= 0x0f;
    } else if (wc < 0xf8) {
        len = 4;
        wc &= 0x07;
    } else if (wc < 0xfc) {
        len = 5;
        wc &= 0x03;
    } else if (wc < 0xfe) {
        len = 6;
        wc &= 0x01;
    } else {
        return (uint32_t)-1;
    }

    /* Not enough input for the whole sequence: distinguish "truncated
     * but so far well-formed" (-2) from "already malformed" (-1). */
    if (max_len >= 0 && len > max_len) {
        for (i = 1; i < max_len; i++) {
            if ((p[i] & 0xc0) != 0x80)
                return (uint32_t)-1;
        }
        return (uint32_t)-2;
    }

    for (i = 1; i < len; ++i) {
        uint32_t ch = p[i];

        if ((ch & 0xc0) != 0x80) {
            /* A nul here means the string ended mid-character. */
            if (ch)
                return (uint32_t)-1;
            else
                return (uint32_t)-2;
        }

        wc <<= 6;
        wc |= (ch & 0x3f);
    }

    /* Reject overlong encodings: the value must need exactly len bytes. */
    if (UTF8_LENGTH (wc) != len)
        return (uint32_t)-1;

    return wc;
}
/**
 * _cairo_utf8_get_char_validated:
 * @p: a UTF-8 string
 * @unicode: location to store one Unicode character, or %NULL
 *
 * Decodes the first character of a valid UTF-8 string, and returns
 * the number of bytes consumed.
 *
 * Note that the string should be valid. Do not use this without
 * validating the string first.
 *
 * If the lead byte is nevertheless rejected (UTF8_COMPUTE yields a
 * length of -1), (uint32_t)-1 is stored in @unicode and 1 is returned,
 * so that a caller iterating over the string still makes progress.
 *
 * Returns: the number of bytes forming the character returned.
 **/
int
_cairo_utf8_get_char_validated (const char *p,
                                uint32_t   *unicode)
{
    int i, mask = 0, len;
    uint32_t result;
    unsigned char c = (unsigned char) *p;

    UTF8_COMPUTE (c, mask, len);
    if (len == -1) {
        if (unicode)
            *unicode = (uint32_t)-1;
        return 1;
    }

    UTF8_GET (result, p, i, mask, len);

    if (unicode)
        *unicode = result;

    return len;
}
/** * _cairo_utf8_to_ucs4: * @str: an UTF-8 string * @len: length of @str in bytes, or -1 if it is nul-terminated. * If @len is supplied and the string has an embedded nul * byte, only the portion before the nul byte is converted. * @result: location to store a pointer to a newly allocated UTF-32 * string (always native endian), or %NULL. Free with free(). A 0 * word will be written after the last character. * @items_written: location to store number of 32-bit words * written. (Not including the trailing 0) * * Converts a UTF-8 string to UCS-4. UCS-4 is an encoding of Unicode * with 1 32-bit word per character. The string is validated to * consist entirely of valid Unicode characters. * * Return value: %CAIRO_STATUS_SUCCESS if the entire string was * successfully converted. %CAIRO_STATUS_INVALID_STRING if an * invalid sequence was found.
**/
cairo_status_t
_cairo_utf8_to_ucs4 (constchar *str, int len,
uint32_t **result, int *items_written)
{
uint32_t *str32 = NULL; int n_chars, i; constunsignedchar *in; constunsignedchar * const ustr = (constunsignedchar *) str;
in = ustr;
n_chars = 0; while ((len < 0 || ustr + len - in > 0) && *in)
{
uint32_t wc = _utf8_get_char_extended (in, ustr + len - in); if (wc & 0x80000000 || !UNICODE_VALID (wc)) return _cairo_error (CAIRO_STATUS_INVALID_STRING);
n_chars++; if (n_chars == INT_MAX) return _cairo_error (CAIRO_STATUS_INVALID_STRING);
in = UTF8_NEXT_CHAR (in);
}
if (result) {
str32 = _cairo_malloc_ab (n_chars + 1, sizeof (uint32_t)); if (!str32) return _cairo_error (CAIRO_STATUS_NO_MEMORY);
in = ustr; for (i=0; i < n_chars; i++) {
str32[i] = _utf8_get_char (in);
in = UTF8_NEXT_CHAR (in);
}
str32[i] = 0;
*result = str32;
}
if (items_written)
*items_written = n_chars;
return CAIRO_STATUS_SUCCESS;
}
/**
 * _cairo_ucs4_to_utf8:
 * @unicode: a UCS-4 character
 * @utf8: buffer to write utf8 string into. Must have at least 4 bytes
 *   space available. Or %NULL.
 *
 * Encodes @unicode as UTF-8. When @utf8 is %NULL nothing is written
 * and only the encoded length is computed. No terminating nul byte is
 * written.
 *
 * Code points above U+10FFFF are treated as invalid, the same upper
 * bound that _cairo_ucs4_to_utf16() applies. Surrogate code points are
 * not rejected here, again matching _cairo_ucs4_to_utf16().
 *
 * Return value: Number of bytes in the utf8 string or 0 if an invalid
 *   unicode character
 **/
int
_cairo_ucs4_to_utf8 (uint32_t  unicode,
                     char     *utf8)
{
    /* Lead-byte length markers, indexed by the sequence length. */
    static const unsigned char lead_marker[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
    int bytes;
    unsigned char *p;

    if (unicode < 0x80) {
        if (utf8)
            *utf8 = (char) unicode;
        return 1;
    } else if (unicode < 0x800) {
        bytes = 2;
    } else if (unicode < 0x10000) {
        bytes = 3;
    } else if (unicode < 0x110000) {
        bytes = 4;
    } else {
        return 0;
    }

    if (!utf8)
        return bytes;

    /* Emit the continuation bytes from last to first, six payload bits
     * each; whatever remains afterwards belongs in the lead byte. */
    p = (unsigned char *) utf8 + bytes;
    while (--p > (unsigned char *) utf8) {
        *p = (unsigned char) (0x80 | (unicode & 0x3f));
        unicode >>= 6;
    }
    *p = (unsigned char) (lead_marker[bytes] | unicode);

    return bytes;
}
/**
 * _cairo_ucs4_to_utf16:
 * @unicode: a UCS-4 character
 * @utf16: buffer to write utf16 string into. Must have at least 2
 *   elements. Or %NULL.
 *
 * Encodes @unicode as UTF-16 in native byte order. Values below
 * 0x10000 are stored unchanged in a single element; values below
 * 0x110000 are split into a high/low surrogate pair. When @utf16 is
 * %NULL nothing is written and only the element count is computed.
 *
 * Return value: Number of elements in the utf16 string or 0 if an
 *   invalid unicode character
 **/
int
_cairo_ucs4_to_utf16 (uint32_t  unicode,
                      uint16_t *utf16)
{
    if (unicode < 0x10000) {
        if (utf16)
            utf16[0] = (uint16_t) unicode;
        return 1;
    }

    if (unicode < 0x110000) {
        if (utf16) {
            /* 20 payload bits: upper ten go in the high surrogate,
             * lower ten in the low surrogate. */
            uint32_t offset = unicode - 0x10000;

            utf16[0] = (uint16_t) (offset / 0x400 + 0xd800);
            utf16[1] = (uint16_t) (offset % 0x400 + 0xdc00);
        }
        return 2;
    }

    return 0;
}
#if CAIRO_HAS_UTF8_TO_UTF16
/**
 * _cairo_utf8_to_utf16:
 * @str: a UTF-8 string
 * @len: length of @str in bytes, or -1 if it is nul-terminated.
 *   If @len is supplied and the string has an embedded nul
 *   byte, only the portion before the nul byte is converted.
 * @result: location to store a pointer to a newly allocated UTF-16
 *   string (always native endian). Free with free(). A 0
 *   word will be written after the last character.
 * @items_written: location to store number of 16-bit words
 *   written. (Not including the trailing 0)
 *
 * Converts a UTF-8 string to UTF-16. UTF-16 is an encoding of Unicode
 * where characters are represented either as a single 16-bit word, or
 * as a pair of 16-bit "surrogates". The string is validated to
 * consist entirely of valid Unicode characters.
 *
 * Return value: %CAIRO_STATUS_SUCCESS if the entire string was
 *   successfully converted. %CAIRO_STATUS_INVALID_STRING if
 *   an invalid sequence was found.
**/
cairo_status_t
_cairo_utf8_to_utf16 (constchar *str, int len,
uint16_t **result, int *items_written)
{
uint16_t *str16 = NULL; int n16, i; constunsignedchar *in; constunsignedchar * const ustr = (constunsignedchar *) str;
in = ustr;
n16 = 0; while ((len < 0 || ustr + len - in > 0) && *in) {
uint32_t wc = _utf8_get_char_extended (in, ustr + len - in); if (wc & 0x80000000 || !UNICODE_VALID (wc)) return _cairo_error (CAIRO_STATUS_INVALID_STRING);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.