// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: patternprops.cpp
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar13
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "patternprops.h"
U_NAMESPACE_BEGIN
/*
* One byte per Latin-1 character.
* Bit 0 is set if either Pattern property is true,
* bit 1 if Pattern_Syntax is true,
* bit 2 if Pattern_White_Space is true.
* That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
*/
static const uint8_t latin1[
256 ]={
// WS: 9..D
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
5 ,
5 ,
5 ,
5 ,
5 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// WS: 20 Syntax: 21..2F
5 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
// Syntax: 3A..40
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// Syntax: 5B..5E
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
3 ,
3 ,
3 ,
3 ,
0 ,
// Syntax: 60
3 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// Syntax: 7B..7E
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
3 ,
3 ,
3 ,
3 ,
0 ,
// WS: 85
0 ,
0 ,
0 ,
0 ,
0 ,
5 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// Syntax: A1..A7, A9, AB, AC, AE
0 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
3 ,
0 ,
3 ,
0 ,
3 ,
3 ,
0 ,
3 ,
0 ,
// Syntax: B0, B1, B6, BB, BF
3 ,
3 ,
0 ,
0 ,
0 ,
0 ,
3 ,
0 ,
0 ,
0 ,
0 ,
3 ,
0 ,
0 ,
0 ,
3 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// Syntax: D7
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
3 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// Syntax: F7
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
3 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0
};
/*
* One byte per 32 characters from U+2000..U+303F indexing into
* a small table of 32-bit data words.
* The first two data words are all-zeros and all-ones.
*/
static const uint8_t index2000[
130 ]={
2 ,
3 ,
4 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// 20xx
0 ,
0 ,
0 ,
0 ,
5 ,
1 ,
1 ,
1 ,
// 21xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 22xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 23xx
1 ,
1 ,
1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// 24xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 25xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 26xx
1 ,
1 ,
1 ,
6 ,
7 ,
1 ,
1 ,
1 ,
// 27xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 28xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 29xx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 2Axx
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
// 2Bxx
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// 2Cxx
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// 2Dxx
1 ,
1 ,
1 ,
1 ,
0 ,
0 ,
0 ,
0 ,
// 2Exx
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
// 2Fxx
8 ,
9 // 3000..303F
};
/*
* One 32-bit integer per 32 characters. Ranges of all-false and all-true
* are mapped to the first two values, other ranges map to appropriate bit patterns.
*/
static const uint32_t syntax2000[]={
0 ,
0 xffffffff,
0 xffff0000,
// 2: 2010..201F
0 x7fff00ff,
// 3: 2020..2027, 2030..203E
0 x7feffffe,
// 4: 2041..2053, 2055..205E
0 xffff0000,
// 5: 2190..219F
0 x003fffff,
// 6: 2760..2775
0 xfff00000,
// 7: 2794..279F
0 xffffff0e,
// 8: 3001..3003, 3008..301F
0 x00010001
// 9: 3020, 3030
};
/*
* Same as syntax2000, but with additional bits set for the
* Pattern_White_Space characters 200E 200F 2028 2029.
*/
static const uint32_t syntaxOrWhiteSpace2000[]={
0 ,
0 xffffffff,
0 xffffc000,
// 2: 200E..201F
0 x7fff03ff,
// 3: 2020..2029, 2030..203E
0 x7feffffe,
// 4: 2041..2053, 2055..205E
0 xffff0000,
// 5: 2190..219F
0 x003fffff,
// 6: 2760..2775
0 xfff00000,
// 7: 2794..279F
0 xffffff0e,
// 8: 3001..3003, 3008..301F
0 x00010001
// 9: 3020, 3030
};
UBool
PatternProps::isSyntax(UChar32 c) {
if (c<
0 ) {
return false ;
}
else if (c<=
0 xff) {
return (latin1[c] >>
1 ) &
1 ;
}
else if (c<
0 x2010) {
return false ;
}
else if (c<=
0 x3030) {
uint32_t bits=syntax2000[index2000[(c-
0 x2000)>>
5 ]];
return (bits >> (c &
0 x1f)) &
1 ;
}
else if (
0 xfd3e<=c && c<=
0 xfe46) {
return c<=
0 xfd3f ||
0 xfe45<=c;
}
else {
return false ;
}
}
UBool
PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
if (c<
0 ) {
return false ;
}
else if (c<=
0 xff) {
return latin1[c] &
1 ;
}
else if (c<
0 x200e) {
return false ;
}
else if (c<=
0 x3030) {
uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-
0 x2000)>>
5 ]];
return (bits >> (c &
0 x1f)) &
1 ;
}
else if (
0 xfd3e<=c && c<=
0 xfe46) {
return c<=
0 xfd3f ||
0 xfe45<=c;
}
else {
return false ;
}
}
UBool
PatternProps::isWhiteSpace(UChar32 c) {
if (c<
0 ) {
return false ;
}
else if (c<=
0 xff) {
return (latin1[c] >>
2 ) &
1 ;
}
else if (
0 x200e<=c && c<=
0 x2029) {
return c<=
0 x200f ||
0 x2028<=c;
}
else {
return false ;
}
}
const char16_t *
PatternProps::skipWhiteSpace(
const char16_t *s, int32_t length) {
while (length>
0 && isWhiteSpace(*s)) {
++s;
--length;
}
return s;
}
int32_t
PatternProps::skipWhiteSpace(
const UnicodeString& s, int32_t start) {
int32_t i = start;
int32_t length = s.length();
while (i<length && isWhiteSpace(s.charAt(i))) {
++i;
}
return i;
}
const char16_t *
PatternProps::trimWhiteSpace(
const char16_t *s, int32_t &length) {
if (length<=
0 || (!isWhiteSpace(s[
0 ]) && !isWhiteSpace(s[length-
1 ]))) {
return s;
}
int32_t start=
0 ;
int32_t limit=length;
while (start<limit && isWhiteSpace(s[start])) {
++start;
}
if (start<limit) {
// There is non-white space at start; we will not move limit below that,
// so we need not test start<limit in the loop.
while (isWhiteSpace(s[limit-
1 ])) {
--limit;
}
}
length=limit-start;
return s+start;
}
UBool
PatternProps::isIdentifier(
const char16_t *s, int32_t length) {
if (length<=
0 ) {
return false ;
}
const char16_t *limit=s+length;
do {
if (isSyntaxOrWhiteSpace(*s++)) {
return false ;
}
}
while (s<limit);
return true ;
}
const char16_t *
PatternProps::skipIdentifier(
const char16_t *s, int32_t length) {
while (length>
0 && !isSyntaxOrWhiteSpace(*s)) {
++s;
--length;
}
return s;
}
U_NAMESPACE_END
Messung V0.5 in Prozent C=90 H=91 G=90
¤ Dauer der Verarbeitung: 0.11 Sekunden
(vorverarbeitet am 2026-06-07)
¤
*© Formatika GbR, Deutschland