#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"  // For UnicodeSet parsing, this is the interface that
                               //   looks up references to $variables within a set.
#include "rbbinode.h"
#include "rbbirpt.h"
U_NAMESPACE_BEGIN
// Forward declarations: the scanner below only holds pointers to these types.
class RBBIRuleBuilder;
class RBBISymbolTable;
//--------------------------------------------------------------------------------
//
//  RBBIRuleScanner
//
//  Performs the lowest level, character-at-a-time scanning of break
//  iterator rules.
//
//  The scanner produces parse trees for the rule expressions, together
//  with a list of all Unicode Sets encountered along the way.
//
//--------------------------------------------------------------------------------
class RBBIRuleScanner : public UMemory {
public:
    // Size of the state stack used for rules parsing. Corresponds roughly
    // to the depth of parentheses nesting that is allowed in the rules.
    // The node stack below is dimensioned with the same constant.
    enum {
        kStackSize = 100
    };

    // Get the next char from the input stream; it is handed back through c.
    // NOTE(review): the previous comment said "Return false if at end", but
    // the function returns void -- confirm how end of input is reported.
    void nextChar(RBBIRuleChar& c);

    // Push (unget) one character.
    // Only a single character may be pushed.
    UBool push(const RBBIRuleChar& c);

    // Parse the rules, generating two parse trees, one each for the forward
    // and reverse rules, and a list of UnicodeSets encountered.
    void parse();

    // Return the number of rules that have been seen.
    int32_t numRules();

    /**
     * Return a rules string without unnecessary
     * characters.
     */
    static UnicodeString stripRules(const UnicodeString& rules);

private:
    // The rule builder that we are part of.
    RBBIRuleBuilder* fRB;

    // Index of current character being processed in the rule input string.
    int32_t fScanIndex;

    // Index of the next character, which is the first character not yet
    // scanned.
    int32_t fNextIndex;

    // Scan is in a 'quoted region'.
    UBool fQuoteMode;

    // Line number in input file.
    int32_t fLineNum;

    // Char position within the line.
    int32_t fCharNum;

    // Previous char, needed to count CR-LF as a single line, not two.
    UChar32 fLastChar;

    // Current char for parse state machine processing.
    RBBIRuleChar fC;

    // $variableName, valid when we've just scanned one.
    UnicodeString fVarName;

    // State Transition Table for RBBI Rule parsing.
    // Index by p[state][char-class].
    RBBIRuleTableEl** fStateTable;

    // State stack and its pointer; holds state pushes and pops as specified
    // in the state transition rules.
    uint16_t fStack[kStackSize];
    int32_t fStackPtr;

    // Node stack and its pointer; holds nodes created during the parse of
    // a rule.
    RBBINode* fNodeStack[kStackSize];
    int32_t fNodeStackPtr;

    // True if the rule currently being scanned is a reverse direction rule
    // (if it starts with a '!').
    UBool fReverseRule;

    // True if the rule includes a '/' somewhere within it.
    UBool fLookAheadRule;

    // True if the current rule starts with a '^'.
    UBool fNoChainInRule;

    // Symbol table, holds definitions of $variable symbols.
    RBBISymbolTable* fSymbolTable;

    // UnicodeSet hash table, holds indexes to the sets created while parsing
    // rules. The key is the string used for creating the set.
    UHashtable* fSetTable;

    // Unicode Sets that are needed during the scanning of RBBI rules.
    // The indices for these are assigned by the perl script that builds the
    // state tables. See rbbirpt.h.
    UnicodeSet fRuleSets[10];

    // Counts each rule as it is scanned.
    int32_t fRuleNum;

    // Input index of start of a !!option keyword, while being scanned.
    int32_t fOptionStart;

    // Copying of this class is forbidden.
    RBBIRuleScanner(const RBBIRuleScanner&) = delete;
    RBBIRuleScanner& operator=(const RBBIRuleScanner&) = delete;
};
U_NAMESPACE_END
#endif
// Messung V0.5
// ¤ Dauer der Verarbeitung: 0.13 Sekunden
// (vorverarbeitet)
// ¤
// Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig
// zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
// noch Qualität der bereitgestellten Informationen zugesichert.
// Bemerkung:
// Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.