Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions icu4c/source/common/unicode/uniset.h
Original file line number Diff line number Diff line change
Expand Up @@ -1696,13 +1696,58 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
const SymbolTable* symbols,
UErrorCode& status);

void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
void applyPattern(const UnicodeString &pattern,
const ParsePosition& parsePosition,
RuleCharacterIterator &chars,
const SymbolTable *symbols,
UnicodeString &rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec);
UnicodeSet &(UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode &ec);

// Recursive-descent predictive parsing. These functions parse the syntactic categories
// matching their name in the base grammar of PD UTS #61 (before the highlighted changes are
// applied).
// See https://www.unicode.org/reports/tr61/tr61-1.html#Set-Operations.
// `parseUnicodeSet` clears `*this` and makes it represent the parsed UnicodeSet; all other functions
// add the set represented by the parsed construct to `*this`.

class Lexer;

// Parses the `UnicodeSet` syntactic category from `lexer`.
// Unlike the other parse* functions, this one clears `*this` and makes it represent the parsed
// set instead of adding to the existing contents.
// NOTE(review): the parameter roles below are inferred from their names and from the
// applyPattern declaration; confirm against the definition.
//   rebuiltPat  - presumably receives the regenerated pattern text.
//   options     - option bits; presumably the same value that applyPattern receives.
//   caseClosure - pointer to a UnicodeSet member function taking an int32_t attribute;
//                 presumably applied when case closure is requested by `options`.
//   depth       - presumably the current nesting depth of the recursive descent.
//   ec          - ICU error code; presumably set on a syntax or other failure.
void parseUnicodeSet(Lexer &lexer,
UnicodeString &rebuiltPat,
uint32_t options,
UnicodeSet &(UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode &ec);

// Parses the `Union` syntactic category from `lexer` and adds the set it represents to `*this`
// (it does not clear `*this` first).
// NOTE(review): the parameter roles below are inferred from their names; confirm against the
// definition.
//   rebuiltPat           - presumably receives the regenerated pattern text.
//   options, caseClosure - presumably forwarded unchanged to nested parse* calls.
//   depth                - presumably the current nesting depth of the recursive descent.
//   containsRestrictions - in/out flag passed by reference; presumably set when a Restriction
//                          was encountered while parsing. TODO confirm.
//   ec                   - ICU error code; presumably set on failure.
void parseUnion(Lexer &lexer,
UnicodeString &rebuiltPat,
uint32_t options,
UnicodeSet &(UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
bool &containsRestrictions,
UErrorCode &ec);

// Parses the `Term` syntactic category from `lexer` and adds the set it represents to `*this`
// (it does not clear `*this` first).
// Takes the same parameter list as parseUnion.
// NOTE(review): `containsRestrictions` is a by-reference flag; presumably it reports whether
// the parsed term contained a Restriction. Confirm against the definition.
void parseTerm(Lexer &lexer,
UnicodeString &rebuiltPat,
uint32_t options,
UnicodeSet &(UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
bool &containsRestrictions,
UErrorCode &ec);

// Parses the `Restriction` syntactic category from `lexer` and adds the set it represents to
// `*this` (it does not clear `*this` first).
// Takes the same parameter list as parseUnicodeSet; there is no `containsRestrictions` flag.
// NOTE(review): `depth` presumably tracks recursion depth for nested sets, and `ec` presumably
// reports failures. Confirm against the definition.
void parseRestriction(Lexer &lexer,
UnicodeString &rebuiltPat,
uint32_t options,
UnicodeSet &(UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode &ec);

// Parses the `Elements` syntactic category from `lexer` and adds the set it represents to
// `*this` (it does not clear `*this` first).
// Unlike the other parse* functions, this one takes no `options`, `caseClosure`, or `depth`.
// NOTE(review): `rebuiltPat` presumably receives the regenerated pattern text and `ec`
// presumably reports failures. Confirm against the definition.
void parseElements(Lexer &lexer,
UnicodeString &rebuiltPat,
UErrorCode &ec);


void closeOverCaseInsensitive(bool simple);
void closeOverAddCaseMappings();
Expand Down Expand Up @@ -1754,9 +1799,6 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
static UBool resemblesPropertyPattern(const UnicodeString& pattern,
int32_t pos);

static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
int32_t iterOpts);

/**
* Parse the given property pattern at the given parse position
* and set this UnicodeSet to the result.
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/common/uniset_closure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
applyPattern(pattern, pos, chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
Expand Down
Loading