diff --git a/lgr-1.0.rnc b/lgr-1.0.rnc index b169f17..c0bb13e 100644 --- a/lgr-1.0.rnc +++ b/lgr-1.0.rnc @@ -8,11 +8,11 @@ default namespace = "urn:ietf:params:xml:ns:lgr-1.0" # SIMPLE TYPES # -# RFC 5646 language tag (e.g. "de", "und-Latn", etc.) +# RFC 5646 language tag (e.g., "de", "und-Latn") language-tag = xsd:token -# The scope to which the LGR applies. For the "domain" scope type it -# should be a fully qualified domain name. +# The scope to which the LGR applies. For the "domain" scope type, +# it should be a fully qualified domain name. scope-value = xsd:token { minLength = "1" } @@ -39,39 +39,40 @@ code-point-set-shorthand = xsd:token { ~ "( ([0-9A-F]{4,6}|[0-9A-F]{4,6}-[0-9A-F]{4,6}))*" } -## dates are used in information fields in the meta +## dates are used in information fields in the meta ## section ("YYYY-MM-DD") date-pattern = xsd:token { pattern = "\d{4}-\d\d-\d\d" } ## variant type -## the variant type MUST be non-emtpy and MUST NOT -## start with a "_", using xsd:NMTOKEN here because +## the variant type MUST be non-empty and MUST NOT +## start with a "_"; using xsd:NMTOKEN here because ## we need space-separated lists of them variant-type = xsd:NMTOKEN ## variant type list for action triggers -## the list MUST NOT be empty and entries MUST NOT +## the list MUST NOT be empty, and entries MUST NOT ## start with a "_" variant-type-list = xsd:NMTOKENS ## reference to a rule name (used in "when" and "not-when" ## attributes, as well as the "by-ref" attribute of the "rule" -## element.) +## element). rule-ref = xsd:IDREF -## a space-separated list of tags. Tags should generally follow -## xsd:Name syntax. However, we are using the xsd:NMTOKENS here +## a space-separated list of tags. Tags should generally follow +## xsd:Name syntax. However, we are using the xsd:NMTOKENS here ## because there is no native XSD datatype for space-separated ## xsd:Name tags = xsd:NMTOKENS -## The value space of a "from-tag" attribute. Although it is closer +## The value space of a "from-tag" attribute. Although it is closer ## to xsd:IDREF lexically and semantically, tags are not unique in -## the document. As such, we are unable to take advantage of -## facilities provided by a validator. xsd:NMTOKEN is used instead of -## the stricter xsd:Names here so as to be consistent with the above. +## the document. As such, we are unable to take advantage of +## facilities provided by a validator. xsd:NMTOKEN is used instead +## of the stricter xsd:Names here so as to be consistent with +## the above. tag-ref = xsd:NMTOKEN ## an identifier type (used by "name" attributes). @@ -81,16 +82,17 @@ identifier = xsd:ID ## the same "name" attribute value. class-ref = xsd:IDREF -## count attribute pattern ("n", "n+" or "n:m") +## "count" attribute pattern ("n", "n+", or "n:m") count-pattern = xsd:token { pattern = "\d+(\+|:\d+)?" } -## ref attribute pattern -## space separated list of id attribute valiues for -## reference elements. These reference ids -## must be declared in a reference element -## before they can be used in a ref attribute + +## "ref" attribute pattern +## space-separated list of "id" attribute values for +## "reference" elements. These reference ids +## must be declared in a "reference" element +## before they can be used in a "ref" attribute ref-pattern = xsd:token { pattern = "[\-_.:0-9A-Z]+( [\-_.:0-9A-Z]+)*" } @@ -99,7 +101,7 @@ ref-pattern = xsd:token { # STRUCTURES # -## Representation of a single code point, or a sequence of code +## Representation of a single code point or a sequence of code ## points char = element char { attribute cp { code-point-literal }, @@ -137,7 +139,6 @@ variant = element var { # Classes # - ## a "class" element that references the name of another "class" ## (or set-operator like "union") defined elsewhere. ## If used as a matcher (appearing under a "rule" element), @@ -155,22 +156,22 @@ class-declaration = element class { class-declaration-content } class-declaration-content = # "name" attribute MUST be present if this is a "top-level" - # class declaration, i.e. appearing directly under the "rules" - # element. Otherwise, it MUST be absent. + # class declaration, i.e., appearing directly under the "rules" + # element. Otherwise, it MUST be absent. attribute name { identifier }?, # If used as a matcher (appearing in a "rule" element, but not - # when nested inside a set operator or class), the "count" - # attribute may be present. Otherwise, it MUST be absent. + # when nested inside a set-operator or class), the "count" + # attribute may be present. Otherwise, it MUST be absent. attribute count { count-pattern }?, attribute comment { text }?, attribute ref { ref-pattern }?, ( - # define the class by property (e.g. property="sc:Latn"), OR + # define the class by property (e.g., property="sc:Latn"), OR attribute property { xsd:NMTOKEN } # define the class by tagged code points, OR | attribute from-tag { tag-ref } # text node to allow for shorthand notation - # e.g. "0061 0062-0063" + # e.g., "0061 0062-0063" | code-point-set-shorthand ) @@ -183,13 +184,13 @@ class-or-set-operator-nested = class-invocation-or-declaration | set-operator class-or-set-operator-declaration = - # a "class" element or set operator (effectively defining a class) + # a "class" element or set-operator (effectively defining a class) # directly in the "rules" element. class-declaration | set-operator # -# Set operators +# set-operators # complement-operator = element complement { @@ -197,7 +198,7 @@ complement-operator = element complement { attribute comment { text }?, attribute ref { ref-pattern }?, # "count" attribute MUST only be used when this set-operator is - # used as a matcher (i.e. nested in a element but not + # used as a matcher (i.e., nested in a "rule" element but not # inside a set-operator or class) attribute count { count-pattern }?, class-or-set-operator-nested @@ -208,7 +209,7 @@ union-operator = element union { attribute comment { text }?, attribute ref { ref-pattern }?, # "count" attribute MUST only be used when this set-operator is - # used as a matcher (i.e. nested in a element but not + # used as a matcher (i.e., nested in a "rule" element but not # inside a set-operator or class) attribute count { count-pattern }?, class-or-set-operator-nested, @@ -216,12 +217,13 @@ union-operator = element union { class-or-set-operator-nested+ } + intersection-operator = element intersection { attribute name { identifier }?, attribute comment { text }?, attribute ref { ref-pattern }?, # "count" attribute MUST only be used when this set-operator is - # used as a matcher (i.e. nested in a element but not + # used as a matcher (i.e., nested in a "rule" element but not # inside a set-operator or class) attribute count { count-pattern }?, class-or-set-operator-nested, @@ -233,7 +235,7 @@ difference-operator = element difference { attribute comment { text }?, attribute ref { ref-pattern }?, # "count" attribute MUST only be used when this set-operator is - # used as a matcher (i.e. nested in a element but not + # used as a matcher (i.e., nested in a "rule" element but not # inside a set-operator or class) attribute count { count-pattern }?, class-or-set-operator-nested, @@ -245,7 +247,7 @@ symmetric-difference-operator = element symmetric-difference { attribute comment { text }?, attribute ref { ref-pattern }?, # "count" attribute MUST only be used when this set-operator is - # used as a matcher (i.e. nested in a element but not + # used as a matcher (i.e., nested in a "rule" element but not # inside a set-operator or class) attribute count { count-pattern }?, class-or-set-operator-nested, @@ -259,6 +261,7 @@ set-operator = complement-operator | difference-operator | symmetric-difference-operator + # # Match operators (matchers) # @@ -269,8 +272,8 @@ any-matcher = element any { } choice-matcher = element choice { - ## "count attribute MUST only be used when the choice-matcher - ## contains no nested "start", "end", "anchor", "look-behind" + ## "count" attribute MUST only be used when the choice-matcher + ## contains no nested "start", "end", "anchor", "look-behind", ## or "look-ahead" operators and no nested rule-matchers ## containing any of these elements attribute count { count-pattern }?, @@ -285,7 +288,7 @@ char-matcher = element char { attribute cp { non-empty-code-point-literal }, # If used as a matcher (appearing in a "rule" element), the - # "count" attribute may be present. Otherwise, it MUST be + # "count" attribute may be present. Otherwise, it MUST be # absent. attribute count { count-pattern }?, attribute comment { text }?, @@ -314,14 +317,14 @@ look-behind-matcher = element look-behind { } ## non-positional match operator that can be used as a direct child -## element of the choice matcher. +## element of the choice-matcher. match-operator-choice = ( any-matcher | choice-matcher | start-matcher | end-matcher | char-matcher | class-or-set-operator-nested | rule-matcher ) -## non-positional match operators do not contain any anchor, -## look-behind or look-ahead elements. +## non-positional match operators do not contain any "anchor", +## "look-behind", or "look-ahead" elements. match-operators-non-pos = ( start-matcher?, (any-matcher | choice-matcher | char-matcher @@ -329,8 +332,8 @@ match-operators-non-pos = ( end-matcher? ) -## positional match operators have an anchor element, which may be -## preceeded by a look-behind element, or followed by a look-ahead +## positional match operators have an "anchor" element, which may be +## preceded by a "look-behind" element, or followed by a "look-ahead" ## element, or both. match-operators-pos = look-behind-matcher?, anchor-matcher, look-ahead-matcher? @@ -338,6 +341,7 @@ match-operators-pos = match-operators = match-operators-non-pos | match-operators-pos + # # Rules # @@ -350,12 +354,12 @@ rule-declaration-top = element rule { match-operators } -## rule element used as a matcher (either by-ref or contains other -## match operators itself) +## "rule" element used as a matcher (either "by-ref" or contains +## other match operators itself) rule-matcher = element rule { - ## "count attribute MUST only be used when the rule-matcher - ## contains no nested "start", "end", "anchor", "look-behind" + ## "count" attribute MUST only be used when the rule-matcher + ## contains no nested "start", "end", "anchor", "look-behind", ## or "look-ahead" operators and no nested rule-matchers ## containing any of these elements attribute count { count-pattern }?, @@ -364,7 +368,6 @@ rule-matcher = (attribute by-ref { rule-ref } | match-operators) } - # # Actions # @@ -381,6 +384,8 @@ action-declaration = element action { | attribute only-variants { variant-type-list } )? } + + # DOCUMENT STRUCTURE start = lgr @@ -390,11 +395,10 @@ lgr = element lgr { rules-section? } -## Meta section - information recorded with an label -## generation ruleset that generally does not affect machine -## processing (except for unicode-version). -## However, if any "class-declaration" uses the "property" attribute -## a unicode-version MUST be present. +## Meta section - information recorded with an LGR that generally +## does not affect machine processing (except for "unicode-version"). +## However, if any "class-declaration" uses the "property" attribute, +## a "unicode-version" element MUST be present. meta-section = element meta { element version { attribute comment { text }?, @@ -423,8 +427,8 @@ meta-section = element meta { element reference { attribute id { xsd:token { - # limit id attribute to uppercase letters, - # digits and a few punctuation marks; use of + # limit "id" attribute to uppercase letters, + # digits, and a few punctuation marks; use of # integers is RECOMMENDED pattern = "[\-_.:0-9A-Z]*" minLength = "1" @@ -440,8 +444,8 @@ data-section = element data { (char | range)+ } ## Note that action declarations are strictly order dependent. ## class-or-set-operator-declaration and rule-declaration-top -## are weakly order dependent, they must precede first use of the -## identifier via by-ref. +## are weakly order dependent; they must precede first use of the +## identifier via "by-ref". rules-section = element rules { ( class-or-set-operator-declaration | rule-declaration-top