Skip to content
This repository has been archived by the owner on May 17, 2023. It is now read-only.

Commit

Permalink
Extract schema from published RFC 7940
Browse files Browse the repository at this point in the history
This is taken verbatim from the published RFC, which contains some
editorial changes made to the comments.
  • Loading branch information
wil committed Feb 2, 2017
1 parent 840de20 commit 761244f
Showing 1 changed file with 61 additions and 57 deletions.
118 changes: 61 additions & 57 deletions lgr-1.0.rnc
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ default namespace = "urn:ietf:params:xml:ns:lgr-1.0"
# SIMPLE TYPES
#

# RFC 5646 language tag (e.g. "de", "und-Latn", etc.)
# RFC 5646 language tag (e.g., "de", "und-Latn")
language-tag = xsd:token

# The scope to which the LGR applies. For the "domain" scope type it
# should be a fully qualified domain name.
# The scope to which the LGR applies. For the "domain" scope type,
# it should be a fully qualified domain name.
scope-value = xsd:token {
# Only emptiness is rejected here; nothing in this datatype checks that
# a "domain" scope is actually a fully qualified domain name.
minLength = "1"
}
Expand All @@ -39,39 +39,40 @@ code-point-set-shorthand = xsd:token {
~ "( ([0-9A-F]{4,6}|[0-9A-F]{4,6}-[0-9A-F]{4,6}))*"
}

## dates are used in information fields in the meta
## dates are used in information fields in the meta
## section ("YYYY-MM-DD")
date-pattern = xsd:token {
# Lexical shape only: four digits, "-", two digits, "-", two digits.
# Month and day ranges are not checked, so a value such as
# "2017-99-99" still matches.
pattern = "\d{4}-\d\d-\d\d"
}

## variant type
## the variant type MUST be non-emtpy and MUST NOT
## start with a "_", using xsd:NMTOKEN here because
## the variant type MUST be non-empty and MUST NOT
## start with a "_"; using xsd:NMTOKEN here because
## we need space-separated lists of them
# NOTE: xsd:NMTOKEN guarantees a non-empty token without whitespace,
# but it accepts a leading "_"; that restriction is not enforced by
# this datatype.
variant-type = xsd:NMTOKEN

## variant type list for action triggers
## the list MUST NOT be empty and entries MUST NOT
## the list MUST NOT be empty, and entries MUST NOT
## start with a "_"
# NOTE: xsd:NMTOKENS requires at least one token, which covers the
# "MUST NOT be empty" rule; a leading "_" on an entry is not rejected
# by this datatype.
variant-type-list = xsd:NMTOKENS

## reference to a rule name (used in "when" and "not-when"
## attributes, as well as the "by-ref" attribute of the "rule"
## element.)
## element).
rule-ref = xsd:IDREF

## a space-separated list of tags. Tags should generally follow
## xsd:Name syntax. However, we are using the xsd:NMTOKENS here
## a space-separated list of tags. Tags should generally follow
## xsd:Name syntax. However, we are using the xsd:NMTOKENS here
## because there is no native XSD datatype for space-separated
## xsd:Name
tags = xsd:NMTOKENS

## The value space of a "from-tag" attribute. Although it is closer
## The value space of a "from-tag" attribute. Although it is closer
## to xsd:IDREF lexically and semantically, tags are not unique in
## the document. As such, we are unable to take advantage of
## facilities provided by a validator. xsd:NMTOKEN is used instead of
## the stricter xsd:Names here so as to be consistent with the above.
## the document. As such, we are unable to take advantage of
## facilities provided by a validator. xsd:NMTOKEN is used instead
## of the stricter xsd:Names here so as to be consistent with
## the above.
tag-ref = xsd:NMTOKEN

## an identifier type (used by "name" attributes).
Expand All @@ -81,16 +82,17 @@ identifier = xsd:ID
## the same "name" attribute value.
class-ref = xsd:IDREF

## count attribute pattern ("n", "n+" or "n:m")
## "count" attribute pattern ("n", "n+", or "n:m")
count-pattern = xsd:token {
# One or more digits ("n"), optionally followed by either a literal
# "+" ("n+") or ":" and one or more digits ("n:m"). The pattern does
# not compare the two numbers, so "5:2" is lexically accepted.
pattern = "\d+(\+|:\d+)?"
}

## ref attribute pattern
## space separated list of id attribute valiues for
## reference elements. These reference ids
## must be declared in a reference element
## before they can be used in a ref attribute

## "ref" attribute pattern
## space-separated list of "id" attribute values for
## "reference" elements. These reference ids
## must be declared in a "reference" element
## before they can be used in a "ref" attribute
ref-pattern = xsd:token {
# One or more ids separated by single spaces. Each id is a non-empty
# run of uppercase A-Z, digits, "-", "_", ".", or ":". Whether an id
# was declared by a "reference" element is not checked by the pattern.
pattern = "[\-_.:0-9A-Z]+( [\-_.:0-9A-Z]+)*"
}
Expand All @@ -99,7 +101,7 @@ ref-pattern = xsd:token {
# STRUCTURES
#

## Representation of a single code point, or a sequence of code
## Representation of a single code point or a sequence of code
## points
char = element char {
attribute cp { code-point-literal },
Expand Down Expand Up @@ -137,7 +139,6 @@ variant = element var {
# Classes
#


## a "class" element that references the name of another "class"
## (or set-operator like "union") defined elsewhere.
## If used as a matcher (appearing under a "rule" element),
Expand All @@ -155,22 +156,22 @@ class-declaration = element class { class-declaration-content }

class-declaration-content =
# "name" attribute MUST be present if this is a "top-level"
# class declaration, i.e. appearing directly under the "rules"
# element. Otherwise, it MUST be absent.
# class declaration, i.e., appearing directly under the "rules"
# element. Otherwise, it MUST be absent.
attribute name { identifier }?,
# If used as a matcher (appearing in a "rule" element, but not
# when nested inside a set operator or class), the "count"
# attribute may be present. Otherwise, it MUST be absent.
# when nested inside a set-operator or class), the "count"
# attribute may be present. Otherwise, it MUST be absent.
attribute count { count-pattern }?,
attribute comment { text }?,
attribute ref { ref-pattern }?,
(
# define the class by property (e.g. property="sc:Latn"), OR
# define the class by property (e.g., property="sc:Latn"), OR
attribute property { xsd:NMTOKEN }
# define the class by tagged code points, OR
| attribute from-tag { tag-ref }
# text node to allow for shorthand notation
# e.g. "0061 0062-0063"
# e.g., "0061 0062-0063"
| code-point-set-shorthand
)

Expand All @@ -183,21 +184,21 @@ class-or-set-operator-nested =
class-invocation-or-declaration | set-operator

class-or-set-operator-declaration =
# a "class" element or set operator (effectively defining a class)
# a "class" element or set-operator (effectively defining a class)
# directly in the "rules" element.
class-declaration | set-operator


#
# Set operators
# set-operators
#

complement-operator = element complement {
attribute name { identifier }?,
attribute comment { text }?,
attribute ref { ref-pattern }?,
# "count" attribute MUST only be used when this set-operator is
# used as a matcher (i.e. nested in a <rule> element but not
# used as a matcher (i.e., nested in a "rule" element but not
# inside a set-operator or class)
attribute count { count-pattern }?,
class-or-set-operator-nested
Expand All @@ -208,20 +209,21 @@ union-operator = element union {
attribute comment { text }?,
attribute ref { ref-pattern }?,
# "count" attribute MUST only be used when this set-operator is
# used as a matcher (i.e. nested in a <rule> element but not
# used as a matcher (i.e., nested in a "rule" element but not
# inside a set-operator or class)
attribute count { count-pattern }?,
class-or-set-operator-nested,
# needs two or more child elements
class-or-set-operator-nested+
}


intersection-operator = element intersection {
attribute name { identifier }?,
attribute comment { text }?,
attribute ref { ref-pattern }?,
# "count" attribute MUST only be used when this set-operator is
# used as a matcher (i.e. nested in a <rule> element but not
# used as a matcher (i.e., nested in a "rule" element but not
# inside a set-operator or class)
attribute count { count-pattern }?,
class-or-set-operator-nested,
Expand All @@ -233,7 +235,7 @@ difference-operator = element difference {
attribute comment { text }?,
attribute ref { ref-pattern }?,
# "count" attribute MUST only be used when this set-operator is
# used as a matcher (i.e. nested in a <rule> element but not
# used as a matcher (i.e., nested in a "rule" element but not
# inside a set-operator or class)
attribute count { count-pattern }?,
class-or-set-operator-nested,
Expand All @@ -245,7 +247,7 @@ symmetric-difference-operator = element symmetric-difference {
attribute comment { text }?,
attribute ref { ref-pattern }?,
# "count" attribute MUST only be used when this set-operator is
# used as a matcher (i.e. nested in a <rule> element but not
# used as a matcher (i.e., nested in a "rule" element but not
# inside a set-operator or class)
attribute count { count-pattern }?,
class-or-set-operator-nested,
Expand All @@ -259,6 +261,7 @@ set-operator = complement-operator
| difference-operator
| symmetric-difference-operator


#
# Match operators (matchers)
#
Expand All @@ -269,8 +272,8 @@ any-matcher = element any {
}

choice-matcher = element choice {
## "count attribute MUST only be used when the choice-matcher
## contains no nested "start", "end", "anchor", "look-behind"
## "count" attribute MUST only be used when the choice-matcher
## contains no nested "start", "end", "anchor", "look-behind",
## or "look-ahead" operators and no nested rule-matchers
## containing any of these elements
attribute count { count-pattern }?,
Expand All @@ -285,7 +288,7 @@ char-matcher =
element char {
attribute cp { non-empty-code-point-literal },
# If used as a matcher (appearing in a "rule" element), the
# "count" attribute may be present. Otherwise, it MUST be
# "count" attribute may be present. Otherwise, it MUST be
# absent.
attribute count { count-pattern }?,
attribute comment { text }?,
Expand Down Expand Up @@ -314,30 +317,31 @@ look-behind-matcher = element look-behind {
}

## non-positional match operator that can be used as a direct child
## element of the choice matcher.
## element of the choice-matcher.
match-operator-choice = (
# Exactly one of the alternatives below. Unlike in
# match-operators-non-pos, start-matcher and end-matcher are ordinary
# alternatives here rather than being pinned to the first and last
# positions of a sequence.
any-matcher | choice-matcher | start-matcher | end-matcher
| char-matcher | class-or-set-operator-nested | rule-matcher
)

## non-positional match operators do not contain any anchor,
## look-behind or look-ahead elements.
## non-positional match operators do not contain any "anchor",
## "look-behind", or "look-ahead" elements.
match-operators-non-pos = (
# Ordered sequence: an optional start-matcher, then zero or more of
# the listed matchers in any order, then an optional end-matcher.
# Every part is optional, so the empty sequence also matches.
start-matcher?,
(any-matcher | choice-matcher | char-matcher
| class-or-set-operator-nested | rule-matcher)*,
end-matcher?
)

## positional match operators have an anchor element, which may be
## preceeded by a look-behind element, or followed by a look-ahead
## positional match operators have an "anchor" element, which may be
## preceded by a "look-behind" element, or followed by a "look-ahead"
## element, or both.
match-operators-pos =
# Fixed order: the anchor-matcher is required exactly once; the
# look-behind-matcher before it and the look-ahead-matcher after it
# are each independently optional.
look-behind-matcher?, anchor-matcher, look-ahead-matcher?

match-operators = match-operators-non-pos | match-operators-pos



#
# Rules
#
Expand All @@ -350,12 +354,12 @@ rule-declaration-top = element rule {
match-operators
}

## rule element used as a matcher (either by-ref or contains other
## match operators itself)
## "rule" element used as a matcher (either "by-ref" or contains
## other match operators itself)
rule-matcher =
element rule {
## "count attribute MUST only be used when the rule-matcher
## contains no nested "start", "end", "anchor", "look-behind"
## "count" attribute MUST only be used when the rule-matcher
## contains no nested "start", "end", "anchor", "look-behind",
## or "look-ahead" operators and no nested rule-matchers
## containing any of these elements
attribute count { count-pattern }?,
Expand All @@ -364,7 +368,6 @@ rule-matcher =
(attribute by-ref { rule-ref } | match-operators)
}


#
# Actions
#
Expand All @@ -381,6 +384,8 @@ action-declaration = element action {
| attribute only-variants { variant-type-list } )?
}



# DOCUMENT STRUCTURE

start = lgr
Expand All @@ -390,11 +395,10 @@ lgr = element lgr {
rules-section?
}

## Meta section - information recorded with an label
## generation ruleset that generally does not affect machine
## processing (except for unicode-version).
## However, if any "class-declaration" uses the "property" attribute
## a unicode-version MUST be present.
## Meta section - information recorded with an LGR that generally
## does not affect machine processing (except for "unicode-version").
## However, if any "class-declaration" uses the "property" attribute,
## a "unicode-version" element MUST be present.
meta-section = element meta {
element version {
attribute comment { text }?,
Expand Down Expand Up @@ -423,8 +427,8 @@ meta-section = element meta {
element reference {
attribute id {
xsd:token {
# limit id attribute to uppercase letters,
# digits and a few punctuation marks; use of
# limit "id" attribute to uppercase letters,
# digits, and a few punctuation marks; use of
# integers is RECOMMENDED
pattern = "[\-_.:0-9A-Z]*"
minLength = "1"
Expand All @@ -440,8 +444,8 @@ data-section = element data { (char | range)+ }

## Note that action declarations are strictly order dependent.
## class-or-set-operator-declaration and rule-declaration-top
## are weakly order dependent, they must precede first use of the
## identifier via by-ref.
## are weakly order dependent; they must precede first use of the
## identifier via "by-ref".
rules-section = element rules {
( class-or-set-operator-declaration
| rule-declaration-top
Expand Down

0 comments on commit 761244f

Please sign in to comment.