-
Notifications
You must be signed in to change notification settings - Fork 66
Requirement Parsers
Ching Chang edited this page Jul 14, 2022
·
15 revisions
This documentation specifies the string patterns each parser in /app/WebParsing/ReqParser.hs
accepts in Extended Backus-Naur Form (EBNF).
The main notations used are:
| Notation | Meaning |
|---|---|
| `a \| b` | a or b |
| `a, b` | a followed by b |
| `a - b` | a but not b |
| `n * a` | n number of a's |
| `"..."` | string literal (case insensitive[1]) |
| `[...]` | optional |
| `{...}` | 1 or more |
| `(...)` | grouping |
| `(*...*)` | comment |
`,` has a higher precedence than `|`.
This means `a, b | c, d` translates to "(`a` followed by `b`) or (`c` followed by `d`)", not "`a` followed by (`b` or `c`) followed by `d`".
In addition, some logic cannot be represented in EBNF,
and is therefore described in plain English and expressed as `? ... ?`.
Symbol | Definition |
---|---|
digit | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
letter | ? all upper and lower case English letters ? |
character | ? all ASCII characters ? |
Note: The parsers do not necessarily return the strings that they consume.
For example, `gradeParser` may consume "(73%), and" but would only return 73.
requirement = ? and (category | "(", category, ")") ?;
category = fces | course | cgpa | programOr | rawText;
(* fces *)
fces = [completionPrefix], float, [department], fceSeparator, [includingSeparator, ? and category ?], [fromSeparator], [anyModifier], modifiers;
modifiers = modifier, [{[fromSeparator], modifiersNoRaw}];
modifier = modifiersNoRaw | rawModifier;
modifiersNoRaw = ? and course ? | level | department;
rawModifier = {character - (andSeparator | orSeparator)};
level = 3 * digit, ["+" | "-"], "level", [courseLiteral], ["or higher"];
department = {character - (courseLiteral | fceSeparator | orSeparator | andSeparator | fromSeparator)}, [courseLiteral];
(* course *)
course = courseOptionalCutoff | cutoffBefore | cutoffAfter;
courseOptionalCutoff = courseID, ["(", (percentGrade | letterGrade | info), ")"];
cutoffBefore = ["an" | "a " ? not followed by "in" ?], [("minimum grade" | "minimum mark" | "minimum" | "grade" | "final grade" | "at least"), ["of"]], grade, [{character - courseID}], courseID;
cutoffAfter = courseID, "(", [{character - ("(" | ")" | orSeparator | andSeparator | grade ? not followed by digit or letter ?)}], [{character - ")"}], ")";
courseID = (utsgCourseCode | utscCourseCode), ("H" | "Y"), digit;
utsgCourseCode = 3 * letter, 3 * digit;
utscCourseCode = 4 * letter, 2 * digit;
grade = "(", (percentGrade | letterGrade), ")" | percentGrade | letterGrade;
percentGrade = 2 * digit ? not followed by another digit ?, ["%"];
letterGrade = ("A" | "B" | "C" | "D" | "E" | "F") ? not followed by another letter ?, ["+" | "-"];
info = {character - ")"}, ")";
(* cgpa *)
cgpa = [cgpaPrefix], float, ["cGPA"], {character - (andSeparator | orSeparator)};
(* program *)
programOr = programPrefix, programGroup, [{progOrSeparator, programGroup}];
programGroup = program, [{progGroupSeparator, program}], degreeType, [{orSeparator, degreeType}], [degreeType | programSuffix];
program = [{character - (degreeType | programSuffix | progGroupSeparator | progOrSeparator | "." | ";")}];
(* rawText *)
rawText = {character - (";" | "\n")} | "";
(* Other *)
fceSeparator = "FCEs." | "FCEs" | "FCE." | "FCE" | "credits" | "credit" | "full-course equivalents" | "additional credits" | "additional credit";
oneOfSeparator = ("one of either" | "one of the following" | "at least one of" | "one of" | "1 of" | "at least 1 of"), [":"];
orSeparator = "/" | "or" | ", or";
andSeparator = ", and" | ", an additional" | ", additional" | "," | "and" | "; and" | "." | ";" | "&" | "+" | "plus";
fromSeparator = "of any of the following:" | "of" | "from the following:" | "from the" | "from:" | "from" | "at the" | "at" | "in";
progGroupSeparator = "," | "or a" | "or";
progOrSeparator = "or in a";
includingSeparator = "including" | ", including";
completionPrefix = "Completion of at least" | "Completion of a minimum of" | "Completion of" | "have completed" | "At least one additional" | "At least one" | "At least" | "Any" | "a";
programPrefix = "admission to" | "enrolment in the" | "enrolment in an" | "enrolment in a" | "enrolment in";
cgpaPrefix = "and will normally have a CGPA of at least" | "with a CGPA of at least" | "with a minimum cGPA of" | "and a minimum cGPA of" | "and minimum cGPA of" | "a CGPA of at least" | "a minimum cGPA of" | "minimum cGPA of" | "with" | "cGPA";
programSuffix = "program of study" | "program";
degreeType = "major" | "minor" | "specialist";
courseLiteral = "course" | "courses";
anyModifier = "any", ("field" | "subject");
float = {digit}, [".", {digit}];
(* Helpers (see footnote [2]) *)
oneOf p = oneOfSeparator, p, [{(orSeparator | andSeparator), p}];
or p = p, [{orSeparator, p}];
and p = p, [{andSeparator, (? oneOf p ? | ? or p ?)}];
[1] String literals are not case insensitive in EBNF by default, but for the simplicity of the documentation, assume they are :)

[2] These helpers are context-sensitive and therefore not EBNF. However, we use them here to make the grammar more concise and avoid repetition.