Skip to content

Commit a73e9e5

Browse files
committed
✨ Put <description> in fifth field of json output, <title> in seventh
We're now actually parsing <description> from the XML :) Also allow <description> alongside <label> in pipespec <option> elements (under prefs), and add a schemas.xml so that editors can validate the test XML files.
1 parent fe5c34c commit a73e9e5

File tree

10 files changed

+107
-30
lines changed

10 files changed

+107
-30
lines changed

src/checkertypes.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ namespace divvun {
3434
*/
3535

3636
typedef std::string Lang;
37-
typedef std::u16string Msg;
37+
typedef std::pair<std::u16string, std::u16string> Msg;
3838
typedef std::u16string ErrId;
3939
typedef std::basic_regex<char> ErrRe;
4040

src/errors.dtd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- Copyright (C) 2016-2017, Kevin Brubeck Unhammer <[email protected]> -->
1+
<!-- Copyright (C) 2016-2019, Kevin Brubeck Unhammer <[email protected]> -->
22

33
<!-- This program is free software: you can redistribute it and/or modify -->
44
<!-- it under the terms of the GNU General Public License as published by -->

src/errors.rnc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright (C) 2016-2017, Kevin Brubeck Unhammer <[email protected]>
1+
# THIS IS A GENERATED FILE, DO NOT EDIT!
2+
# Copyright (C) 2016-2019, Kevin Brubeck Unhammer <[email protected]>
23

34
# This program is free software: you can redistribute it and/or modify
45

src/main_checker.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,16 @@ void printPrefs(const Pipeline& pipeline) {
9494
const Prefs& prefs = lp.second;
9595
std::cout << "==== Toggles: ====" << std::endl;
9696
for(const auto& id : prefs.toggleIds) {
97-
std::cout << "- [ ] " << toUtf8(id.first) << " \t" << toUtf8(id.second) << std::endl;
97+
std::cout << "- [ ] " << toUtf8(id.first) << " \t" << toUtf8(id.second.first) << std::endl;
9898
}
9999
for(const auto& re : prefs.toggleRes) {
100-
std::cout << "- [ ] [regex] \t" << toUtf8(re.second) << std::endl;
100+
std::cout << "- [ ] [regex] \t" << toUtf8(re.second.first) << std::endl;
101101
}
102102
std::cout << "==== Options: ====" << std::endl;
103103
for(const Option& o : prefs.options) {
104104
std::cout << "- " << o.name << " (" << o.type << "):" << std::endl;
105105
for(const auto& c : o.choices) {
106-
std::cout << "- ( ) " << toUtf8(c.first) << " \t" << toUtf8(c.second) << std::endl;
106+
std::cout << "- ( ) " << toUtf8(c.first) << " \t" << toUtf8(c.second.first) << std::endl;
107107
}
108108
}
109109
}

src/pipeline.hpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,23 @@ inline void parsePrefs(LocalisedPrefs& prefs, const pugi::xml_node& cmd) {
217217
unordered_map<Lang, unordered_map<ErrId, Msg>> lems;
218218
for (const pugi::xml_node& option: pref.children()) {
219219
const auto errId = fromUtf8(option.attribute("err-id").value());
220-
for (const pugi::xml_node& label: option.children()) {
220+
for (const pugi::xml_node& label: option.children("label")) {
221221
const auto lang = label.attribute("xml:lang").value();
222222
const auto msg = fromUtf8(label.text().get()); // or xml_raw_cdata(label);
223-
lems[lang][errId] = msg;
223+
// Let <description> default to <label> first:
224+
lems[lang][errId] = std::make_pair(msg, msg);
224225
}
226+
for (const pugi::xml_node& description: option.children("description")) {
227+
const auto lang = description.attribute("xml:lang").value();
228+
const auto msg = fromUtf8(description.text().get());
229+
if(lems[lang].find(errId) != lems[lang].end()) {
230+
lems[lang][errId].second = msg;
231+
}
232+
else {
233+
// No <label> for this language, fallback to <description>:
234+
lems[lang][errId] = std::make_pair(msg, msg);
235+
}
236+
}
225237
}
226238
for(const auto& lem : lems) {
227239
const Lang& lang = lem.first;

src/pipespec.dtd

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- Copyright (C) 2017, Kevin Brubeck Unhammer <[email protected]> -->
1+
<!-- Copyright (C) 2017–2019, Kevin Brubeck Unhammer <[email protected]> -->
22

33
<!-- This program is free software: you can redistribute it and/or modify -->
44
<!-- it under the terms of the GNU General Public License as published by -->
@@ -57,7 +57,7 @@
5757
<!-- type and name are *not* unique here; several pipelines may have a pref for e.g. "Oxford comma" -->
5858

5959

60-
<!ELEMENT option (label+)>
60+
<!ELEMENT option ((label|description)+)>
6161
<!ATTLIST option
6262
err-id CDATA #REQUIRED
6363
>
@@ -69,6 +69,11 @@
6969
xml:lang CDATA #REQUIRED
7070
>
7171

72+
<!ELEMENT description %Text;>
73+
<!ATTLIST description
74+
xml:lang CDATA #REQUIRED
75+
>
76+
7277
<!-- General "system" command – pipelines with this can only be used
7378
in settings where we can open processes: -->
7479
<!ELEMENT sh (arg*)> <!-- NOT IMPLEMENTED YET -->

src/pipespec.rnc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# THIS IS A GENERATED FILE, DO NOT EDIT!
2-
# Copyright (C) 2017, Kevin Brubeck Unhammer <[email protected]>
2+
# Copyright (C) 2017–2019, Kevin Brubeck Unhammer <[email protected]>
33

44
# This program is free software: you can redistribute it and/or modify
55

@@ -72,13 +72,16 @@ attlist.pref &=
7272
attribute type { text },
7373
attribute name { text }
7474
# type and name are *not* unique here; several pipelines may have a pref for e.g. "Oxford comma"
75-
option = element option { attlist.option, label+ }
75+
option = element option { attlist.option, (label | description)+ }
7676
attlist.option &= attribute err-id { text }
7777

7878
Text = (text | em)*
7979
label = element label { attlist.label, Text }
8080
attlist.label &= attribute xml:lang { text }
8181

82+
description = element description { attlist.description, Text }
83+
attlist.description &= attribute xml:lang { text }
84+
8285
# General "system" command – pipelines with this can only be used
8386
# in settings where we can open processes:
8487
sh = element sh { attlist.sh, arg* }

src/suggest.cpp

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,36 +89,66 @@ const MsgMap readMessagesXml(pugi::xml_document& doc, pugi::xml_parse_result& re
8989
MsgMap msgs;
9090

9191
if (result) {
92+
// <default>'s:
9293
for (pugi::xml_node def: doc.child("errors").child("defaults").children("default")) {
93-
// std::cerr << "defaults" << std::endl;
94+
// For all <title>'s and <description>'s, add all their parent <id>/<re>'s:
9495
for (pugi::xml_node child: def.child("header").children("title")) {
9596
const auto& msg = fromUtf8(xml_raw_cdata(child));
9697
const auto& lang = child.attribute("xml:lang").value();
9798
for (pugi::xml_node e: def.child("ids").children("e")) {
9899
// e_value assumes we only ever have one PCDATA element here:
99100
const auto& errtype = fromUtf8(e.attribute("id").value());
100-
// std::cerr << toUtf8(errtype) << std::endl;
101101
if(msgs[lang].first.count(errtype) != 0) {
102102
std::cerr << "divvun-suggest: WARNING: Duplicate titles for " << e.attribute("id").value() << std::endl;
103103
}
104-
msgs[lang].first[errtype] = msg;
104+
// Default to <title> as <description>, may be overridden below:
105+
msgs[lang].first[errtype] = make_pair(msg, msg);
105106
}
106107
for (pugi::xml_node re: def.child("ids").children("re")) {
107108
std::basic_regex<char> r(re.attribute("v").value());
108-
msgs[lang].second.push_back(std::make_pair(r, msg));
109+
msgs[lang].second.push_back(std::make_pair(r, make_pair(msg, msg)));
109110
}
110111
}
112+
for (pugi::xml_node child: def.child("body").children("description")) {
113+
const auto& msg = fromUtf8(xml_raw_cdata(child));
114+
const auto& lang = child.attribute("xml:lang").value();
115+
for (pugi::xml_node e: def.child("ids").children("e")) {
116+
const auto& errtype = fromUtf8(e.attribute("id").value());
117+
auto &langmsgs = msgs[lang].first;
118+
if (langmsgs.find(errtype) != langmsgs.end()) {
119+
langmsgs[errtype].second = msg;
120+
} else {
121+
// No <title> for this language, fallback to <description>:
122+
langmsgs[errtype] = std::make_pair(msg, msg);
123+
}
124+
}
125+
}
111126
}
127+
// <error>'s
112128
for (pugi::xml_node error: doc.child("errors").children("error")) {
129+
const auto& errtype = fromUtf8(error.attribute("id").value());
130+
// For all <title>'s and <description>'s, add the <error id> attribute:
113131
for (pugi::xml_node child: error.child("header").children("title")) {
114132
// child_value assumes we only ever have one PCDATA element here:
115-
const auto& errtype = fromUtf8(error.attribute("id").value());
116133
const auto& msg = fromUtf8(xml_raw_cdata(child));
117134
const auto& lang = child.attribute("xml:lang").value();
118-
if(msgs[lang].first.count(errtype) != 0) {
119-
std::cerr << "divvun-suggest: WARNING: Duplicate titles for " << error.attribute("id").value() << std::endl;
135+
auto& langmsgs = msgs[lang].first;
136+
if(langmsgs.count(errtype) != 0) {
137+
std::cerr << "divvun-suggest: WARNING: Duplicate <title>'s for " << error.attribute("id").value() << std::endl;
138+
}
139+
langmsgs[errtype] = make_pair(msg, msg);
140+
}
141+
for (pugi::xml_node child: error.child("body").children("description")) {
142+
const auto& msg = fromUtf8(xml_raw_cdata(child));
143+
const auto& lang = child.attribute("xml:lang").value();
144+
auto& langmsgs = msgs[lang].first;
145+
if(langmsgs.find(errtype) != langmsgs.end()) {
146+
langmsgs[errtype].second = msg;
147+
}
148+
else {
149+
// No <title> for this language, fallback to <description>:
150+
langmsgs[errtype] = std::make_pair(msg, msg);
120151
}
121-
msgs[lang].first[errtype] = msg;
122152
}
123153
}
124154
}
@@ -397,10 +427,10 @@ variant<Nothing, Err> Suggest::cohort_errs(const ErrId& err_id,
397427
if(cohort_empty(c) || c.added || ignores.find(err_id) != ignores.end()) {
398428
return Nothing();
399429
}
400-
u16string msg;
430+
Msg msg;
401431
for(const auto& mlang : sortedmsglangs) {
402-
if(msg.empty() && mlang != locale) {
403-
std::cerr << "divvun-suggest: WARNING: No message for " << json::str(err_id) << " in xml:lang '" << locale << "', trying '" << mlang << "'" << std::endl;
432+
if(msg.second.empty() && mlang != locale) {
433+
std::cerr << "divvun-suggest: WARNING: No <description> for " << json::str(err_id) << " in xml:lang '" << locale << "', trying '" << mlang << "'" << std::endl;
404434
}
405435
const auto& lmsgs = msgs.at(mlang);
406436
if(lmsgs.first.count(err_id) != 0) {
@@ -421,23 +451,28 @@ variant<Nothing, Err> Suggest::cohort_errs(const ErrId& err_id,
421451
}
422452
}
423453
}
424-
if(!msg.empty()) {
454+
if(!msg.second.empty()) {
425455
break;
426456
}
427457
}
428-
if(msg.empty()) {
429-
std::cerr << "divvun-suggest: WARNING: No message for " << json::str(err_id) << " in any xml:lang" << std::endl;
430-
msg = err_id;
458+
if(msg.second.empty()) {
459+
std::cerr << "divvun-suggest: WARNING: No <description> for " << json::str(err_id) << " in any xml:lang" << std::endl;
460+
msg.second = err_id;
461+
}
462+
if(msg.first.empty()) {
463+
msg.first = err_id;
431464
}
432465
// TODO: Make suitable structure on creating MsgMap instead?
433-
replaceAll(msg, u"$1", c.form);
466+
replaceAll(msg.first, u"$1", c.form);
467+
replaceAll(msg.second, u"$1", c.form);
434468
for(const auto& r: c.readings) {
435469
if((!r.errtype.empty()) && err_id != r.errtype) {
436470
continue;
437471
}
438472
rel_on_match(r.rels, MSG_TEMPLATE_REL, sentence,
439473
[&] (const string& relname, size_t i_t, const Cohort& trg) {
440-
replaceAll(msg, fromUtf8(relname.c_str()), trg.form);
474+
replaceAll(msg.first, fromUtf8(relname.c_str()), trg.form);
475+
replaceAll(msg.second, fromUtf8(relname.c_str()), trg.form);
441476
});
442477
}
443478
auto beg = c.pos;
@@ -770,8 +805,9 @@ RunState Suggest::run_json(std::istream& is, std::ostream& os)
770805
<< "," << std::to_string(e.beg)
771806
<< "," << std::to_string(e.end)
772807
<< "," << json::str(e.err)
773-
<< "," << json::str(e.msg)
808+
<< "," << json::str(e.msg.second)
774809
<< "," << json::str_arr(e.rep)
810+
<< "," << json::str(e.msg.first)
775811
<< "]";
776812
wantsep = true;
777813
}

test/checker/pipespec.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
<pref type="Punctuation" name="Tusenskilje">
1616
<option err-id="tusen-mellom">
1717
<label xml:lang="nn">Eg vil ha mellomrom mellom 000</label>
18+
<description xml:lang="nn">Det er lov med anten mellomrom eller punktum som skiljeteikn for tal over tusen.</description>
1819
</option>
1920
<option err-id="tusen-punktum">
2021
<label xml:lang="nn">Eg vil ha punktum mellom 000</label>

test/checker/schemas.xml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0"?>
2+
<locatingRules xmlns="http://thaiopensource.com/ns/locating-rules/1.0">
3+
4+
<!--
5+
Having this file in the same dir as your XML's
6+
lets Emacs show validation errors.
7+
More information at:
8+
https://www.gnu.org/software/emacs/manual/html_node/nxml-mode/Schema-locating-files.html#Schema-locating-files
9+
-->
10+
11+
<typeId id="errors" uri="../../src/errors.rnc"/>
12+
<documentElement localName="errors" typeId="errors"/>
13+
<uri pattern="errors.xml" typeId="errors"/>
14+
15+
<typeId id="pipespec" uri="../../src/pipespec.rnc"/>
16+
<documentElement localName="pipespec" typeId="pipespec"/>
17+
<uri pattern="pipespec.xml" typeId="pipespec"/>
18+
19+
</locatingRules>

0 commit comments

Comments
 (0)