Skip to content

Commit

Permalink
Added wmt21/systems for system outputs (#214)
Browse files Browse the repository at this point in the history
  • Loading branch information
mjpost authored Oct 11, 2022
1 parent 37de171 commit e416ee2
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- (#213) Added JSON formatting for multi-system output (thanks to Manikanta Inugurthi @me-manikanta)
- (#211) You can now list all test sets for a language pair with `--list SRC-TRG`.
Thanks to Jaume Zaragoza (@ZJaume) for adding this feature.
- System outputs: Added `-t wmt21/systems` which will produce WMT system outputs

- 2.2.1 (2022-09-13)
Bugfix: Standard usage was returning (and using) each reference twice.
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,21 @@ $ sacrebleu -t wmt17 -l en-de --echo src > wmt17.en-de.en
$ cat wmt17.en-de.en | translate.sh | sacrebleu -t wmt17 -l en-de
```

Some test sets also have the outputs of systems that were submitted to the task.
For example, see the `wmt21/systems` test set.

```bash
$ sacrebleu -t wmt21/systems -l zh-en --echo NiuTrans
```

This provides a convenient way to score:

```bash
$ sacrebleu -t wmt21/systems -l zh-en --echo NiuTrans | sacrebleu -t wmt21/systems -l zh-en
```

You can see a list of the available outputs by passing an invalid value to `--echo`.

### JSON output

As of version `>=2.0.0`, sacreBLEU prints the computed scores in JSON format to make parsing less painful:
Expand Down
29 changes: 29 additions & 0 deletions sacrebleu/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,35 @@

DATASETS = {
# wmt
"wmt21/systems": WMTXMLDataset(
"wmt21/systems",
data=["https://github.com/wmt-conference/wmt21-news-systems/archive/refs/tags/v1.3.tar.gz"],
description="WMT21 system output.",
md5=["a6aee4099da58f98f71eb3fac1694237"],
langpairs={
"de-fr": ["wmt21-news-systems-1.3/xml/newstest2021.de-fr.all.xml"],
"en-de": ["wmt21-news-systems-1.3/xml/newstest2021.en-de.all.xml"],
"en-ha": ["wmt21-news-systems-1.3/xml/newstest2021.en-ha.all.xml"],
"en-is": ["wmt21-news-systems-1.3/xml/newstest2021.en-is.all.xml"],
"en-ja": ["wmt21-news-systems-1.3/xml/newstest2021.en-ja.all.xml"],
"fr-de": ["wmt21-news-systems-1.3/xml/newstest2021.fr-de.all.xml"],
"ha-en": ["wmt21-news-systems-1.3/xml/newstest2021.ha-en.all.xml"],
"is-en": ["wmt21-news-systems-1.3/xml/newstest2021.is-en.all.xml"],
"ja-en": ["wmt21-news-systems-1.3/xml/newstest2021.ja-en.all.xml"],
"zh-en": ["wmt21-news-systems-1.3/xml/newstest2021.zh-en.all.xml"],
"en-zh": ["wmt21-news-systems-1.3/xml/newstest2021.en-zh.all.xml"],
"cs-en": ["wmt21-news-systems-1.3/xml/newstest2021.cs-en.all.xml"],
"de-en": ["wmt21-news-systems-1.3/xml/newstest2021.de-en.all.xml"],
"en-cs": ["wmt21-news-systems-1.3/xml/newstest2021.en-cs.all.xml"],
"en-ru": ["wmt21-news-systems-1.3/xml/newstest2021.en-ru.all.xml"],
"ru-en": ["wmt21-news-systems-1.3/xml/newstest2021.ru-en.all.xml"],
"bn-hi": ["wmt21-news-systems-1.3/xml/florestest2021.bn-hi.all.xml"],
"hi-bn": ["wmt21-news-systems-1.3/xml/florestest2021.hi-bn.all.xml"],
"xh-zu": ["wmt21-news-systems-1.3/xml/florestest2021.xh-zu.all.xml"],
"zu-xh": ["wmt21-news-systems-1.3/xml/florestest2021.zu-xh.all.xml"],
},
),

"wmt21": WMTXMLDataset(
"wmt21",
data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
Expand Down
16 changes: 14 additions & 2 deletions sacrebleu/dataset/wmt_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from ..utils import smart_open
from .base import Dataset

from collections import defaultdict


class WMTXMLDataset(Dataset):
"""
Expand Down Expand Up @@ -44,6 +46,7 @@ def _unwrap_wmt21_or_later(raw_file, allowed_refs=[]):
assert (
len(ref_langs) == 1
), f"Multiple reference languages found in the file: {raw_file}"

src = []
docids = []
orig_langs = []
Expand All @@ -56,6 +59,8 @@ def get_field_by_translator(translator):

refs = {get_field_by_translator(translator): [] for translator in translators}

systems = defaultdict(list)

src_sent_count, doc_count = 0, 0
for doc in tree.getroot().findall(".//doc"):
docid = doc.attrib["id"]
Expand All @@ -82,20 +87,27 @@ def get_sents(doc):
ref_doc.get("translator"): get_sents(ref_doc) for ref_doc in ref_docs
}

hyp_docs = doc.findall(".//hyp")
hyps = {
hyp_doc.get("system"): get_sents(hyp_doc) for hyp_doc in hyp_docs
}

for seg_id in sorted(src_sents.keys()):
# no ref translation is avaliable for this segment
# no ref translation is available for this segment
if not any([value.get(seg_id, "") for value in trans_to_ref.values()]):
continue
for translator in translators:
refs[get_field_by_translator(translator)].append(
trans_to_ref.get(translator, {translator: {}}).get(seg_id, "")
)
src.append(src_sents[seg_id])
for system_name in hyps.keys():
systems[system_name].append(hyps[system_name][seg_id])
docids.append(docid)
orig_langs.append(origlang)
src_sent_count += 1

return {"src": src, **refs, "docid": docids, "origlang": orig_langs,}
return {"src": src, **refs, "docid": docids, "origlang": orig_langs, **systems}

def process_to_text(self, langpair=None):
"""Processes raw files to plain text files.
Expand Down
2 changes: 1 addition & 1 deletion sacrebleu/sacrebleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def parse_args():
help='Use a subset of sentences whose document annotation matches a given regex (see SUBSETS in the source code).')
arg_parser.add_argument('--download', type=str, default=None,
help='Download a test set and quit.')
arg_parser.add_argument('--echo', nargs="*", type=str, default=None,
arg_parser.add_argument('--echo', nargs="+", type=str, default=None,
help='Output the source (src), reference (ref), or other available field (docid, ref:A, ref:1 for example) to STDOUT and quit. '
'You can get available fields with options `--list` and `-t`' 'For example: `sacrebleu -t wmt21 --list`. '
'If multiple fields are given, they are outputted with tsv format in the order they are given.'
Expand Down

0 comments on commit e416ee2

Please sign in to comment.