diff --git a/CHANGELOG.md b/CHANGELOG.md
index c933acd0..8affe248 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # VERSION HISTORY

+- 1.4.13 (2020-07-30)
+   - Added WMT20 newstest test sets (#103)
+   - Make mecab-python3 an extra dependency and adapt the code to the new mecab-python3.
+     This also fixes the recent Windows installation issues (#104).
+     Japanese support should now be installed explicitly through the sacrebleu[ja] package.
+   - Fix the return type annotation of corpus_bleu()
+   - Improve sentence_score's documentation; do not allow a single reference string (#98)
+
 - 1.4.12 (2020-07-03)
    - Fix a deployment bug (#96)
diff --git a/README.md b/README.md
index 8c3e6700..9a2c92b9 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,11 @@ Install the Python module (Python 3 only)

     pip3 install sacrebleu

+To install Japanese tokenizer support through `mecab-python3`, run the following
+command instead; it performs a full installation with the required dependencies:
+
+    pip3 install sacrebleu[ja]
+
 Alternately, you can install from the source:

     python3 setup.py install
diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py
index cb8c2273..72590fe6 100644
--- a/sacrebleu/__init__.py
+++ b/sacrebleu/__init__.py
@@ -14,7 +14,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

-__version__ = '1.4.12'
+__version__ = '1.4.13'

 __description__ = 'Hassle-free computation of shareable, comparable, and reproducible BLEU scores'
diff --git a/sacrebleu/dataset.py b/sacrebleu/dataset.py
index 2b7e7d67..3660c9f5 100644
--- a/sacrebleu/dataset.py
+++ b/sacrebleu/dataset.py
@@ -22,6 +22,43 @@
 # Many of these are *.sgm files, which are processed to produced plain text that can be used by this script.
 # The canonical location of unpacked, processed data is $SACREBLEU_DIR/$TEST/$SOURCE-$TARGET.{$SOURCE,$TARGET}
 DATASETS = {
+    "wmt20/tworefs": {
+        'data': ['http://data.statmt.org/wmt20/translation-task/test.tgz'],
+        'description': 'WMT20 news test sets with two references',
+        'md5': ['3b1f777cfd2fb15ccf66e9bfdb2b1699'],
+        'de-en': ['sgm/newstest2020-deen-src.de.sgm', 'sgm/newstest2020-deen-ref.en.sgm', 'sgm/newstestB2020-deen-ref.en.sgm'],
+        'en-de': ['sgm/newstest2020-ende-src.en.sgm', 'sgm/newstest2020-ende-ref.de.sgm', 'sgm/newstestB2020-ende-ref.de.sgm'],
+        'en-zh': ['sgm/newstest2020-enzh-src.en.sgm', 'sgm/newstest2020-enzh-ref.zh.sgm', 'sgm/newstestB2020-enzh-ref.zh.sgm'],
+        'ru-en': ['sgm/newstest2020-ruen-src.ru.sgm', 'sgm/newstest2020-ruen-ref.en.sgm', 'sgm/newstestB2020-ruen-ref.en.sgm'],
+        'zh-en': ['sgm/newstest2020-zhen-src.zh.sgm', 'sgm/newstest2020-zhen-ref.en.sgm', 'sgm/newstestB2020-zhen-ref.en.sgm'],
+    },
+    "wmt20": {
+        'data': ['http://data.statmt.org/wmt20/translation-task/test.tgz'],
+        'description': 'Official evaluation data for WMT20',
+        'md5': ['3b1f777cfd2fb15ccf66e9bfdb2b1699'],
+        'cs-en': ['sgm/newstest2020-csen-src.cs.sgm', 'sgm/newstest2020-csen-ref.en.sgm'],
+        'de-en': ['sgm/newstest2020-deen-src.de.sgm', 'sgm/newstest2020-deen-ref.en.sgm'],
+        'de-fr': ['sgm/newstest2020-defr-src.de.sgm', 'sgm/newstest2020-defr-ref.fr.sgm'],
+        'en-cs': ['sgm/newstest2020-encs-src.en.sgm', 'sgm/newstest2020-encs-ref.cs.sgm'],
+        'en-de': ['sgm/newstest2020-ende-src.en.sgm', 'sgm/newstest2020-ende-ref.de.sgm'],
+        'en-iu': ['sgm/newstest2020-eniu-src.en.sgm', 'sgm/newstest2020-eniu-ref.iu.sgm'],
+        'en-ja': ['sgm/newstest2020-enja-src.en.sgm', 'sgm/newstest2020-enja-ref.ja.sgm'],
+        'en-km': ['sgm/newstest2020-enkm-src.en.sgm', 'sgm/newstest2020-enkm-ref.km.sgm'],
+        'en-pl': ['sgm/newstest2020-enpl-src.en.sgm', 'sgm/newstest2020-enpl-ref.pl.sgm'],
+        'en-ps': ['sgm/newstest2020-enps-src.en.sgm', 'sgm/newstest2020-enps-ref.ps.sgm'],
+        'en-ru': ['sgm/newstest2020-enru-src.en.sgm', 'sgm/newstest2020-enru-ref.ru.sgm'],
+        'en-ta': ['sgm/newstest2020-enta-src.en.sgm', 'sgm/newstest2020-enta-ref.ta.sgm'],
+        'en-zh': ['sgm/newstest2020-enzh-src.en.sgm', 'sgm/newstest2020-enzh-ref.zh.sgm'],
+        'fr-de': ['sgm/newstest2020-frde-src.fr.sgm', 'sgm/newstest2020-frde-ref.de.sgm'],
+        'iu-en': ['sgm/newstest2020-iuen-src.iu.sgm', 'sgm/newstest2020-iuen-ref.en.sgm'],
+        'ja-en': ['sgm/newstest2020-jaen-src.ja.sgm', 'sgm/newstest2020-jaen-ref.en.sgm'],
+        'km-en': ['sgm/newstest2020-kmen-src.km.sgm', 'sgm/newstest2020-kmen-ref.en.sgm'],
+        'pl-en': ['sgm/newstest2020-plen-src.pl.sgm', 'sgm/newstest2020-plen-ref.en.sgm'],
+        'ps-en': ['sgm/newstest2020-psen-src.ps.sgm', 'sgm/newstest2020-psen-ref.en.sgm'],
+        'ru-en': ['sgm/newstest2020-ruen-src.ru.sgm', 'sgm/newstest2020-ruen-ref.en.sgm'],
+        'ta-en': ['sgm/newstest2020-taen-src.ta.sgm', 'sgm/newstest2020-taen-ref.en.sgm'],
+        'zh-en': ['sgm/newstest2020-zhen-src.zh.sgm', 'sgm/newstest2020-zhen-ref.en.sgm'],
+    },
     'mtnt2019': {
         'data': ['http://www.cs.cmu.edu/~pmichel1/hosting/MTNT2019.tar.gz'],
         'description': 'Test set for the WMT 19 robustness shared task',
@@ -78,6 +115,31 @@
         'data': ['http://data.statmt.org/wmt19/translation-task/test.tgz'],
         'description': 'Official evaluation data.',
         'md5': ['84de7162d158e28403103b01aeefc39a'],
+        'citation': r"""@proceedings{ws-2019-machine,
+    title = "Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)",
+    editor = "Bojar, Ond{\v{r}}ej and
+      Chatterjee, Rajen and
+      Federmann, Christian and
+      Fishel, Mark and
+      Graham, Yvette and
+      Haddow, Barry and
+      Huck, Matthias and
+      Yepes, Antonio Jimeno and
+      Koehn, Philipp and
+      Martins, Andr{\'e} and
+      Monz, Christof and
+      Negri, Matteo and
+      N{\'e}v{\'e}ol, Aur{\'e}lie and
+      Neves, Mariana and
+      Post, Matt and
+      Turchi, Marco and
+      Verspoor, Karin",
+    month = aug,
+    year = "2019",
+    address = "Florence, Italy",
+    publisher = "Association for Computational Linguistics",
+    url = "https://www.aclweb.org/anthology/W19-5200",
+}""",
         'cs-de': ['sgm/newstest2019-csde-src.cs.sgm', 'sgm/newstest2019-csde-ref.de.sgm'],
         'de-cs': ['sgm/newstest2019-decs-src.de.sgm', 'sgm/newstest2019-decs-ref.cs.sgm'],
         'de-en': ['sgm/newstest2019-deen-src.de.sgm', 'sgm/newstest2019-deen-ref.en.sgm'],