diff --git a/cv_py/__init__.py b/cv_py/__init__.py index d8ea7044a8..b9aba0e337 100644 --- a/cv_py/__init__.py +++ b/cv_py/__init__.py @@ -1,5 +1,18 @@ +import scispacy + __title__ = "cv-py" + +__scispacy_version__ = scispacy.__version__ + __compatible__ = { "cord19_cdcs": "~0.2.3", + "en_core_sci_sm": __scispacy_version__, + "en_core_sci_md": __scispacy_version__, + "en_core_sci_lg": __scispacy_version__, + "en_ner_craft_md": __scispacy_version__, + "en_ner_jnlpba_md": __scispacy_version__, + "en_ner_bc5cdr_md": __scispacy_version__, + "en_ner_bionlp13cg_md": __scispacy_version__, + "en_core_sci_scibert": __scispacy_version__ } __release__ = True diff --git a/cv_py/resources/datapackage.py b/cv_py/resources/datapackage.py index d963b65456..4ef420e2ee 100644 --- a/cv_py/resources/datapackage.py +++ b/cv_py/resources/datapackage.py @@ -5,7 +5,7 @@ import os import subprocess import sys -from cv_py import __compatible__ +from cv_py import __compatible__, __scispacy_version__ import argparse import re from pathlib import Path @@ -15,7 +15,6 @@ import requests import semantic_version as sv - __all__ = ["load"] @@ -40,17 +39,9 @@ def get_filename(datapackage="cord19_cdcs"): f"https://github.com/{repo}/releases/download/v{v}/cord19-cdcs-{v}.tar.gz" ) return fname - elif datapackage in [ # Sci-spaCy - "en_core_sci_sm", - "en_core_sci_md", - "en_core_sci_lg", - "en_ner_craft_md", - "en_ner_jnlpba_md", - "en_ner_bc5cdr_md", - "en_ner_bionlp13cg_md", - ]: + else: fname = ( - f"https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/{datapackage}-0.2.4.tar.gz" + f"https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v{__scispacy_version__}/{datapackage}-{__scispacy_version__}.tar.gz" ) return fname diff --git a/pyproject.toml b/pyproject.toml index 83b43f99d3..6d4e3f9adc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,12 @@ pyarrow = "^3.0.0" scikit-learn = "^0.24.1" dask = {extras = ["complete"], version = "^2.13.0"} +#necessary to download 
the scispacy language models whose release version matches the installed scispacy package +scispacy = "^0.4.4" + # A list of optional dependencies, choosable by module ## Spacy ecosystem textacy = { version = "^0.10.0", optional = true} -scispacy = { version = "^0.2.4", optional = true} ## flair ecosystem flair = { version = "^0.4.5", optional = true}