diff --git a/setup.cfg b/setup.cfg index af62f016..79c94d76 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,7 @@ python_requires = >=3.7, <3.11 install_requires = anaforatools~=1.1.0 datasets~=2.4.0 + en-core-sci-sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_sm-0.5.1.tar.gz ; platform_machine!='arm64' and platform_machine!='aarch64' fastapi~=0.88.0 filelock~=3.4.0 huggingface-hub~=0.9.0 @@ -36,7 +37,9 @@ install_requires = PyRuSH~=1.0.3.6 requests~=2.26.0 scikit-learn~=1.0.2 + scispacy==0.5.1; platform_machine!='arm64' and platform_machine!='aarch64' seqeval~=1.2.2 + spacy~=3.4.0 torch~=1.5 transformers~=4.15 uvicorn[standard]~=0.16.0 diff --git a/src/cnlpt/data/transform_prot.py b/src/cnlpt/data/transform_prot.py index 768a0b74..7e9873ec 100644 --- a/src/cnlpt/data/transform_prot.py +++ b/src/cnlpt/data/transform_prot.py @@ -12,7 +12,14 @@ TEST_DIR = "chemprot_development" TRAIN_DIR = "chemprot_training" -nlp = spacy.load("en_core_sci_sm") + +nlp = None +try: + import scispacy + nlp = spacy.load("en_core_sci_sm") +except (ImportError, OSError): + print("scispacy or its en_core_sci_sm model is not installed; cannot run transform_prot.") + sys.exit(1) def remove_newline(review): @@ -56,7 +63,10 @@ def get_intersect(ls1, ls2): def to_stanza_style_dict(text): - processed_doc = nlp(text) + if nlp is not None: + processed_doc = nlp(text) + else: + raise RuntimeError("nlp object not initialized but execution proceeded??") def sent_dict(spacy_sent): return [