diff --git a/arekit/contrib/source/sentinerel/entities.py b/arekit/contrib/source/sentinerel/entities.py index 50fc4784..1d37a57b 100644 --- a/arekit/contrib/source/sentinerel/entities.py +++ b/arekit/contrib/source/sentinerel/entities.py @@ -35,10 +35,6 @@ def __keep_entity(self, entity): @classmethod def read_collection(cls, filename, version, entities_to_ignore=None): - """ You may adopt the following list of entities to ignore by default. - entities_to_ignore = ["EFFECT_NEG", "EFFECT_POS", "ARGUMENT_NEG", "ARGUMENT_POS", "EVENT"] - We setup the latter as a part of the side-project and the related experiments. - """ assert(isinstance(filename, str)) # Since this dataset does not provide the synonyms collection by default, diff --git a/arekit/contrib/source/sentinerel/reader.py b/arekit/contrib/source/sentinerel/reader.py index 97f6606f..8a23f49d 100644 --- a/arekit/contrib/source/sentinerel/reader.py +++ b/arekit/contrib/source/sentinerel/reader.py @@ -19,7 +19,7 @@ def read_text_relations(filename, version): version=version) @staticmethod - def read_document(filename, doc_id): + def read_document(filename, doc_id, entities_to_ignore=None): assert(isinstance(filename, str)) assert(isinstance(doc_id, int)) @@ -27,7 +27,14 @@ def file_to_doc(input_file): sentences = BratDocumentSentencesReader.from_file(input_file=input_file, entities=entities) return BratNews(doc_id=doc_id, sentences=sentences, text_relations=text_relations) - entities = SentiNerelEntityCollection.read_collection(filename=filename, version=SentiNerelVersions.V1) + # TODO. #398 issue -- in some cases entities might be nested. Therefore we limit the set + # TODO. of the potential named entities. + eti = ["EFFECT_NEG", "EFFECT_POS", "ARGUMENT_NEG", "ARGUMENT_POS", "EVENT"] \ + if entities_to_ignore is None else entities_to_ignore + + entities = SentiNerelEntityCollection.read_collection( + filename=filename, version=SentiNerelVersions.V1, + entities_to_ignore=eti) text_relations = SentiNerelDocReader.read_text_relations(filename=filename, version=SentiNerelVersions.V1) return SentiNerelIOUtils.read_from_zip(