diff --git a/build_doc_source.sh b/build_doc_source.sh old mode 100644 new mode 100755 diff --git a/docs/cnlpt.api.hier_rest.rst b/docs/cnlpt.api.hier_rest.rst new file mode 100644 index 00000000..6cb487d4 --- /dev/null +++ b/docs/cnlpt.api.hier_rest.rst @@ -0,0 +1,6 @@ +cnlpt.api.hier\_rest module +=========================== + +.. automodule:: cnlpt.api.hier_rest + :members: + :show-inheritance: diff --git a/docs/cnlpt.api.rst b/docs/cnlpt.api.rst index 774d1e85..a5c263ad 100644 --- a/docs/cnlpt.api.rst +++ b/docs/cnlpt.api.rst @@ -10,6 +10,7 @@ Submodules cnlpt.api.cnlp_rest cnlpt.api.dtr_rest cnlpt.api.event_rest + cnlpt.api.hier_rest cnlpt.api.negation_rest cnlpt.api.temporal_rest cnlpt.api.timex_rest diff --git a/docs/cnlpt.cnlp_args.rst b/docs/cnlpt.cnlp_args.rst new file mode 100644 index 00000000..2b78ab71 --- /dev/null +++ b/docs/cnlpt.cnlp_args.rst @@ -0,0 +1,6 @@ +cnlpt.cnlp\_args module +======================= + +.. automodule:: cnlpt.cnlp_args + :members: + :show-inheritance: diff --git a/docs/cnlpt.cnlp_metrics.rst b/docs/cnlpt.cnlp_metrics.rst new file mode 100644 index 00000000..62a58e3c --- /dev/null +++ b/docs/cnlpt.cnlp_metrics.rst @@ -0,0 +1,6 @@ +cnlpt.cnlp\_metrics module +========================== + +.. automodule:: cnlpt.cnlp_metrics + :members: + :show-inheritance: diff --git a/docs/cnlpt.cnlp_predict.rst b/docs/cnlpt.cnlp_predict.rst new file mode 100644 index 00000000..fea49987 --- /dev/null +++ b/docs/cnlpt.cnlp_predict.rst @@ -0,0 +1,6 @@ +cnlpt.cnlp\_predict module +========================== + +.. automodule:: cnlpt.cnlp_predict + :members: + :show-inheritance: diff --git a/docs/cnlpt.data.rst b/docs/cnlpt.data.rst index 9fab0f3a..8edad245 100644 --- a/docs/cnlpt.data.rst +++ b/docs/cnlpt.data.rst @@ -8,6 +8,7 @@ Submodules :maxdepth: 4 cnlpt.data.preprocess_i2b2_2008 + cnlpt.data.transform_prot cnlpt.data.transform_uci_drug Module contents diff --git a/docs/cnlpt.data.transform_prot.rst b/docs/cnlpt.data.transform_prot.rst new file mode 100644 index 00000000..3fcb0193 --- /dev/null +++ b/docs/cnlpt.data.transform_prot.rst @@ -0,0 +1,6 @@ +cnlpt.data.transform\_prot module +================================= + +.. automodule:: cnlpt.data.transform_prot + :members: + :show-inheritance: diff --git a/docs/cnlpt.rst b/docs/cnlpt.rst index 05a73959..1f22d09f 100644 --- a/docs/cnlpt.rst +++ b/docs/cnlpt.rst @@ -19,7 +19,10 @@ Submodules cnlpt.BaselineModels cnlpt.CnlpModelForClassification cnlpt.HierarchicalTransformer + cnlpt.cnlp_args cnlpt.cnlp_data + cnlpt.cnlp_metrics + cnlpt.cnlp_predict cnlpt.cnlp_processors cnlpt.thyme_eval cnlpt.train_system diff --git a/docs/transformer_objects.inv b/docs/transformer_objects.inv index b69e283d..edb9ecfd 100644 Binary files a/docs/transformer_objects.inv and b/docs/transformer_objects.inv differ diff --git a/docs/transformer_objects.txt b/docs/transformer_objects.txt index 570cca3c..681b1a81 100644 --- a/docs/transformer_objects.txt +++ b/docs/transformer_objects.txt @@ -12,3 +12,5 @@ transformers.data.processors.utils.InputExample py:class 1 main_classes/processo transformers.tokenization_utils.PreTrainedTokenizer py:class 1 main_classes/tokenizer#transformers.PreTrainedTokenizer - transformers.PretrainedConfig py:class 1 main_classes/configuration#$ - transformers.AutoConfig py:class 1 model_doc/auto#$ - +transformers.BatchEncoding py:class 1 main_classes/tokenizer#$ - +transformers.tokenization_utils_base.BatchEncoding py:class 1 main_classes/tokenizer#transformers.BatchEncoding - diff --git a/src/cnlpt/cnlp_data.py b/src/cnlpt/cnlp_data.py index 842cee6c..0db96e9b 100644 --- a/src/cnlpt/cnlp_data.py +++ b/src/cnlpt/cnlp_data.py @@ -11,7 +11,6 @@ import torch from torch.utils.data.dataset import Dataset from transformers import BatchEncoding, InputExample -# from transformers.data.processors.utils import DataProcessor, InputExample from transformers.tokenization_utils import PreTrainedTokenizer from datasets import Features from dataclasses import dataclass, field, asdict, astuple @@ -121,7 +120,7 @@ def cnlp_convert_features_to_hierarchical( :param pad_id: the tokenizer's ID representing the PAD token :param insert_empty_chunk_at_beginning: whether to insert an empty chunk at the beginning of the instance - :return: an instance of `BatchEncoding` containing the chunked instance + :return: an instance of :class:`transformers.BatchEncoding` containing the chunked instance """ for ind in range(len(features['input_ids'])): @@ -740,7 +739,7 @@ def _reconcile_labels_lists(self, processor): new_labels = set(labels) old_labels = set(self.tasks_to_labels[task]) if new_labels.isdisjoint(old_labels): - raise Exception("The same task name has disjoint sets of labels in different dataset: %s vs. %s" % (str(str(old_labels), str(new_labels)))) + raise Exception("The same task name has disjoint sets of labels in different dataset: %s vs. %s" % (str(old_labels), str(new_labels))) elif new_labels != old_labels: logger.warn("Two different datasets have the same task name but not completely equal label lists: %s vs. %s. We will merge them." (str(old_labels), str(new_labels))) self.tasks_to_labels[task] = old_labels.union(new_labels)