From 7739a5fd0ab50269a9edb4fce4f547ec50341e7d Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 11 Jul 2023 17:53:10 -0500 Subject: [PATCH] rolled back --- src/oaklib/cli.py | 46 ++++++------------- .../semsimian/semsimian_implementation.py | 27 ----------- 2 files changed, 15 insertions(+), 58 deletions(-) diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py index 109c64ffa..4bd7d14f6 100644 --- a/src/oaklib/cli.py +++ b/src/oaklib/cli.py @@ -2654,12 +2654,6 @@ def similarity_pair(terms, predicates, autolabel: bool, output: TextIO, output_t show_default=True, help="Score used for summarization", ) -@click.option( - "--quick/--no-quick", - default=False, - show_default=True, - help="If set, results will be generated by Rust.", -) @autolabel_option @output_type_option @click.argument("terms", nargs=-1) @@ -2669,7 +2663,6 @@ def similarity( set1_file, set2_file, autolabel: bool, - quick: bool, min_jaccard_similarity: Optional[float], min_ancestor_information_content: Optional[float], main_score_field, @@ -2759,30 +2752,21 @@ def similarity( else: set2it = query_terms_iterator(terms, impl) actual_predicates = _process_predicates_arg(predicates) - if quick: - impl.all_by_all_pairwise_similarity_quick( - set1it, - set2it, - predicates=actual_predicates, - min_jaccard_similarity=min_jaccard_similarity, - min_ancestor_information_content=min_ancestor_information_content, - ) - else: - for sim in impl.all_by_all_pairwise_similarity( - set1it, - set2it, - predicates=actual_predicates, - min_jaccard_similarity=min_jaccard_similarity, - min_ancestor_information_content=min_ancestor_information_content, - ): - if autolabel: - # TODO: this can be made more efficient - sim.subject_label = impl.label(sim.subject_id) - sim.object_label = impl.label(sim.object_id) - sim.ancestor_label = impl.label(sim.ancestor_id) - writer.emit(sim) - writer.finish() - writer.file.close() + for sim in impl.all_by_all_pairwise_similarity( + set1it, + set2it, + predicates=actual_predicates, + min_jaccard_similarity=min_jaccard_similarity, + min_ancestor_information_content=min_ancestor_information_content, + ): + if autolabel: + # TODO: this can be made more efficient + sim.subject_label = impl.label(sim.subject_id) + sim.object_label = impl.label(sim.object_id) + sim.ancestor_label = impl.label(sim.ancestor_id) + writer.emit(sim) + writer.finish() + writer.file.close() else: raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}") diff --git a/src/oaklib/implementations/semsimian/semsimian_implementation.py b/src/oaklib/implementations/semsimian/semsimian_implementation.py index 9b2231e39..93d7cf181 100644 --- a/src/oaklib/implementations/semsimian/semsimian_implementation.py +++ b/src/oaklib/implementations/semsimian/semsimian_implementation.py @@ -165,30 +165,3 @@ def all_by_all_pairwise_similarity( sim.jaccard_similarity = 0 sim.ancestor_information_content = 0 yield sim - - def all_by_all_pairwise_similarity_quick( - self, - subjects: Iterable[CURIE], - objects: Iterable[CURIE], - predicates: List[PRED_CURIE] = None, - min_jaccard_similarity: Optional[float] = None, - min_ancestor_information_content: Optional[float] = None, - ) -> None: - """ - Compute similarity for all combinations of terms in subsets vs all terms in objects - - :param subjects: - :param objects: - :param predicates: - :return: - """ - objects = list(objects) - logging.info(f"Calculating all-by-all pairwise similarity for {len(objects)} objects") - self.semsimian.all_by_all_pairwise_similarity_quick( - subject_terms=set(subjects), - object_terms=set(objects), - minimum_jaccard_threshold=min_jaccard_similarity, - minimum_resnik_threshold=min_ancestor_information_content, - predicates=set(predicates) if predicates else None, - outfile="rust_output.tsv", - )