Skip to content

Commit

Permalink
rolled back
Browse files Browse the repository at this point in the history
  • Loading branch information
hrshdhgd committed Jul 11, 2023
1 parent 45933a2 commit 7739a5f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 58 deletions.
46 changes: 15 additions & 31 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2654,12 +2654,6 @@ def similarity_pair(terms, predicates, autolabel: bool, output: TextIO, output_t
show_default=True,
help="Score used for summarization",
)
@click.option(
"--quick/--no-quick",
default=False,
show_default=True,
help="If set, results will be generated by Rust.",
)
@autolabel_option
@output_type_option
@click.argument("terms", nargs=-1)
Expand All @@ -2669,7 +2663,6 @@ def similarity(
set1_file,
set2_file,
autolabel: bool,
quick: bool,
min_jaccard_similarity: Optional[float],
min_ancestor_information_content: Optional[float],
main_score_field,
Expand Down Expand Up @@ -2759,30 +2752,21 @@ def similarity(
else:
set2it = query_terms_iterator(terms, impl)
actual_predicates = _process_predicates_arg(predicates)
if quick:
impl.all_by_all_pairwise_similarity_quick(
set1it,
set2it,
predicates=actual_predicates,
min_jaccard_similarity=min_jaccard_similarity,
min_ancestor_information_content=min_ancestor_information_content,
)
else:
for sim in impl.all_by_all_pairwise_similarity(
set1it,
set2it,
predicates=actual_predicates,
min_jaccard_similarity=min_jaccard_similarity,
min_ancestor_information_content=min_ancestor_information_content,
):
if autolabel:
# TODO: this can be made more efficient
sim.subject_label = impl.label(sim.subject_id)
sim.object_label = impl.label(sim.object_id)
sim.ancestor_label = impl.label(sim.ancestor_id)
writer.emit(sim)
writer.finish()
writer.file.close()
for sim in impl.all_by_all_pairwise_similarity(
set1it,
set2it,
predicates=actual_predicates,
min_jaccard_similarity=min_jaccard_similarity,
min_ancestor_information_content=min_ancestor_information_content,
):
if autolabel:
# TODO: this can be made more efficient
sim.subject_label = impl.label(sim.subject_id)
sim.object_label = impl.label(sim.object_id)
sim.ancestor_label = impl.label(sim.ancestor_id)
writer.emit(sim)
writer.finish()
writer.file.close()
else:
raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")

Expand Down
27 changes: 0 additions & 27 deletions src/oaklib/implementations/semsimian/semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,30 +165,3 @@ def all_by_all_pairwise_similarity(
sim.jaccard_similarity = 0
sim.ancestor_information_content = 0
yield sim

def all_by_all_pairwise_similarity_quick(
    self,
    subjects: Iterable[CURIE],
    objects: Iterable[CURIE],
    predicates: Optional[List[PRED_CURIE]] = None,
    min_jaccard_similarity: Optional[float] = None,
    min_ancestor_information_content: Optional[float] = None,
    outfile: str = "rust_output.tsv",
) -> None:
    """
    Compute similarity for all combinations of terms in subjects vs all terms in objects.

    The work is delegated to ``self.semsimian.all_by_all_pairwise_similarity_quick``;
    this method returns nothing and hands ``outfile`` to that call
    (presumably the path the results are written to -- confirm against the
    semsimian API).

    :param subjects: subject term IDs; consumed once and de-duplicated into a set
    :param objects: object term IDs; consumed once and de-duplicated into a set
    :param predicates: predicates forwarded as a set; None (or empty) is forwarded as None
    :param min_jaccard_similarity: forwarded as ``minimum_jaccard_threshold``
    :param min_ancestor_information_content: forwarded as ``minimum_resnik_threshold``
    :param outfile: forwarded as ``outfile``; defaults to the previously
        hard-coded "rust_output.tsv" so existing callers are unaffected
    :return: None
    """
    # Materialise first: `objects` may be a one-shot iterator and is needed
    # both for the log message and for the set handed to semsimian.
    objects = list(objects)
    logging.info(f"Calculating all-by-all pairwise similarity for {len(objects)} objects")
    self.semsimian.all_by_all_pairwise_similarity_quick(
        subject_terms=set(subjects),
        object_terms=set(objects),
        minimum_jaccard_threshold=min_jaccard_similarity,
        minimum_resnik_threshold=min_ancestor_information_content,
        predicates=set(predicates) if predicates else None,
        outfile=outfile,
    )

0 comments on commit 7739a5f

Please sign in to comment.