Skip to content

Commit

Permalink
Added temp flag --score-only and termset_pairwise_similarity_temp
Browse files Browse the repository at this point in the history
  • Loading branch information
hrshdhgd committed Aug 9, 2023
1 parent cf69712 commit 37dac2b
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 10 deletions.
18 changes: 15 additions & 3 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2796,11 +2796,18 @@ def similarity(
@output_option
@output_type_option
@autolabel_option
@click.option(
"--score-only/--no-score-only",
default=False,
show_default=True,
help="If set, results will automatically have labels assigned [THIS IS TEMPORARY]",
)
@click.argument("terms", nargs=-1)
def termset_similarity(
terms,
predicates,
autolabel,
score_only, # TEMPORARY
output_type,
output: TextIO,
):
Expand Down Expand Up @@ -2833,9 +2840,14 @@ def termset_similarity(
logging.info(f"Set1={set1}")
logging.info(f"Set2={set2}")
actual_predicates = _process_predicates_arg(predicates)
sim = impl.termset_pairwise_similarity(
set1, set2, predicates=actual_predicates, labels=autolabel
)
if score_only:
sim = impl.termset_pairwise_similarity_temp(
set1, set2, predicates=actual_predicates, labels=autolabel
)
else:
sim = impl.termset_pairwise_similarity(
set1, set2, predicates=actual_predicates, labels=autolabel
)
writer.emit(sim)
writer.finish()

Expand Down
53 changes: 46 additions & 7 deletions src/oaklib/implementations/semsimian/semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from semsimian import Semsimian

from oaklib.datamodels.similarity import TermPairwiseSimilarity
from oaklib.datamodels.similarity import TermPairwiseSimilarity, TermSetPairwiseSimilarity
from oaklib.datamodels.vocabulary import OWL_THING
from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface
from oaklib.interfaces.obograph_interface import OboGraphInterface
Expand Down Expand Up @@ -63,9 +63,14 @@ def __post_init__(self):
for attr in vars(TermPairwiseSimilarity)
if not any(attr.startswith(s) for s in ["class_", "_"])
]
self.termset_pairwise_similarity_attributes = [
attr
for attr in vars(TermSetPairwiseSimilarity)
if not any(attr.startswith(s) for s in ["class_", "_"])
]

def _get_semsimian_object(
self, predicates: List[PRED_CURIE] = None, attributes: List[str] = None
self, predicates: List[PRED_CURIE] = None, attributes: List[str] = None, resource_path: str = None
) -> Semsimian:
"""
Get Semsimian object from "semsimian_object_cache" or add a new one.
Expand All @@ -81,7 +86,7 @@ def _get_semsimian_object(
include_entailed=True, predicates=predicates
)
]
self.semsimian_object_cache[predicates] = Semsimian(spo, attributes)
self.semsimian_object_cache[predicates] = Semsimian(spo, predicates, attributes, resource_path)

return self.semsimian_object_cache[predicates]

Expand Down Expand Up @@ -112,7 +117,7 @@ def pairwise_similarity(
predicates=predicates, attributes=self.term_pairwise_similarity_attributes
)

jaccard_val = semsimian.jaccard_similarity(subject, object, set(predicates))
jaccard_val = semsimian.jaccard_similarity(subject, object)

if math.isnan(jaccard_val):
return None
Expand All @@ -121,7 +126,7 @@ def pairwise_similarity(
return None

_, ancestor_information_content_val = semsimian.resnik_similarity(
subject, object, set(predicates)
subject, object
)

if math.isnan(ancestor_information_content_val):
Expand Down Expand Up @@ -173,12 +178,13 @@ def all_by_all_pairwise_similarity(
object_terms=set(objects),
minimum_jaccard_threshold=min_jaccard_similarity,
minimum_resnik_threshold=min_ancestor_information_content,
predicates=set(predicates) if predicates else None,
# predicates=set(predicates) if predicates else None,
)
logging.info("Post-processing results from semsimian")
for term1_key, values in all_results.items():
for term2_key, result in values.items():
jaccard, resnik, phenodigm_score, ancestor_set = result
# Remember the _ here is cosine_similarity which we do not use at the moment.
jaccard, resnik, phenodigm_score, _, ancestor_set = result
if len(ancestor_set) > 0:
sim = TermPairwiseSimilarity(
subject_id=term1_key,
Expand All @@ -197,3 +203,36 @@ def all_by_all_pairwise_similarity(
sim.jaccard_similarity = 0
sim.ancestor_information_content = 0
yield sim

def termset_pairwise_similarity_temp(
    self,
    subjects: List[CURIE],
    objects: List[CURIE],
    predicates: List[PRED_CURIE] = None,
    labels=False,
) -> TermSetPairwiseSimilarity:
    """Return a TermSetPairwiseSimilarity with only ``average_score`` set [TEMPORARY].

    The two term lists are de-duplicated into sets and handed to
    ``Semsimian.termset_comparison``; whatever that call returns is stored
    as the ``average_score`` of an otherwise empty result object.

    :param subjects: List of subject nodes.
    :param objects: List of object nodes.
    :param predicates: List of predicates used to obtain the Semsimian
        object, defaults to None
    :param labels: Accepted so the method can be called with the same
        arguments as ``termset_pairwise_similarity``; currently unused
        here, defaults to False
    :return: TermSetPairwiseSimilarity object with ``average_score`` populated
    """
    semsimian = self._get_semsimian_object(
        predicates=predicates,
        attributes=self.termset_pairwise_similarity_attributes,
    )

    # Build the empty result first, then attach the score, so no other
    # field of the result object is touched.
    sim = TermSetPairwiseSimilarity()
    sim.average_score = semsimian.termset_comparison(set(subjects), set(objects))
    return sim

0 comments on commit 37dac2b

Please sign in to comment.