Skip to content

Commit

Permalink
formatted and added test
Browse files Browse the repository at this point in the history
  • Loading branch information
hrshdhgd committed Aug 9, 2023
1 parent 24b3294 commit 903b816
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 12 deletions.
16 changes: 10 additions & 6 deletions src/oaklib/implementations/semsimian/semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ def __post_init__(self):
if not any(attr.startswith(s) for s in ["class_", "_"])
]

def _get_semsimian_object(self, predicates: List[PRED_CURIE] = None) -> Semsimian:
def _get_semsimian_object(
self, predicates: List[PRED_CURIE] = None, attributes: List[str] = None
) -> Semsimian:
"""
Get Semsimian object from "semsimian_object_cache" or add a new one.
Expand All @@ -79,9 +81,7 @@ def _get_semsimian_object(self, predicates: List[PRED_CURIE] = None) -> Semsimia
include_entailed=True, predicates=predicates
)
]
self.semsimian_object_cache[predicates] = Semsimian(
spo, self.term_pairwise_similarity_attributes
)
self.semsimian_object_cache[predicates] = Semsimian(spo, attributes)

return self.semsimian_object_cache[predicates]

Expand All @@ -108,7 +108,9 @@ def pairwise_similarity(
:return:
"""
logging.debug(f"Calculating pairwise similarity for {subject} x {object} over {predicates}")
semsimian = self._get_semsimian_object(predicates=predicates)
semsimian = self._get_semsimian_object(
predicates=predicates, attributes=self.term_pairwise_similarity_attributes
)

jaccard_val = semsimian.jaccard_similarity(subject, object, set(predicates))

Expand Down Expand Up @@ -163,7 +165,9 @@ def all_by_all_pairwise_similarity(
"""
objects = list(objects)
logging.info(f"Calculating all-by-all pairwise similarity for {len(objects)} objects")
semsimian = self._get_semsimian_object(predicates=predicates)
semsimian = self._get_semsimian_object(
predicates=predicates, attributes=self.term_pairwise_similarity_attributes
)
all_results = semsimian.all_by_all_pairwise_similarity(
subject_terms=set(subjects),
object_terms=set(objects),
Expand Down
43 changes: 37 additions & 6 deletions tests/test_implementations/test_semsimian_implementation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import timeit
import unittest

from linkml_runtime.dumpers import yaml_dumper
Expand Down Expand Up @@ -45,6 +46,14 @@ def setUp(self) -> None:
self.other_oi = comparison_oi
self.db = db
self.compliance_tester = ComplianceTester(self)
self.subject_terms = {VACUOLE, NUCLEUS, NUCLEAR_MEMBRANE}
self.object_terms = {ENDOMEMBRANE_SYSTEM, HUMAN, FUNGI}
self.predicates = {IS_A, PART_OF}
self.term_pairwise_similarity_attributes = [
attr
for attr in vars(TermPairwiseSimilarity)
if not any(attr.startswith(s) for s in ["class_", "_"])
]

def test_definitions(self):
"""Definitions should be delegated to the wrapped adapter."""
Expand All @@ -65,7 +74,7 @@ def test_compare_pairwise_similarity(self):
debug = False
for s in entities:
for o in entities:
for preds in [[IS_A, PART_OF]]:
for preds in [self.predicates]:
sim = adapter.pairwise_similarity(s, o, predicates=preds)

original_sim = self.other_oi.pairwise_similarity(s, o, predicates=preds)
Expand Down Expand Up @@ -97,10 +106,9 @@ def test_compare_pairwise_similarity(self):
print(yaml_dumper.dumps(original_sim))

def test_all_by_all_pairwise_similarity(self):
subject_terms = {VACUOLE, NUCLEUS, NUCLEAR_MEMBRANE}
object_terms = {ENDOMEMBRANE_SYSTEM, HUMAN, FUNGI}
predicates = {IS_A, PART_OF}
result = self.oi.all_by_all_pairwise_similarity(subject_terms, object_terms, predicates)
result = self.oi.all_by_all_pairwise_similarity(
self.subject_terms, self.object_terms, self.predicates
)
sem_similarity_object: TermPairwiseSimilarity = [
x for x in result if x.subject_id == "GO:0031965" and x.object_id == "GO:0012505"
][0]
Expand All @@ -110,7 +118,7 @@ def test_all_by_all_pairwise_similarity(self):
self.assertEqual(sem_similarity_object.phenodigm_score, 1.672622556711612)

result2 = self.other_oi.all_by_all_pairwise_similarity(
subject_terms, object_terms, predicates
self.subject_terms, self.object_terms, self.predicates
)

sql_similarity_object: TermPairwiseSimilarity = [
Expand All @@ -128,3 +136,26 @@ def test_all_by_all_pairwise_similarity(self):
self.assertAlmostEqual(
sem_similarity_object.phenodigm_score, sql_similarity_object.phenodigm_score, places=2
)

def test_semsimian_object_cache(self):
    """Check that equal predicate sets share one cached Semsimian object.

    The same all-by-all comparison is run twice against ``self.oi``: first
    with ``self.predicates`` and then with a separately built set holding
    the same members. Afterwards ``self.oi.semsimian_object_cache`` must
    contain exactly one entry, and the second run must have taken less
    wall-clock time than the first.
    """

    def timed_run(predicates):
        """Return the seconds taken to run and fully consume one comparison."""
        started = timeit.default_timer()
        # list() drains the result so the measurement covers the whole
        # computation (presumably the adapter returns an iterator).
        list(
            self.oi.all_by_all_pairwise_similarity(
                self.subject_terms, self.object_terms, predicates
            )
        )
        return timeit.default_timer() - started

    time_taken_1 = timed_run(self.predicates)

    # A distinct set object with the same members as self.predicates: it
    # compares equal, so it should map to the same cache entry.
    shuffled_predicate = set(reversed(list(self.predicates)))
    time_taken_2 = timed_run(shuffled_predicate)

    self.assertEqual(len(self.oi.semsimian_object_cache), 1)
    # NOTE(review): this is a wall-clock comparison and may be flaky on a
    # noisy machine; the cache-size assertion above is the deterministic check.
    self.assertGreater(
        time_taken_1,
        time_taken_2,
        f"expected the cached run ({time_taken_2:.6f}s) to be faster than "
        f"the first run ({time_taken_1:.6f}s)",
    )

0 comments on commit 903b816

Please sign in to comment.