From aece14fb272c940dd33e255622f2c267ffb45cf1 Mon Sep 17 00:00:00 2001
From: "Hartung, Michael"
Date: Thu, 15 Aug 2024 13:08:01 +0200
Subject: [PATCH] test new setting for edge weight

---
 tasks/degree_centrality.py | 37 ++++++++++++++++++++++++++++++++++++-
 tasks/util/edge_weights.py | 51 +++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/tasks/degree_centrality.py b/tasks/degree_centrality.py
index ced0849..e79abb8 100755
--- a/tasks/degree_centrality.py
+++ b/tasks/degree_centrality.py
@@ -1,5 +1,6 @@
 from tasks.util.read_graph_tool_graph import read_graph_tool_graph
 from tasks.util.scores_to_results import scores_to_results
+from tasks.util.edge_weights import edge_weights
 from tasks.task_hook import TaskHook
 import graph_tool as gt
 import os.path
@@ -61,9 +62,31 @@ def degree_centrality(task_hook: TaskHook):
     only_atc_l_drugs = task_hook.parameters.get("only_atc_l_drugs", False)
 
     filter_paths = task_hook.parameters.get("filter_paths", True)
+
+    hub_penalty = task_hook.parameters.get("hub_penalty", 0.0)
 
     drug_target_action = task_hook.parameters.get("drug_target_action", None)
 
+    # NOTE(review): `models` is not among the imports visible in this patch - confirm it is imported in this module.
+    mutation_cancer_type = task_hook.parameters.get("mutation_cancer_type", None)
+    if mutation_cancer_type is not None:
+        mutation_cancer_type = models.MutationCancerType.objects.filter(name__iexact=mutation_cancer_type).first()
+        if mutation_cancer_type is None:
+            raise ValueError('Could not find mutation cancer type.')
+
+    expression_cancer_type = task_hook.parameters.get("expression_cancer_type", None)
+    if expression_cancer_type is not None:
+        expression_cancer_type = models.ExpressionCancerType.objects.filter(name__iexact=expression_cancer_type).first()
+        if expression_cancer_type is None:
+            raise ValueError('Could not find expression cancer type.')
+
+    tissue = task_hook.parameters.get("tissue", None)
+    if tissue is not None:
+        tissue = models.Tissue.objects.filter(name__iexact=tissue).first()
+        if tissue is None:
+            raise ValueError('Could not find tissue.')
+
+
     available_drugs = task_hook.parameters.get("available_drugs", None)
     if available_drugs is not None:
         available_drugs = [drug.lower() for drug in available_drugs]
@@ -93,6 +116,15 @@ def degree_centrality(task_hook: TaskHook):
         include_only_ctrpv2_drugs=include_only_ctrpv2_drugs
     )
 
+    weights = edge_weights(
+        g,
+        hub_penalty,
+        mutation_cancer_type,
+        expression_cancer_type,
+        tissue,
+        inverse=True,
+    )
+
     # Set number of threads if OpenMP support is enabled.
     if gt.openmp_enabled():
         gt.openmp_set_num_threads(num_threads)
@@ -103,7 +135,10 @@ def degree_centrality(task_hook: TaskHook):
     for node in seed_graph_ids:
         for nb in g.get_all_neighbors(node):
             scores.a[nb] += 1
-    
+            edge = g.edge(node, nb)
+            if edge is not None:
+                scores[nb] += weights[edge]
+
     # Compute and return the results.
     task_hook.set_progress(2 / 3.0, "Formating results.")
     task_hook.set_results(
diff --git a/tasks/util/edge_weights.py b/tasks/util/edge_weights.py
index 399d6a8..6dd1692 100755
--- a/tasks/util/edge_weights.py
+++ b/tasks/util/edge_weights.py
@@ -1,9 +1,8 @@
 import graph_tool.stats as gts
-import json
 
 SMALL_VALUE = 1*10**-10
 
-def _calc_score(g, e, label, graph_key):
+def _calc_score(g, e, label, graph_key, average_weight):
     if label is not None:
         # consider label score
         source = 1 if g.vertex_properties["type"][int(e.source())] == 'Drug' else \
@@ -11,16 +10,31 @@ def _calc_score(g, e, label, graph_key):
         target = 1 if g.vertex_properties["type"][int(e.target())] == 'Drug' else \
             g.vertex_properties[graph_key][int(e.target())][label.name]
 
-        # source + target is on average 1 since it is normalized
-        # score can be 0 if unknown
-        if source is None or source == 0.0:
+        # Only missing (None) scores are replaced; zeros are passed through.
+        if source is None:
             source = SMALL_VALUE
-        if target is None or target == 0.0:
+        if target is None:
             target = SMALL_VALUE
+        # edge_weights() divides by source + target, so never return a zero sum.
+        if source + target == 0:
+            source = target = SMALL_VALUE
     else:
         # no score, set it do default values
-        source = 0.5
-        target = 0.5
+        source = average_weight
+        target = average_weight
     return source, target
 
+def _average_score(g, graph_key, label):
+    """Return the mean `label` score stored under `graph_key`.
+
+    Drug vertices are skipped, mirroring _calc_score, which never reads their
+    scores. Missing (None) scores count as 0 towards the mean.
+    """
+    vertex_type = g.vertex_properties["type"]
+    vertex_scores = g.vertex_properties[graph_key]
+    values = [vertex_scores[v][label.name] for v in g.vertices() if vertex_type[v] != 'Drug']
+    if not values:
+        return SMALL_VALUE
+    return sum(filter(None, values)) / len(values)
+
 def _calc_hub_penalty(g, hub_penalty, avdeg, weights, inverse):
@@ -66,17 +80,30 @@ def edge_weights(g, hub_penalty, mutation_cancer_type=None, expression_cancer_ty
 
     if hub_penalty < 0:
         hub_penalty = 0
-    
+
+    # NOTE(review): _calc_score only reads the average when its label is None,
+    # which is exactly when the average below keeps its SMALL_VALUE default.
+    average_mutation_score = SMALL_VALUE
+    average_cancer_expression_score = SMALL_VALUE
+    average_tissue_expression_score = SMALL_VALUE
+    if mutation_cancer_type is not None:
+        average_mutation_score = _average_score(g, 'mutation_scores', mutation_cancer_type)
+    if expression_cancer_type is not None:
+        average_cancer_expression_score = _average_score(
+            g, 'cancer_expression_scores', expression_cancer_type)
+    if tissue is not None:
+        average_tissue_expression_score = _average_score(g, 'expression_scores', tissue)
     for e in g.edges():
         edge_avdeg = float(e.source().out_degree() + e.target().out_degree()) / 2.0
         # Mutation weights
-        mut_source, mut_target = _calc_score(g, e, mutation_cancer_type, 'mutation_scores')
+        mut_source, mut_target = _calc_score(g, e, mutation_cancer_type, 'mutation_scores', average_mutation_score)
 
         # Expression weights
-        expr_source, expr_target = _calc_score(g, e, expression_cancer_type, 'cancer_expression_scores')
+        expr_source, expr_target = _calc_score(g, e, expression_cancer_type,
+                                               'cancer_expression_scores', average_cancer_expression_score)
 
         # Tissue Expression weights
-        texpr_source, texpr_target = _calc_score(g, e, tissue, 'expression_scores')
+        texpr_source, texpr_target = _calc_score(g, e, tissue, 'expression_scores', average_tissue_expression_score)
 
         penalized_weight = (1.0 / (mut_source + mut_target)) * (1.0 / (expr_source + expr_target)) * \
             (1.0 / (texpr_source + texpr_target)) * ((1.0 - hub_penalty) * avdeg + hub_penalty * edge_avdeg)