diff --git a/config/config.yaml b/config/config.yaml
index 8025fd83..d9d7cac4 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -51,15 +51,20 @@ diffexp:
# model for sleuth differential expression analysis
models:
model_X:
- full: ~condition + batch_effect
- reduced: ~batch_effect
+ full: ~condition_X + batch_effect_X
+ reduced: ~batch_effect_X
# Covariate / sample sheet column that shall be used for fold
# change/effect size based downstream analyses.
- primary_variable: condition
+ primary_variable: condition_X
# base level of the primary variable (this should be one of the entries
# in the primary_variable sample sheet column and will be considered as
# denominator in the fold change/effect size estimation).
- base_level: untreated
+ base_level: untreated_X
+ model_Y:
+ full: ~condition_Y + batch_effect_Y
+ reduced: ~batch_effect_Y
+ primary_variable: condition_Y
+ base_level: untreated_Y
# significance level to use for volcano, ma- and qq-plots
sig-level:
volcano-plot: 0.05
@@ -105,6 +110,21 @@ enrichment:
# the species specified by resources -> ref -> species above
pathway_database: "reactome"
+meta_comparisons:
+ # comparison is only run if set to `true`
+ activate: false
+ # Define any name for comparison
+ model_X_vs_model_Y:
+ items:
+ # Define the two underlying models for the comparison. The models must be defined in the diffexp/models in the config
+ - label: X # Arbitrary label
+ model: model_X # Must match a diffexp model from config
+ - label: Y # Arbitrary label
+ model: model_Y # Must match a diffexp model from config
+ desc: |
+ Comparison between model_X and model_Y effect.
+ label: model_X vs. model_Y effect
+
bootstrap_plots:
# desired false discovery rate for bootstrap plots, i.e. a lower FDR will result in fewer boxplots generated
FDR: 0.01
diff --git a/workflow/envs/pystats.yaml b/workflow/envs/pystats.yaml
new file mode 100644
index 00000000..df4b5c89
--- /dev/null
+++ b/workflow/envs/pystats.yaml
@@ -0,0 +1,16 @@
+channels:
+ - conda-forge
+ - nodefaults
+dependencies:
+ - polars =0.20.28
+ - pyreadr =0.5
+ - altair =5.2
+ - pyarrow =16.1
+ - vegafusion =1.6
+ - vegafusion-python-embed =1.6
+ - vl-convert-python =1.2
+ - jupyter_core =5.7
+ - ipykernel =6.29
+ - nbconvert =7.14
+ - notebook =7.0
+ - jupyterlab_code_formatter =1.4
\ No newline at end of file
diff --git a/workflow/report/meta_comparison_diffexp.rst b/workflow/report/meta_comparison_diffexp.rst
new file mode 100644
index 00000000..0fc01778
--- /dev/null
+++ b/workflow/report/meta_comparison_diffexp.rst
@@ -0,0 +1,5 @@
+{{ snakemake.params.desc }}
+Each point represents a gene, x axis shows the {{ snakemake.params.labels[0] }} effect, y-axis the {{ snakemake.params.labels[1] }} effect (both as log2 fold change).
+The color encodes the corresponding q-value.
+By clicking on points, their label can be displayed.
+Holding the Shift key allows to select or deselect labels for multiple genes.
\ No newline at end of file
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index c1b9d62f..76f1524c 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -396,4 +396,17 @@ def all_input(wildcards):
ind_transcripts=config["experiment"]["3-prime-rna-seq"]["plot-qc"],
)
)
+
+ # meta comparisons
+ if config["meta_comparisons"]["activate"]:
+ wanted_input.extend(
+ directory(
+ expand(
+ "results/datavzrd-reports/{report_type}_meta_comparison_{meta_comp}",
+ report_type=["go_terms", "diffexp", "pathways"],
+ meta_comp=lookup(dpath="meta_comparisons", within=config),
+ )
+ ),
+ )
+
return wanted_input
diff --git a/workflow/rules/meta_comparisons.smk b/workflow/rules/meta_comparisons.smk
new file mode 100644
index 00000000..a6fa26a5
--- /dev/null
+++ b/workflow/rules/meta_comparisons.smk
@@ -0,0 +1,70 @@
+rule meta_compare_diffex:
+ input:
+ expand(
+ "results/sleuth/diffexp/{model}.genes-representative.diffexp.rds",
+ model=lookup(
+ dpath="meta_comparisons/{meta_comp}/items/*/model", within=config
+ ),
+ ),
+ output:
+ "results/tables/diffexp/meta_compare_{meta_comp}.tsv",
+ "results/meta_comparison/diffexp/{meta_comp}.json",
+ log:
+ notebook="logs/meta_compare_diffexp/{meta_comp}.ipynb",
+ params:
+ desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config),
+ labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config),
+ conda:
+ "../envs/pystats.yaml"
+ notebook:
+ "../scripts/compare_diffexp.py.ipynb"
+
+
+rule meta_compare_enrichment:
+ input:
+ expand(
+ "results/tables/go_terms/{model}.go_term_enrichment.gene_fdr_{gene_fdr}.go_term_sig_study_fdr_{go_term_fdr}.tsv",
+ model=lookup(
+ dpath="meta_comparisons/{meta_comp}/items/*/model", within=config
+ ),
+ gene_fdr=str(config["enrichment"]["goatools"]["fdr_genes"]).replace(
+ ".", "-"
+ ),
+ go_term_fdr=str(config["enrichment"]["goatools"]["fdr_go_terms"]).replace(
+ ".", "-"
+ ),
+ ),
+ output:
+ "results/tables/go_terms/meta_compare_{meta_comp}.tsv",
+ "results/meta_comparison/go_terms/{meta_comp}.json",
+ log:
+ notebook="logs/meta_compare_enrichment/{meta_comp}.ipynb",
+ params:
+ desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config),
+ labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config),
+ conda:
+ "../envs/pystats.yaml"
+ notebook:
+ "../scripts/compare_enrichment.py.ipynb"
+
+
+rule meta_compare_pathways:
+ input:
+ expand(
+ "results/tables/pathways/{model}.pathways.tsv",
+ model=lookup(
+ dpath="meta_comparisons/{meta_comp}/items/*/model", within=config
+ ),
+ ),
+ output:
+ "results/tables/pathways/meta_compare_{meta_comp}.tsv",
+ "results/meta_comparison/pathways/{meta_comp}.json",
+ log:
+ notebook="logs/meta_compare_pathways/{meta_comp}.ipynb",
+ params:
+ desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config),
+ labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config),
+ conda:
+ "../envs/pystats.yaml"
+ notebook:
+ "../scripts/compare_pathways.py.ipynb"
diff --git a/workflow/scripts/compare_diffexp.py.ipynb b/workflow/scripts/compare_diffexp.py.ipynb
new file mode 100644
index 00000000..86328e12
--- /dev/null
+++ b/workflow/scripts/compare_diffexp.py.ipynb
@@ -0,0 +1,324 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "metadata": {}
+ },
+ "outputs": [],
+ "source": [
+ "import pyreadr\n",
+ "import polars as pl\n",
+ "import polars.selectors as cs\n",
+ "\n",
+ "diffexp_x = pl.from_pandas(pyreadr.read_r(snakemake.input[0])[None]).lazy()\n",
+ "diffexp_y = pl.from_pandas(pyreadr.read_r(snakemake.input[1])[None]).lazy()\n",
+ "label_x = snakemake.params.labels[0]\n",
+ "label_y = snakemake.params.labels[1]\n",
+ "\n",
+ "# diffexp_x = pl.from_pandas(pyreadr.read_r(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_mgl_in_wt_vs_etho_in_wt_liver.genes-representative.diffexp.rds\")[None]).lazy()\n",
+ "# diffexp_y = pl.from_pandas(pyreadr.read_r(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_t3_in_wt_vs_etoh_in_wt_liver.genes-representative.diffexp.rds\")[None]).lazy()\n",
+ "# label_x = \"mgl\"\n",
+ "# label_y = \"t3\"\n",
+ "\n",
+ "\n",
+ "effect_x = f\"effect {label_x} (beta score)\"\n",
+ "effect_y = f\"effect {label_y} (beta score)\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "metadata": {}
+ },
+ "outputs": [],
+ "source": [
+ "def prepare(df):\n",
+ " return df.select(\n",
+ " cs.by_name(\"target_id\", \"ext_gene\", \"pval\", \"qval\"),\n",
+ " cs.starts_with(\"b_\").alias(\"beta\")\n",
+ " ).select(\n",
+ " cs.by_index(range(0, 5)) # only keep first b_ column\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "combined = prepare(diffexp_x).join(\n",
+ " prepare(diffexp_y), on=[\"target_id\", \"ext_gene\"], suffix=\"_y\"\n",
+ ").with_columns(\n",
+ " pl.min_horizontal(\"qval\", \"qval_y\").alias(\"qval_min\"),\n",
+ ").filter(\n",
+ " pl.col(\"qval_min\") <= 0.05\n",
+ ").rename(\n",
+ " {\n",
+ " \"beta\": effect_x,\n",
+ " \"beta_y\": effect_y,\n",
+ " \"qval_min\": \"min q-value\",\n",
+ " }\n",
+ ").collect()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "effects = combined.select(pl.col(effect_x, effect_y))\n",
+ "min_value = effects.min().min_horizontal()[0]\n",
+ "max_value = effects.max().max_horizontal()[0]\n",
+ "combined = combined.with_columns(\n",
+ " abs(pl.col(effect_x) - pl.col(effect_y)).alias(\"difference\")\n",
+ ")\n",
+ "combined_sorted = combined.sort(\"difference\", descending=True)\n",
+ "combined_pd = combined_sorted.select(\n",
+ " pl.col(\"ext_gene\", \"min q-value\", effect_x, effect_y)\n",
+ ").to_pandas()\n",
+ "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n",
+ "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.txt\", sep=\"\\t\", index=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
shape: (5_073, 10)target_id | ext_gene | pval | qval | effect mgl (beta score) | pval_y | qval_y | effect t3 (beta score) | min q-value | difference |
---|
str | str | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
"ENSMUST00000029670.7" | "Ptgfr" | null | null | null | 5.8900e-8 | 0.0000174 | 6.63 | 0.0000174 | null |
"ENSMUST00000062307.5" | "Phf11a" | null | null | null | 9.9300e-7 | 0.000106 | 4.59 | 0.000106 | null |
"ENSMUST00000150649.9" | "Ifi213" | null | null | null | 0.0000104 | 0.000586 | 4.28 | 0.000586 | null |
"ENSMUST00000033617.13" | "Btk" | null | null | null | 0.0000138 | 0.000733 | 3.97 | 0.000733 | null |
"ENSMUST00000005950.12" | "Mmp12" | null | null | null | 0.0000152 | 0.000793 | 4.55 | 0.000793 | null |
… | … | … | … | … | … | … | … | … | … |
"ENSMUST00000034860.5" | "Cyp1a2" | 3.0000e-11 | 9.2300e-9 | -1.94 | 0.0000018 | 0.000171 | -1.94 | 9.2300e-9 | 0.0 |
"ENSMUST00000093485.3" | "Ddx60" | 0.000008 | 0.000121 | 2.81 | 0.0000504 | 0.00196 | 2.81 | 0.000121 | 0.0 |
"ENSMUST00000020301.14" | "Vsir" | 0.000796 | 0.00445 | 1.3 | 0.00449 | 0.0473 | 1.3 | 0.00445 | 0.0 |
"ENSMUST00000040336.12" | "Slc22a23" | 0.014 | 0.0358 | -0.465 | 0.0177 | 0.112 | -0.465 | 0.0358 | 0.0 |
"ENSMUST00000226516.2" | "Vmn1r73" | 0.00702 | 0.0221 | 2.11 | 0.0181 | 0.114 | 2.11 | 0.0221 | 0.0 |
"
+ ],
+ "text/plain": [
+ "shape: (5_073, 10)\n",
+ "┌───────────┬──────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
+ "│ target_id ┆ ext_gene ┆ pval ┆ qval ┆ … ┆ qval_y ┆ effect t3 ┆ min ┆ differenc │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ (beta ┆ q-value ┆ e │\n",
+ "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ f64 ┆ score) ┆ --- ┆ --- │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ f64 │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ │\n",
+ "╞═══════════╪══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
+ "│ ENSMUST00 ┆ Ptgfr ┆ null ┆ null ┆ … ┆ 0.0000174 ┆ 6.63 ┆ 0.0000174 ┆ null │\n",
+ "│ 000029670 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .7 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Phf11a ┆ null ┆ null ┆ … ┆ 0.000106 ┆ 4.59 ┆ 0.000106 ┆ null │\n",
+ "│ 000062307 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Ifi213 ┆ null ┆ null ┆ … ┆ 0.000586 ┆ 4.28 ┆ 0.000586 ┆ null │\n",
+ "│ 000150649 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .9 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Btk ┆ null ┆ null ┆ … ┆ 0.000733 ┆ 3.97 ┆ 0.000733 ┆ null │\n",
+ "│ 000033617 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .13 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Mmp12 ┆ null ┆ null ┆ … ┆ 0.000793 ┆ 4.55 ┆ 0.000793 ┆ null │\n",
+ "│ 000005950 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .12 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ ENSMUST00 ┆ Cyp1a2 ┆ 3.0000e-1 ┆ 9.2300e-9 ┆ … ┆ 0.000171 ┆ -1.94 ┆ 9.2300e-9 ┆ 0.0 │\n",
+ "│ 000034860 ┆ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Ddx60 ┆ 0.000008 ┆ 0.000121 ┆ … ┆ 0.00196 ┆ 2.81 ┆ 0.000121 ┆ 0.0 │\n",
+ "│ 000093485 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .3 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Vsir ┆ 0.000796 ┆ 0.00445 ┆ … ┆ 0.0473 ┆ 1.3 ┆ 0.00445 ┆ 0.0 │\n",
+ "│ 000020301 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .14 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Slc22a23 ┆ 0.014 ┆ 0.0358 ┆ … ┆ 0.112 ┆ -0.465 ┆ 0.0358 ┆ 0.0 │\n",
+ "│ 000040336 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .12 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ENSMUST00 ┆ Vmn1r73 ┆ 0.00702 ┆ 0.0221 ┆ … ┆ 0.114 ┆ 2.11 ┆ 0.0221 ┆ 0.0 │\n",
+ "│ 000226516 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ .2 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "└───────────┴──────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "combined_sorted\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.LayerChart(...)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import altair as alt\n",
+ "import sys\n",
+ "# we cannot use vegafusion here because it makes the point selection impossible since\n",
+ "# it prunes the required ext_gene column\n",
+ "#alt.data_transformers.enable(\"vegafusion\")\n",
+ "alt.data_transformers.disable_max_rows()\n",
+ "\n",
+ "point_selector = alt.selection_point(fields=[\"ext_gene\"], empty=False)\n",
+ "\n",
+ "points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n",
+ " alt.X(effect_x),\n",
+ " alt.Y(effect_y),\n",
+ " alt.Color(\"min q-value\", scale=alt.Scale(scheme=\"viridis\")),\n",
+ " opacity=alt.value(0.5),\n",
+ ")\n",
+ "\n",
+ "line = alt.Chart(\n",
+ " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n",
+ ").mark_line(color=\"lightgrey\").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " strokeDash=alt.value([5, 5]),\n",
+ ")\n",
+ "\n",
+ "text_background = alt.Chart(combined_pd).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5,\n",
+ " fill='white',\n",
+ " stroke='white',\n",
+ " strokeWidth=5,\n",
+ " ).encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"ext_gene\", alt.value(\"\")),\n",
+ " )\n",
+ "\n",
+ "text = alt.Chart(combined_pd).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5,\n",
+ ").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"ext_gene\", alt.value(\"\")),\n",
+ ")\n",
+ "\n",
+ "\n",
+ "chart = alt.layer(line, points, text_background, text).add_params(\n",
+ " point_selector\n",
+ ").interactive()\n",
+ "\n",
+ "display(chart)\n",
+ "\n",
+ "# chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.html\", inline=True)\n",
+ "\n",
+ "chart.save(snakemake.output[1], inline=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "pystats",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/workflow/scripts/compare_enrichment.py.ipynb b/workflow/scripts/compare_enrichment.py.ipynb
new file mode 100644
index 00000000..27fe9a70
--- /dev/null
+++ b/workflow/scripts/compare_enrichment.py.ipynb
@@ -0,0 +1,402 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pyreadr\n",
+ "import polars as pl\n",
+ "import polars.selectors as cs\n",
+ "\n",
+ "diffexp_x = pl.read_csv(snakemake.input[0], separator=\"\\t\").lazy()\n",
+ "diffexp_y = pl.read_csv(snakemake.input[1], separator=\"\\t\").lazy()\n",
+ "label_x = snakemake.params.labels[0]\n",
+ "label_y = snakemake.params.labels[1]\n",
+ "\n",
+ "# diffexp_x = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/etoh_mgl_in_wt_vs_etho_in_wt_liver.go_term_enrichment.gene_fdr_0-05.go_term_sig_study_fdr0-05.tsv\", separator=\"\\t\").lazy()\n",
+ "# diffexp_y = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/etoh_t3_in_wt_vs_etoh_in_wt_liver.go_term_enrichment.gene_fdr_0-05.go_term_sig_study_fdr0-05.tsv\", separator=\"\\t\").lazy()\n",
+ "# label_x = \"mgl\"\n",
+ "# label_y = \"t3\"\n",
+ "\n",
+ "effect_x_pos = f\"positive effect {label_x}\"\n",
+ "effect_y_pos = f\"positive effect {label_y}\"\n",
+ "effect_x_neg = f\"negative effect {label_x}\"\n",
+ "effect_y_neg = f\"negative effect {label_y}\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def extract_study_items(value):\n",
+ " if value and value.strip() != \"\":\n",
+ " gene_values = value.split(', ')\n",
+ " data = []\n",
+ " for gene_value in gene_values:\n",
+ " parts = gene_value.split(':')\n",
+ " gene = parts[0]\n",
+ " val = float(parts[1])\n",
+ " data.append({\"gene\": gene, \"value\": val})\n",
+ " return data\n",
+ " else:\n",
+ " return []\n",
+ "\n",
+ "def calculate_sums(parsed_terms):\n",
+ " positive_sum = sum(item['value'] for item in parsed_terms if item['value'] > 0)\n",
+ " negative_sum = sum(item['value'] for item in parsed_terms if item['value'] < 0)\n",
+ " return positive_sum, negative_sum"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare(df):\n",
+ " # Select necessary columns\n",
+ " df = df.select([\n",
+ " cs.by_name(\"GO\", \"term\", \"p_uncorrected\", \"p_fdr_bh\", \"study_items_sig_terms\")\n",
+ " ])\n",
+ " \n",
+ " # map_elements functions to columns using with_columns\n",
+ " df = df.with_columns([\n",
+ " pl.col(\"study_items_sig_terms\").map_elements(extract_study_items).alias(\"parsed_terms\")])\n",
+ " df = df.with_columns([\n",
+ " pl.col(\"parsed_terms\").map_elements(lambda x: calculate_sums(x)[0]).alias(\"cumulative_b_scores_positive\"),\n",
+ " pl.col(\"parsed_terms\").map_elements(lambda x: calculate_sums(x)[1]).alias(\"cumulative_b_scores_negative\")\n",
+ " ])\n",
+ " \n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n",
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n",
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n",
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n",
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n",
+ "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prepared_diffexp_x = prepare(diffexp_x)\n",
+ "prepared_diffexp_y = prepare(diffexp_y)\n",
+ "combined = prepared_diffexp_x.join(\n",
+ " prepared_diffexp_y, on=[\"GO\", \"term\"], suffix=\"_y\"\n",
+ ").with_columns(\n",
+ " pl.min_horizontal(\"p_fdr_bh\", \"p_fdr_bh_y\").alias(\"p_fdr_bh_min\")\n",
+ ").filter(\n",
+ " pl.col(\"p_fdr_bh_min\") <= 0.05\n",
+ ").rename(\n",
+ " {\n",
+ " \"cumulative_b_scores_positive\": effect_x_pos,\n",
+ " \"cumulative_b_scores_positive_y\": effect_y_pos,\n",
+ " \"cumulative_b_scores_negative\": effect_x_neg,\n",
+ " \"cumulative_b_scores_negative_y\": effect_y_neg,\n",
+ " \"p_fdr_bh_min\": \"min p_fdr_bh\",\n",
+ " }\n",
+ ").collect()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (353, 15)GO | term | p_uncorrected | p_fdr_bh | study_items_sig_terms | parsed_terms | positive effect mgl | negative effect mgl | p_uncorrected_y | p_fdr_bh_y | study_items_sig_terms_y | parsed_terms_y | positive effect t3 | negative effect t3 | min p_fdr_bh |
---|
str | str | f64 | f64 | str | list[struct[2]] | f64 | f64 | f64 | f64 | str | list[struct[2]] | f64 | f64 | f64 |
"GO:0006629" | "lipid metabolic process" | 2.7545e-61 | 3.4109e-57 | "Fah:0.508, Dhcr24:1.27, Lypla1… | [{"Fah",0.508}, {"Dhcr24",1.27}, … {"Cyp2c38",2.2}] | 184.10933 | -135.15124 | 5.5125e-38 | 3.4131e-34 | "Scp2:-0.752, Ces1g:-0.8, Plbd1… | [{"Scp2",-0.752}, {"Ces1g",-0.8}, … {"Neu1",0.628}] | 58.609 | -66.752 | 3.4109e-57 |
"GO:0007608" | "sensory perception of smell" | 6.1322e-42 | 1.8984e-38 | "Or2ag20:1.69, Or5d18:1.82, Or4… | [{"Or2ag20",1.69}, {"Or5d18",1.82}, … {"Or1j15",0.91}] | 24.873 | -15.23 | 3.8951e-11 | 2.8372e-8 | "Or1e32:2.9, Or4f14b:1.1, B2m:0… | [{"Or1e32",2.9}, {"Or4f14b",1.1}, … {"Or10ag59",2.27}] | 6.888 | 0.0 | 1.8984e-38 |
"GO:0006631" | "fatty acid metabolic process" | 7.0720e-37 | 1.7514e-33 | "Lypla1:-0.484, Acads:1.13, C3:… | [{"Lypla1",-0.484}, {"Acads",1.13}, … {"Pecr",0.701}] | 60.493 | -50.0166 | 6.0084e-17 | 9.3002e-14 | "Acsl1:-0.815, Fads1:0.544, Lpi… | [{"Acsl1",-0.815}, {"Fads1",0.544}, … {"Fads2",1.26}] | 17.256 | -32.78 | 1.7514e-33 |
"GO:0007186" | "G protein-coupled receptor sig… | 2.8894e-32 | 3.9755e-29 | "Or2ag20:1.69, Apoe:0.842000000… | [{"Or2ag20",1.69}, {"Apoe",0.842}, … {"Apoa1",1.44}] | 70.384 | -40.467 | 0.000194 | 0.013482 | "Ccl5:3.04, Adgra3:-1.11, Ccl9:… | [{"Ccl5",3.04}, {"Adgra3",-1.11}, … {"Cxcl10",4.18}] | 50.685 | -11.693 | 3.9755e-29 |
"GO:0002376" | "immune system process" | 1.4476e-30 | 1.7925e-27 | "Hmgb2:-1.74, Cadm1:1.45, Serpi… | [{"Hmgb2",-1.74}, {"Cadm1",1.45}, … {"Rsad2",3.72}] | 182.7968 | -54.5817 | 8.6964e-46 | 1.0769e-41 | "Iigp1:1.23, Themis2:3.46, C1ra… | [{"Iigp1",1.23}, {"Themis2",3.46}, … {"Slc15a3",1.79}] | 162.701 | -15.033 | 1.0769e-41 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
"GO:0004029" | "aldehyde dehydrogenase (NAD+) … | 0.002638 | 0.034555 | "Aldh1a1:-0.447, Aldh1a7:0.728,… | [{"Aldh1a1",-0.447}, {"Aldh1a7",0.728}, … {"Aldh7a1",-0.936}] | 1.8012 | -1.383 | 0.000093 | 0.005015 | "Aldh7a1:-0.81, Aldh2:-0.586, A… | [{"Aldh7a1",-0.81}, {"Aldh2",-0.586}, … {"Aldh1a1",-0.588}] | 0.0 | -3.715 | 0.005015 |
"GO:0008391" | "arachidonic acid monooxygenase… | 0.002638 | 0.034555 | "Cyp4a10:0.716, Cyp2d22:-0.0859… | [{"Cyp4a10",0.716}, {"Cyp2d22",-0.0859}, … {"Cyp4a31",2.49}] | 3.206 | -12.5359 | 2.4159e-7 | 0.000027 | "Cyp2d22:-0.741, Cyp4a14:1.88, … | [{"Cyp2d22",-0.741}, {"Cyp4a14",1.88}, … {"Cyp4a10",0.793}] | 5.633 | -14.391 | 0.000027 |
"GO:0008047" | "enzyme activator activity" | 0.002749 | 0.035898 | "Krtcap2:0.353, Timp2:0.6559999… | [{"Krtcap2",0.353}, {"Timp2",0.656}, … {"Dtx3l",0.897}] | 5.568 | -3.548 | 0.000328 | 0.013481 | "Apobec1:2.0, Arl1:0.721, Cd44:… | [{"Apobec1",2.0}, {"Arl1",0.721}, … {"Alox5ap",1.56}] | 7.785 | 0.0 | 0.013481 |
"GO:0008194" | "UDP-glycosyltransferase activi… | 0.003799 | 0.04697 | "Ugt3a2:-0.982, Ugt2b37:1.76, B… | [{"Ugt3a2",-0.982}, {"Ugt2b37",1.76}, … {"Ugt2b1",-2.29}] | 3.98 | -11.185 | 0.000274 | 0.011994 | "Ugt2a3:-0.9640000000000001, Ug… | [{"Ugt2a3",-0.964}, {"Ugt2b5",-1.13}, … {"Ugt3a2",-0.54}] | 1.79 | -6.404 | 0.011994 |
"GO:0016655" | "oxidoreductase activity, actin… | 0.003901 | 0.047819 | "Akr1c12:-1.14, Akr1c6:-1.38, A… | [{"Akr1c12",-1.14}, {"Akr1c6",-1.38}, … {"Aifm2",1.16}] | 2.61 | -5.54 | 0.000525 | 0.020257 | "Akr1c6:-0.8140000000000001, Ai… | [{"Akr1c6",-0.814}, {"Aifm2",0.658}, … {"Dcxr",-0.752}] | 4.348 | -1.566 | 0.020257 |
"
+ ],
+ "text/plain": [
+ "shape: (353, 15)\n",
+ "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n",
+ "│ GO ┆ term ┆ p_uncorre ┆ p_fdr_bh ┆ … ┆ parsed_te ┆ positive ┆ negative ┆ min │\n",
+ "│ --- ┆ --- ┆ cted ┆ --- ┆ ┆ rms_y ┆ effect t3 ┆ effect t3 ┆ p_fdr_bh │\n",
+ "│ str ┆ str ┆ --- ┆ f64 ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ ┆ ┆ f64 ┆ ┆ ┆ list[stru ┆ f64 ┆ f64 ┆ f64 │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ct[2]] ┆ ┆ ┆ │\n",
+ "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n",
+ "│ GO:000662 ┆ lipid ┆ 2.7545e-6 ┆ 3.4109e-5 ┆ … ┆ [{\"Scp2\", ┆ 58.609 ┆ -66.752 ┆ 3.4109e- │\n",
+ "│ 9 ┆ metabolic ┆ 1 ┆ 7 ┆ ┆ -0.752}, ┆ ┆ ┆ 57 │\n",
+ "│ ┆ process ┆ ┆ ┆ ┆ {\"Ces1g\", ┆ ┆ ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ -0.… ┆ ┆ ┆ │\n",
+ "│ GO:000760 ┆ sensory ┆ 6.1322e-4 ┆ 1.8984e-3 ┆ … ┆ [{\"Or1e32 ┆ 6.888 ┆ 0.0 ┆ 1.8984e- │\n",
+ "│ 8 ┆ perceptio ┆ 2 ┆ 8 ┆ ┆ \",2.9}, ┆ ┆ ┆ 38 │\n",
+ "│ ┆ n of ┆ ┆ ┆ ┆ {\"Or4f14b ┆ ┆ ┆ │\n",
+ "│ ┆ smell ┆ ┆ ┆ ┆ \",1.… ┆ ┆ ┆ │\n",
+ "│ GO:000663 ┆ fatty ┆ 7.0720e-3 ┆ 1.7514e-3 ┆ … ┆ [{\"Acsl1\" ┆ 17.256 ┆ -32.78 ┆ 1.7514e- │\n",
+ "│ 1 ┆ acid ┆ 7 ┆ 3 ┆ ┆ ,-0.815}, ┆ ┆ ┆ 33 │\n",
+ "│ ┆ metabolic ┆ ┆ ┆ ┆ {\"Fads1\", ┆ ┆ ┆ │\n",
+ "│ ┆ process ┆ ┆ ┆ ┆ 0.… ┆ ┆ ┆ │\n",
+ "│ GO:000718 ┆ G protein ┆ 2.8894e-3 ┆ 3.9755e-2 ┆ … ┆ [{\"Ccl5\", ┆ 50.685 ┆ -11.693 ┆ 3.9755e- │\n",
+ "│ 6 ┆ -coupled ┆ 2 ┆ 9 ┆ ┆ 3.04}, ┆ ┆ ┆ 29 │\n",
+ "│ ┆ receptor ┆ ┆ ┆ ┆ {\"Adgra3\" ┆ ┆ ┆ │\n",
+ "│ ┆ sig… ┆ ┆ ┆ ┆ ,-1.1… ┆ ┆ ┆ │\n",
+ "│ GO:000237 ┆ immune ┆ 1.4476e-3 ┆ 1.7925e-2 ┆ … ┆ [{\"Iigp1\" ┆ 162.701 ┆ -15.033 ┆ 1.0769e- │\n",
+ "│ 6 ┆ system ┆ 0 ┆ 7 ┆ ┆ ,1.23}, ┆ ┆ ┆ 41 │\n",
+ "│ ┆ process ┆ ┆ ┆ ┆ {\"Themis2 ┆ ┆ ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ \",3.… ┆ ┆ ┆ │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ GO:000402 ┆ aldehyde ┆ 0.002638 ┆ 0.034555 ┆ … ┆ [{\"Aldh7a ┆ 0.0 ┆ -3.715 ┆ 0.005015 │\n",
+ "│ 9 ┆ dehydroge ┆ ┆ ┆ ┆ 1\",-0.81} ┆ ┆ ┆ │\n",
+ "│ ┆ nase ┆ ┆ ┆ ┆ , {\"Aldh2 ┆ ┆ ┆ │\n",
+ "│ ┆ (NAD+) … ┆ ┆ ┆ ┆ \",-… ┆ ┆ ┆ │\n",
+ "│ GO:000839 ┆ arachidon ┆ 0.002638 ┆ 0.034555 ┆ … ┆ [{\"Cyp2d2 ┆ 5.633 ┆ -14.391 ┆ 0.000027 │\n",
+ "│ 1 ┆ ic acid ┆ ┆ ┆ ┆ 2\",-0.741 ┆ ┆ ┆ │\n",
+ "│ ┆ monooxyge ┆ ┆ ┆ ┆ }, {\"Cyp4 ┆ ┆ ┆ │\n",
+ "│ ┆ nase… ┆ ┆ ┆ ┆ a14… ┆ ┆ ┆ │\n",
+ "│ GO:000804 ┆ enzyme ┆ 0.002749 ┆ 0.035898 ┆ … ┆ [{\"Apobec ┆ 7.785 ┆ 0.0 ┆ 0.013481 │\n",
+ "│ 7 ┆ activator ┆ ┆ ┆ ┆ 1\",2.0}, ┆ ┆ ┆ │\n",
+ "│ ┆ activity ┆ ┆ ┆ ┆ {\"Arl1\",0 ┆ ┆ ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ .72… ┆ ┆ ┆ │\n",
+ "│ GO:000819 ┆ UDP-glyco ┆ 0.003799 ┆ 0.04697 ┆ … ┆ [{\"Ugt2a3 ┆ 1.79 ┆ -6.404 ┆ 0.011994 │\n",
+ "│ 4 ┆ syltransf ┆ ┆ ┆ ┆ \",-0.964} ┆ ┆ ┆ │\n",
+ "│ ┆ erase ┆ ┆ ┆ ┆ , {\"Ugt2b ┆ ┆ ┆ │\n",
+ "│ ┆ activi… ┆ ┆ ┆ ┆ 5\",… ┆ ┆ ┆ │\n",
+ "│ GO:001665 ┆ oxidoredu ┆ 0.003901 ┆ 0.047819 ┆ … ┆ [{\"Akr1c6 ┆ 4.348 ┆ -1.566 ┆ 0.020257 │\n",
+ "│ 5 ┆ ctase ┆ ┆ ┆ ┆ \",-0.814} ┆ ┆ ┆ │\n",
+ "│ ┆ activity, ┆ ┆ ┆ ┆ , {\"Aifm2 ┆ ┆ ┆ │\n",
+ "│ ┆ actin… ┆ ┆ ┆ ┆ \",0… ┆ ┆ ┆ │\n",
+ "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.HConcatChart(...)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import altair as alt\n",
+ "import sys\n",
+ "from IPython.display import display\n",
+ "# we cannot use vegafusion here because it makes the point selection impossible since\n",
+ "# it prunes the required ext_gene column\n",
+ "#alt.data_transformers.enable(\"vegafusion\")\n",
+ "xlabel = f\"effect {label_x}\"\n",
+ "ylabel = f\"effect {label_y}\"\n",
+ "df_filtered = combined.filter((pl.col(effect_x_pos) != 0) & (pl.col(effect_y_pos) != 0) & (pl.col(effect_x_neg) != 0) & (pl.col(effect_y_neg) != 0))\n",
+ "df_filtered = df_filtered.with_columns(\n",
+ " pl.max_horizontal(abs(pl.col(effect_x_pos) - pl.col(effect_y_pos)), abs(pl.col(effect_x_neg) - pl.col(effect_y_neg))).alias(\"difference\")\n",
+ ")\n",
+ "combined_sorted = df_filtered.sort(\"difference\", descending=True)\n",
+ "combined_pd = combined_sorted.select(\n",
+ " pl.col(\"GO\", \"term\", \"min p_fdr_bh\", effect_x_pos, effect_y_pos, effect_x_neg, effect_y_neg)\n",
+ ").to_pandas()\n",
+ "# combined_pd = combined_pd[\"GO\"].astype(str)\n",
+ "\n",
+ "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n",
+ "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/meta_compare_etoh_wt_mgl_vs_t3.tsv\", sep=\"\\t\", index=False)\n",
+ "\n",
+ "point_selector = alt.selection_point(fields=[\"term\"], empty=False)\n",
+ "\n",
+ "\n",
+ "def plot(df, effect_x, effect_y, title, xlabel, ylabel):\n",
+ " # Filter out rows where either effect_x or effect_y is zero because of logarithmic scale\n",
+ "\n",
+ " df = df_filtered.select(\n",
+ " pl.col(\"GO\", \"term\", \"min p_fdr_bh\", effect_x, effect_y)\n",
+ " ).to_pandas()\n",
+ "\n",
+ "\n",
+ " effects = df_filtered.select(pl.col(effect_x), pl.col(effect_y))\n",
+ " min_value = effects.min().min_horizontal()[0]\n",
+ " max_value = effects.max().max_horizontal()[0]\n",
+ "\n",
+ " alt.data_transformers.disable_max_rows()\n",
+ "\n",
+ "\n",
+ " points = alt.Chart(df).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n",
+ " alt.X(effect_x, title=xlabel, scale=alt.Scale(type='log', nice=False), axis=alt.Axis(grid=False)),\n",
+ " alt.Y(effect_y, title=ylabel, scale=alt.Scale(type='log', nice=False), axis=alt.Axis(grid=False)),\n",
+ " alt.Color(\"min p_fdr_bh\", scale=alt.Scale(scheme=\"viridis\")),\n",
+ " opacity=alt.value(0.5),\n",
+ " )\n",
+ "\n",
+ " line = alt.Chart(\n",
+ " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n",
+ " ).mark_line(color=\"lightgrey\").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " strokeDash=alt.value([5, 5]),\n",
+ " )\n",
+ " \n",
+ " text_background = alt.Chart(df).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5,\n",
+ " fill='white',\n",
+ " stroke='white',\n",
+ " strokeWidth=5,\n",
+ " ).encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"term\", alt.value(\"\")),\n",
+ " )\n",
+ "\n",
+ " text = alt.Chart(df).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5, \n",
+ " \n",
+ " ).encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"term\", alt.value(\"\")),\n",
+ " )\n",
+ " \n",
+ "\n",
+ " chart = alt.layer(line, points, text_background, text).add_params(\n",
+ " point_selector\n",
+ " ).properties(\n",
+ " title=title\n",
+ " ).interactive()\n",
+ " return chart\n",
+ "\n",
+ "# Update the plot function calls to include the logarithmic scale and filter out zero values\n",
+ "positive_chart = plot(combined_pd, effect_x_pos, effect_y_pos, \"Positive effect\", xlabel, ylabel)\n",
+ "negative_chart = plot(combined_pd, effect_x_neg, effect_y_neg, \"Negative effect\", xlabel, ylabel)\n",
+ "\n",
+ "final_chart = alt.hconcat(positive_chart, negative_chart).resolve_scale(\n",
+ " color='shared'\n",
+ ")\n",
+ "\n",
+ "display(final_chart)\n",
+ "\n",
+ "# final_chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.html\", inline=True)\n",
+ "final_chart.save(snakemake.output[1], inline=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/workflow/scripts/compare_pathways.py.ipynb b/workflow/scripts/compare_pathways.py.ipynb
new file mode 100644
index 00000000..d3937ffc
--- /dev/null
+++ b/workflow/scripts/compare_pathways.py.ipynb
@@ -0,0 +1,370 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pyreadr\n",
+ "import polars as pl\n",
+ "import polars.selectors as cs\n",
+ "diffexp_x = pl.read_csv(snakemake.input[0], separator=\"\\t\").lazy()\n",
+ "diffexp_y = pl.read_csv(snakemake.input[1], separator=\"\\t\").lazy()\n",
+ "label_x = snakemake.params.labels[0]\n",
+ "label_y = snakemake.params.labels[1]\n",
+ "\n",
+ "\n",
+ "# diffexp_x = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/etoh_mgl_in_wt_vs_etho_in_wt_liver.pathways.tsv\", separator=\"\\t\").lazy()\n",
+ "# diffexp_y = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/etoh_t3_in_wt_vs_etoh_in_wt_liver.pathways.tsv\", separator=\"\\t\").lazy()\n",
+ "# label_x = \"mgl\"\n",
+ "# label_y = \"t3\"\n",
+ "\n",
+ "effect_x = f\"effect {label_x}\"\n",
+ "effect_y = f\"effect {label_y}\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare(df):\n",
+ " # Select necessary columns and filter\n",
+ " df = df.select([\n",
+ " cs.by_name(\"Name\", \"Combined p-value\", \"Combined FDR\", \"total perturbation accumulation\")\n",
+ " ])\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prepared_diffexp_x = prepare(diffexp_x)\n",
+ "prepared_diffexp_y = prepare(diffexp_y)\n",
+ "combined = prepared_diffexp_x.join(\n",
+ " prepared_diffexp_y, on=[\"Name\"], suffix=\"_y\"\n",
+ ").with_columns(\n",
+ " pl.min_horizontal(\"Combined FDR\", \"Combined FDR_y\").alias(\"fdr_min\")\n",
+ ").filter(\n",
+ " pl.col(\"fdr_min\") <= 0.05\n",
+ ").rename(\n",
+ " {\n",
+ " \"total perturbation accumulation\": effect_x,\n",
+ " \"total perturbation accumulation_y\": effect_y,\n",
+ " \"fdr_min\": \"min fdr\",\n",
+ " }\n",
+ ").collect()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (426, 8)Name | Combined p-value | Combined FDR | effect mgl | Combined p-value_y | Combined FDR_y | effect t3 | min fdr |
---|
str | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
"Metabolism of RNA" | 5.0991e-104 | 3.8804e-101 | -147.828048 | 0.000008 | 0.000235 | -47.932192 | 3.8804e-101 |
"Translation" | 3.8445e-69 | 1.4628e-66 | -25.0236 | 0.025409 | 0.101636 | -7.771333 | 1.4628e-66 |
"rRNA processing in the nucleus… | 1.0766e-53 | 2.7309e-51 | -138.862941 | 0.000185 | 0.003123 | -37.756992 | 2.7309e-51 |
"Amino acid and derivative meta… | 3.3796e-51 | 6.4297e-49 | 8.516927 | 1.1433e-24 | 3.4757e-22 | -1.648 | 6.4297e-49 |
"Major pathway of rRNA processi… | 6.1362e-51 | 9.3392e-49 | -138.442231 | 0.00016 | 0.002854 | -36.96517 | 9.3392e-49 |
… | … | … | … | … | … | … | … |
"FCERI mediated Ca+2 mobilizati… | 0.21274 | 0.273934 | 1.568 | 0.002636 | 0.026737 | 7.4415 | 0.026737 |
"DAP12 interactions" | 0.232955 | 0.294444 | 3.038273 | 0.005922 | 0.046757 | 5.982545 | 0.046757 |
"Interleukin-3, Interleukin-5 a… | 0.344568 | 0.400942 | 0.0 | 0.003194 | 0.028986 | 1.02 | 0.028986 |
"Collagen formation" | 0.425525 | 0.48332 | 1.303354 | 0.004406 | 0.037203 | -15.308537 | 0.037203 |
"G alpha (q) signalling events" | 0.712048 | 0.755175 | 3.1885 | 0.006453 | 0.04844 | 9.2385 | 0.04844 |
"
+ ],
+ "text/plain": [
+ "shape: (426, 8)\n",
+ "┌────────────┬────────────┬────────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n",
+ "│ Name ┆ Combined ┆ Combined ┆ effect ┆ Combined ┆ Combined ┆ effect t3 ┆ min fdr │\n",
+ "│ --- ┆ p-value ┆ FDR ┆ mgl ┆ p-value_y ┆ FDR_y ┆ --- ┆ --- │\n",
+ "│ str ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ f64 ┆ f64 │\n",
+ "│ ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ │\n",
+ "╞════════════╪════════════╪════════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
+ "│ Metabolism ┆ 5.0991e-10 ┆ 3.8804e-10 ┆ -147.8280 ┆ 0.000008 ┆ 0.000235 ┆ -47.93219 ┆ 3.8804e-1 │\n",
+ "│ of RNA ┆ 4 ┆ 1 ┆ 48 ┆ ┆ ┆ 2 ┆ 01 │\n",
+ "│ Translatio ┆ 3.8445e-69 ┆ 1.4628e-66 ┆ -25.0236 ┆ 0.025409 ┆ 0.101636 ┆ -7.771333 ┆ 1.4628e-6 │\n",
+ "│ n ┆ ┆ ┆ ┆ ┆ ┆ ┆ 6 │\n",
+ "│ rRNA ┆ 1.0766e-53 ┆ 2.7309e-51 ┆ -138.8629 ┆ 0.000185 ┆ 0.003123 ┆ -37.75699 ┆ 2.7309e-5 │\n",
+ "│ processing ┆ ┆ ┆ 41 ┆ ┆ ┆ 2 ┆ 1 │\n",
+ "│ in the ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ nucleus… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ Amino acid ┆ 3.3796e-51 ┆ 6.4297e-49 ┆ 8.516927 ┆ 1.1433e-2 ┆ 3.4757e-2 ┆ -1.648 ┆ 6.4297e-4 │\n",
+ "│ and ┆ ┆ ┆ ┆ 4 ┆ 2 ┆ ┆ 9 │\n",
+ "│ derivative ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ meta… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ Major ┆ 6.1362e-51 ┆ 9.3392e-49 ┆ -138.4422 ┆ 0.00016 ┆ 0.002854 ┆ -36.96517 ┆ 9.3392e-4 │\n",
+ "│ pathway of ┆ ┆ ┆ 31 ┆ ┆ ┆ ┆ 9 │\n",
+ "│ rRNA ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ processi… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ FCERI ┆ 0.21274 ┆ 0.273934 ┆ 1.568 ┆ 0.002636 ┆ 0.026737 ┆ 7.4415 ┆ 0.026737 │\n",
+ "│ mediated ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ Ca+2 mobil ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ izati… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ DAP12 inte ┆ 0.232955 ┆ 0.294444 ┆ 3.038273 ┆ 0.005922 ┆ 0.046757 ┆ 5.982545 ┆ 0.046757 │\n",
+ "│ ractions ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ Interleuki ┆ 0.344568 ┆ 0.400942 ┆ 0.0 ┆ 0.003194 ┆ 0.028986 ┆ 1.02 ┆ 0.028986 │\n",
+ "│ n-3, Inter ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ leukin-5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ a… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ Collagen ┆ 0.425525 ┆ 0.48332 ┆ 1.303354 ┆ 0.004406 ┆ 0.037203 ┆ -15.30853 ┆ 0.037203 │\n",
+ "│ formation ┆ ┆ ┆ ┆ ┆ ┆ 7 ┆ │\n",
+ "│ G alpha ┆ 0.712048 ┆ 0.755175 ┆ 3.1885 ┆ 0.006453 ┆ 0.04844 ┆ 9.2385 ┆ 0.04844 │\n",
+ "│ (q) ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ signalling ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ events ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "└────────────┴────────────┴────────────┴───────────┴───────────┴───────────┴───────────┴───────────┘"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "effects = combined.select(pl.col(effect_x, effect_y))\n",
+ "min_value = effects.min().min_horizontal()[0]\n",
+ "max_value = effects.max().max_horizontal()[0]\n",
+ "combined = combined.with_columns(\n",
+ " abs(pl.col(effect_x) - pl.col(effect_y)).alias(\"difference\")\n",
+ ")\n",
+ "combined_sorted = combined.sort(\"difference\", descending=True)\n",
+ "combined_pd = combined_sorted.select(\n",
+ " pl.col(\"Name\", \"min fdr\", effect_x, effect_y)\n",
+ ").to_pandas()\n",
+ "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n",
+ "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/meta_compare_etoh_wt_mgl_vs_t3.tsv\", sep=\"\\t\", index=False)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.LayerChart(...)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/.snakemake/conda/673ca4e70a0a7f8fe6fba3391c9fbb49_/lib/python3.12/site-packages/altair/utils/save.py:42: UserWarning: inline argument ignored for non HTML formats.\n",
+ " warnings.warn(\"inline argument ignored for non HTML formats.\", stacklevel=1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "import altair as alt\n",
+ "import sys\n",
+ "from IPython.display import display\n",
+ "\n",
+ "# we cannot use vegafusion here because it makes the point selection impossible since\n",
+ "# it prunes the required ext_gene column\n",
+ "#alt.data_transformers.enable(\"vegafusion\")\n",
+ "alt.data_transformers.disable_max_rows()\n",
+ "\n",
+ "\n",
+ "# Data transformation\n",
+ "alt.data_transformers.disable_max_rows()\n",
+ "\n",
+ "# Selektor für Punkte\n",
+ "point_selector = alt.selection_point(fields=[\"Name\"], empty=False)\n",
+ "\n",
+ "# Punkte\n",
+ "points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n",
+ " alt.X(effect_x, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),\n",
+ " alt.Y(effect_y, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),\n",
+ " alt.Color(\"min fdr\", scale=alt.Scale(scheme=\"viridis\")),\n",
+ " opacity=alt.value(0.5),\n",
+ ")\n",
+ "\n",
+ "# Gestrichelte Linie\n",
+ "line = alt.Chart(\n",
+ " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n",
+ ").mark_line(color=\"lightgrey\").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " strokeDash=alt.value([5, 5]),\n",
+ ")\n",
+ "\n",
+ "# Gestrichelte Linie\n",
+ "x_axis = alt.Chart(\n",
+ " pl.DataFrame({effect_x: [0, 0], effect_y: [min_value, max_value]})\n",
+ ").mark_line(color=\"lightgrey\").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " strokeDash=alt.value([5, 5]),\n",
+ ")\n",
+ "\n",
+ "y_axis = alt.Chart(\n",
+ " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [0, 0]})\n",
+ ").mark_line(color=\"lightgrey\").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " strokeDash=alt.value([5, 5]),\n",
+ ")\n",
+ "\n",
+ "text_background = alt.Chart(combined_pd).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5,\n",
+ " fill='white',\n",
+ " stroke='white',\n",
+ " strokeWidth=5,\n",
+ " ).encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"Name\", alt.value(\"\")),\n",
+ " )\n",
+ "\n",
+ "\n",
+ "# Text\n",
+ "text = alt.Chart(combined_pd).mark_text(\n",
+ " align=\"left\",\n",
+ " baseline=\"middle\",\n",
+ " dx=5,\n",
+ " dy=-5,\n",
+ ").encode(\n",
+ " x=effect_x,\n",
+ " y=effect_y,\n",
+ " text=alt.condition(point_selector, \"Name\", alt.value(\"\")),\n",
+ ")\n",
+ "\n",
+ "# Null-Linien für x und y\n",
+ "zero_lines = alt.Chart(pl.DataFrame({\"zero\": [0]})).mark_rule(color=\"black\").encode(\n",
+ " x=alt.X(\"zero\", axis=alt.Axis(title=\"\")), # keine Beschriftung der Achse\n",
+ " y=alt.Y(\"zero\", axis=alt.Axis(title=\"\")), # keine Beschriftung der Achse\n",
+ ")\n",
+ "\n",
+ "# Gesamtes Diagramm mit den Ebenen\n",
+ "chart = alt.layer(x_axis, y_axis, line, points, text_background, text).add_params(\n",
+ " point_selector\n",
+ ").interactive()\n",
+ "\n",
+ "# display(chart)\n",
+ "\n",
+ "chart.save(snakemake.output[1], inline=True)\n",
+ "# chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/meta_comparison/pathways/etoh_wt_mgl_vs_t3.json\", inline=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}