diff --git a/config/config.yaml b/config/config.yaml index 8025fd83..d9d7cac4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -51,15 +51,20 @@ diffexp: # model for sleuth differential expression analysis models: model_X: - full: ~condition + batch_effect - reduced: ~batch_effect + full: ~condition_X + batch_effect_X + reduced: ~batch_effect_X # Covariate / sample sheet column that shall be used for fold # change/effect size based downstream analyses. - primary_variable: condition + primary_variable: condition_X # base level of the primary variable (this should be one of the entries # in the primary_variable sample sheet column and will be considered as # denominator in the fold change/effect size estimation). - base_level: untreated + base_level: untreated_X + model_Y: + full: ~condition_Y + batch_effect_Y + reduced: ~batch_effect_Y + primary_variable: condition_Y + base_level: untreated_Y # significance level to use for volcano, ma- and qq-plots sig-level: volcano-plot: 0.05 @@ -105,6 +110,21 @@ enrichment: # the species specified by resources -> ref -> species above pathway_database: "reactome" +meta_comparisons: + # comparison is only run if set to `true` + activate: false + # Define any name for comparison + model_X_vs_model_Y: + items: + # Define the two underlying models for the comparison. The models must be defined in the diffexp/models in the config + - label: X # Arbitrary label + model: model_X # Must match a diffexp model from config + - label: Y # Arbitrary label + model: model_Y # Must match a diffexp model from config + desc: | + Comparison between model_X and model_Y effect. + label: model_X vs. model_Y effect + bootstrap_plots: # desired false discovery rate for bootstrap plots, i.e. a lower FDR will result in fewer boxplots generated FDR: 0.01 diff --git a/workflow/envs/pystats.yaml b/workflow/envs/pystats.yaml new file mode 100644 index 00000000..df4b5c89 --- /dev/null +++ b/workflow/envs/pystats.yaml @@ -0,0 +1,16 @@ +channels: + - conda-forge + - nodefaults +dependencies: + - polars =0.20.28 + - pyreadr =0.5 + - altair =5.2 + - pyarrow =16.1 + - vegafusion =1.6 + - vegafusion-python-embed =1.6 + - vl-convert-python =1.2 + - jupyter_core =5.7 + - ipykernel =6.29 + - nbconvert =7.14 + - notebook =7.0 + - jupyterlab_code_formatter =1.4 \ No newline at end of file diff --git a/workflow/report/meta_comparison_diffexp.rst b/workflow/report/meta_comparison_diffexp.rst new file mode 100644 index 00000000..0fc01778 --- /dev/null +++ b/workflow/report/meta_comparison_diffexp.rst @@ -0,0 +1,5 @@ +{{ snakemake.params.desc }} +Each point represents a gene, x axis shows the {{ snakemake.params.labels[0] }} effect, y-axis the {{ snakemake.params.labels[1] }} effect (both as log2 fold change). +The color encodes the corresponding q-value. +By clicking on points, their label can be displayed. +Holding the Shift key allows to select or deselect labels for multiple genes. \ No newline at end of file diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index c1b9d62f..76f1524c 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -396,4 +396,17 @@ def all_input(wildcards): ind_transcripts=config["experiment"]["3-prime-rna-seq"]["plot-qc"], ) ) + + # meta comparisons + if config["meta_comparisons"]["activate"]: + wanted_input.extend( + directory( + expand( + "results/datavzrd-reports/{report_type}_meta_comparison_{meta_comp}", + report_type=["go_terms", "diffexp", "pathways"], + meta_comp=lookup(dpath="meta_comparisons", within=config), + ) + ), + ) + return wanted_input diff --git a/workflow/rules/meta_comparisons.smk b/workflow/rules/meta_comparisons.smk new file mode 100644 index 00000000..a6fa26a5 --- /dev/null +++ b/workflow/rules/meta_comparisons.smk @@ -0,0 +1,70 @@ +rule meta_compare_diffex: + input: + expand( + "results/sleuth/diffexp/{model}.genes-representative.diffexp.rds", + model=lookup( + dpath="meta_comparisons/{meta_comp}/items/*/model", within=config + ), + ), + output: + "results/tables/diffexp/meta_compare_{meta_comp}.tsv", + "results/meta_comparison/diffexp/{meta_comp}.json", + log: + notebook="logs/meta_compare_diffexp/{meta_comp}.ipynb", + params: + desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config), + labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config), + conda: + "../envs/pystats.yaml" + notebook: + "../scripts/compare_diffexp.py.ipynb" + + +rule meta_compare_enrichment: + input: + expand( + "results/tables/go_terms/{model}.go_term_enrichment.gene_fdr_{gene_fdr}.go_term_sig_study_fdr_{go_term_fdr}.tsv", + model=lookup( + dpath="meta_comparisons/{meta_comp}/items/*/model", within=config + ), + gene_fdr=str(config["enrichment"]["goatools"]["fdr_genes"]).replace( + ".", "-" + ), + go_term_fdr=str(config["enrichment"]["goatools"]["fdr_go_terms"]).replace( + ".", "-" + ), + ), + output: + "results/tables/go_terms/meta_compare_{meta_comp}.tsv", + "results/meta_comparison/go_terms/{meta_comp}.json", + log: + notebook="logs/meta_compare_enrichment/{meta_comp}.ipynb", + params: + desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config), + labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config), + conda: + "../envs/pystats.yaml" + notebook: + "../scripts/compare_enrichment.py.ipynb" + + +rule meta_compare_pathways: + input: + expand( + "results/tables/pathways/{model}.pathways.tsv", + model=lookup( + dpath="meta_comparisons/{meta_comp}/items/*/model", within=config + ), + ), + output: + "results/tables/pathways/meta_compare_{meta_comp}.tsv", + "results/meta_comparison/pathways/{meta_comp}.json", + log: + notebook="logs/meta_compare_pathways/{meta_comp}.ipynb", + params: + desc=lookup(dpath="meta_comparisons/{meta_comp}/desc", within=config), + labels=lookup(dpath="meta_comparisons/{meta_comp}/items/*/label", within=config), + conda: + "../envs/pystats.yaml" + notebook: + "../scripts/compare_pathways.py.ipynb" diff --git a/workflow/scripts/compare_diffexp.py.ipynb b/workflow/scripts/compare_diffexp.py.ipynb new file mode 100644 index 00000000..86328e12 --- /dev/null +++ b/workflow/scripts/compare_diffexp.py.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "import pyreadr\n", + "import polars as pl\n", + "import polars.selectors as cs\n", + "\n", + "diffexp_x = pl.from_pandas(pyreadr.read_r(snakemake.input[0])[None]).lazy()\n", + "diffexp_y = pl.from_pandas(pyreadr.read_r(snakemake.input[1])[None]).lazy()\n", + "label_x = snakemake.params.labels[0]\n", + "label_y = snakemake.params.labels[1]\n", + "\n", + "# diffexp_x = pl.from_pandas(pyreadr.read_r(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_mgl_in_wt_vs_etho_in_wt_liver.genes-representative.diffexp.rds\")[None]).lazy()\n", + "# diffexp_y = pl.from_pandas(pyreadr.read_r(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_t3_in_wt_vs_etoh_in_wt_liver.genes-representative.diffexp.rds\")[None]).lazy()\n", + "# label_x = \"mgl\"\n", + "# label_y = \"t3\"\n", + "\n", + "\n", + "effect_x = f\"effect {label_x} (beta score)\"\n", + "effect_y = f\"effect {label_y} (beta score)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "def prepare(df):\n", + " return df.select(\n", + " cs.by_name(\"target_id\", \"ext_gene\", \"pval\", \"qval\"),\n", + " cs.starts_with(\"b_\").alias(\"beta\")\n", + " ).select(\n", + " cs.by_index(range(0, 5)) # only keep first b_ column\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "combined = prepare(diffexp_x).join(\n", + " prepare(diffexp_y), on=[\"target_id\", \"ext_gene\"], suffix=\"_y\"\n", + ").with_columns(\n", + " pl.min_horizontal(\"qval\", \"qval_y\").alias(\"qval_min\"),\n", + ").filter(\n", + " pl.col(\"qval_min\") <= 0.05\n", + ").rename(\n", + " {\n", + " \"beta\": effect_x,\n", + " \"beta_y\": effect_y,\n", + " \"qval_min\": \"min q-value\",\n", + " }\n", + ").collect()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "effects = combined.select(pl.col(effect_x, effect_y))\n", + "min_value = effects.min().min_horizontal()[0]\n", + "max_value = effects.max().max_horizontal()[0]\n", + "combined = combined.with_columns(\n", + " abs(pl.col(effect_x) - pl.col(effect_y)).alias(\"difference\")\n", + ")\n", + "combined_sorted = combined.sort(\"difference\", descending=True)\n", + "combined_pd = combined_sorted.select(\n", + " pl.col(\"ext_gene\", \"min q-value\", effect_x, effect_y)\n", + ").to_pandas()\n", + "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n", + "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.txt\", sep=\"\\t\", index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5_073, 10)
target_idext_genepvalqvaleffect mgl (beta score)pval_yqval_yeffect t3 (beta score)min q-valuedifference
strstrf64f64f64f64f64f64f64f64
"ENSMUST00000029670.7""Ptgfr"nullnullnull5.8900e-80.00001746.630.0000174null
"ENSMUST00000062307.5""Phf11a"nullnullnull9.9300e-70.0001064.590.000106null
"ENSMUST00000150649.9""Ifi213"nullnullnull0.00001040.0005864.280.000586null
"ENSMUST00000033617.13""Btk"nullnullnull0.00001380.0007333.970.000733null
"ENSMUST00000005950.12""Mmp12"nullnullnull0.00001520.0007934.550.000793null
"ENSMUST00000034860.5""Cyp1a2"3.0000e-119.2300e-9-1.940.00000180.000171-1.949.2300e-90.0
"ENSMUST00000093485.3""Ddx60"0.0000080.0001212.810.00005040.001962.810.0001210.0
"ENSMUST00000020301.14""Vsir"0.0007960.004451.30.004490.04731.30.004450.0
"ENSMUST00000040336.12""Slc22a23"0.0140.0358-0.4650.01770.112-0.4650.03580.0
"ENSMUST00000226516.2""Vmn1r73"0.007020.02212.110.01810.1142.110.02210.0
" + ], + "text/plain": [ + "shape: (5_073, 10)\n", + "┌───────────┬──────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", + "│ target_id ┆ ext_gene ┆ pval ┆ qval ┆ … ┆ qval_y ┆ effect t3 ┆ min ┆ differenc │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ (beta ┆ q-value ┆ e │\n", + "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ f64 ┆ score) ┆ --- ┆ --- │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ f64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ │\n", + "╞═══════════╪══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ ENSMUST00 ┆ Ptgfr ┆ null ┆ null ┆ … ┆ 0.0000174 ┆ 6.63 ┆ 0.0000174 ┆ null │\n", + "│ 000029670 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .7 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Phf11a ┆ null ┆ null ┆ … ┆ 0.000106 ┆ 4.59 ┆ 0.000106 ┆ null │\n", + "│ 000062307 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Ifi213 ┆ null ┆ null ┆ … ┆ 0.000586 ┆ 4.28 ┆ 0.000586 ┆ null │\n", + "│ 000150649 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .9 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Btk ┆ null ┆ null ┆ … ┆ 0.000733 ┆ 3.97 ┆ 0.000733 ┆ null │\n", + "│ 000033617 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .13 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Mmp12 ┆ null ┆ null ┆ … ┆ 0.000793 ┆ 4.55 ┆ 0.000793 ┆ null │\n", + "│ 000005950 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .12 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ ENSMUST00 ┆ Cyp1a2 ┆ 3.0000e-1 ┆ 9.2300e-9 ┆ … ┆ 0.000171 ┆ -1.94 ┆ 9.2300e-9 ┆ 0.0 │\n", + "│ 000034860 ┆ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Ddx60 ┆ 0.000008 ┆ 0.000121 ┆ … ┆ 0.00196 ┆ 2.81 ┆ 0.000121 ┆ 0.0 │\n", + "│ 000093485 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .3 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Vsir ┆ 0.000796 ┆ 0.00445 ┆ … ┆ 0.0473 ┆ 1.3 ┆ 0.00445 ┆ 0.0 │\n", + "│ 000020301 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .14 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Slc22a23 ┆ 0.014 ┆ 0.0358 ┆ … ┆ 0.112 ┆ -0.465 ┆ 0.0358 ┆ 0.0 │\n", + "│ 000040336 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .12 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ENSMUST00 ┆ Vmn1r73 ┆ 0.00702 ┆ 0.0221 ┆ … ┆ 0.114 ┆ 2.11 ┆ 0.0221 ┆ 0.0 │\n", + "│ 000226516 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ .2 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└───────────┴──────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_sorted\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import altair as alt\n", + "import sys\n", + "# we cannot use vegafusion here because it makes the point selection impossible since\n", + "# it prunes the required ext_gene column\n", + "#alt.data_transformers.enable(\"vegafusion\")\n", + "alt.data_transformers.disable_max_rows()\n", + "\n", + "point_selector = alt.selection_point(fields=[\"ext_gene\"], empty=False)\n", + "\n", + "points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n", + " alt.X(effect_x),\n", + " alt.Y(effect_y),\n", + " alt.Color(\"min q-value\", scale=alt.Scale(scheme=\"viridis\")),\n", + " opacity=alt.value(0.5),\n", + ")\n", + "\n", + "line = alt.Chart(\n", + " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n", + ").mark_line(color=\"lightgrey\").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " strokeDash=alt.value([5, 5]),\n", + ")\n", + "\n", + "text_background = alt.Chart(combined_pd).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5,\n", + " fill='white',\n", + " stroke='white',\n", + " strokeWidth=5,\n", + " ).encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"ext_gene\", alt.value(\"\")),\n", + " )\n", + "\n", + "text = alt.Chart(combined_pd).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5,\n", + ").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"ext_gene\", alt.value(\"\")),\n", + ")\n", + "\n", + "\n", + "chart = alt.layer(line, points, text_background, text).add_params(\n", + " point_selector\n", + ").interactive()\n", + "\n", + "display(chart)\n", + "\n", + "# chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.html\", inline=True)\n", + "\n", + "chart.save(snakemake.output[1], inline=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pystats", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/workflow/scripts/compare_enrichment.py.ipynb b/workflow/scripts/compare_enrichment.py.ipynb new file mode 100644 index 00000000..27fe9a70 --- /dev/null +++ b/workflow/scripts/compare_enrichment.py.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pyreadr\n", + "import polars as pl\n", + "import polars.selectors as cs\n", + "\n", + "diffexp_x = pl.read_csv(snakemake.input[0], separator=\"\\t\").lazy()\n", + "diffexp_y = pl.read_csv(snakemake.input[1], separator=\"\\t\").lazy()\n", + "label_x = snakemake.params.labels[0]\n", + "label_y = snakemake.params.labels[1]\n", + "\n", + "# diffexp_x = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/etoh_mgl_in_wt_vs_etho_in_wt_liver.go_term_enrichment.gene_fdr_0-05.go_term_sig_study_fdr0-05.tsv\", separator=\"\\t\").lazy()\n", + "# diffexp_y = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/etoh_t3_in_wt_vs_etoh_in_wt_liver.go_term_enrichment.gene_fdr_0-05.go_term_sig_study_fdr0-05.tsv\", separator=\"\\t\").lazy()\n", + "# label_x = \"mgl\"\n", + "# label_y = \"t3\"\n", + "\n", + "effect_x_pos = f\"positive effect {label_x}\"\n", + "effect_y_pos = f\"positive effect {label_y}\"\n", + "effect_x_neg = f\"negative effect {label_x}\"\n", + "effect_y_neg = f\"negative effect {label_y}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_study_items(value):\n", + " if value and value.strip() != \"\":\n", + " gene_values = value.split(', ')\n", + " data = []\n", + " for gene_value in gene_values:\n", + " parts = gene_value.split(':')\n", + " gene = parts[0]\n", + " val = float(parts[1])\n", + " data.append({\"gene\": gene, \"value\": val})\n", + " return data\n", + " else:\n", + " return []\n", + "\n", + "def calculate_sums(parsed_terms):\n", + " positive_sum = sum(item['value'] for item in parsed_terms if item['value'] > 0)\n", + " negative_sum = sum(item['value'] for item in parsed_terms if item['value'] < 0)\n", + " return positive_sum, negative_sum" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def prepare(df):\n", + " # Select necessary columns\n", + " df = df.select([\n", + " cs.by_name(\"GO\", \"term\", \"p_uncorrected\", \"p_fdr_bh\", \"study_items_sig_terms\")\n", + " ])\n", + " \n", + " # map_elements functions to columns using with_columns\n", + " df = df.with_columns([\n", + " pl.col(\"study_items_sig_terms\").map_elements(extract_study_items).alias(\"parsed_terms\")])\n", + " df = df.with_columns([\n", + " pl.col(\"parsed_terms\").map_elements(lambda x: calculate_sums(x)[0]).alias(\"cumulative_b_scores_positive\"),\n", + " pl.col(\"parsed_terms\").map_elements(lambda x: calculate_sums(x)[1]).alias(\"cumulative_b_scores_negative\")\n", + " ])\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + "sys:1: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n" + ] + } + ], + "source": [ + "prepared_diffexp_x = prepare(diffexp_x)\n", + "prepared_diffexp_y = prepare(diffexp_y)\n", + "combined = prepared_diffexp_x.join(\n", + " prepared_diffexp_y, on=[\"GO\", \"term\"], suffix=\"_y\"\n", + ").with_columns(\n", + " pl.min_horizontal(\"p_fdr_bh\", \"p_fdr_bh_y\").alias(\"p_fdr_bh_min\")\n", + ").filter(\n", + " pl.col(\"p_fdr_bh_min\") <= 0.05\n", + ").rename(\n", + " {\n", + " \"cumulative_b_scores_positive\": effect_x_pos,\n", + " \"cumulative_b_scores_positive_y\": effect_y_pos,\n", + " \"cumulative_b_scores_negative\": effect_x_neg,\n", + " \"cumulative_b_scores_negative_y\": effect_y_neg,\n", + " \"p_fdr_bh_min\": \"min p_fdr_bh\",\n", + " }\n", + ").collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (353, 15)
GOtermp_uncorrectedp_fdr_bhstudy_items_sig_termsparsed_termspositive effect mglnegative effect mglp_uncorrected_yp_fdr_bh_ystudy_items_sig_terms_yparsed_terms_ypositive effect t3negative effect t3min p_fdr_bh
strstrf64f64strlist[struct[2]]f64f64f64f64strlist[struct[2]]f64f64f64
"GO:0006629""lipid metabolic process"2.7545e-613.4109e-57"Fah:0.508, Dhcr24:1.27, Lypla1…[{"Fah",0.508}, {"Dhcr24",1.27}, … {"Cyp2c38",2.2}]184.10933-135.151245.5125e-383.4131e-34"Scp2:-0.752, Ces1g:-0.8, Plbd1…[{"Scp2",-0.752}, {"Ces1g",-0.8}, … {"Neu1",0.628}]58.609-66.7523.4109e-57
"GO:0007608""sensory perception of smell"6.1322e-421.8984e-38"Or2ag20:1.69, Or5d18:1.82, Or4…[{"Or2ag20",1.69}, {"Or5d18",1.82}, … {"Or1j15",0.91}]24.873-15.233.8951e-112.8372e-8"Or1e32:2.9, Or4f14b:1.1, B2m:0…[{"Or1e32",2.9}, {"Or4f14b",1.1}, … {"Or10ag59",2.27}]6.8880.01.8984e-38
"GO:0006631""fatty acid metabolic process"7.0720e-371.7514e-33"Lypla1:-0.484, Acads:1.13, C3:…[{"Lypla1",-0.484}, {"Acads",1.13}, … {"Pecr",0.701}]60.493-50.01666.0084e-179.3002e-14"Acsl1:-0.815, Fads1:0.544, Lpi…[{"Acsl1",-0.815}, {"Fads1",0.544}, … {"Fads2",1.26}]17.256-32.781.7514e-33
"GO:0007186""G protein-coupled receptor sig…2.8894e-323.9755e-29"Or2ag20:1.69, Apoe:0.842000000…[{"Or2ag20",1.69}, {"Apoe",0.842}, … {"Apoa1",1.44}]70.384-40.4670.0001940.013482"Ccl5:3.04, Adgra3:-1.11, Ccl9:…[{"Ccl5",3.04}, {"Adgra3",-1.11}, … {"Cxcl10",4.18}]50.685-11.6933.9755e-29
"GO:0002376""immune system process"1.4476e-301.7925e-27"Hmgb2:-1.74, Cadm1:1.45, Serpi…[{"Hmgb2",-1.74}, {"Cadm1",1.45}, … {"Rsad2",3.72}]182.7968-54.58178.6964e-461.0769e-41"Iigp1:1.23, Themis2:3.46, C1ra…[{"Iigp1",1.23}, {"Themis2",3.46}, … {"Slc15a3",1.79}]162.701-15.0331.0769e-41
"GO:0004029""aldehyde dehydrogenase (NAD+) …0.0026380.034555"Aldh1a1:-0.447, Aldh1a7:0.728,…[{"Aldh1a1",-0.447}, {"Aldh1a7",0.728}, … {"Aldh7a1",-0.936}]1.8012-1.3830.0000930.005015"Aldh7a1:-0.81, Aldh2:-0.586, A…[{"Aldh7a1",-0.81}, {"Aldh2",-0.586}, … {"Aldh1a1",-0.588}]0.0-3.7150.005015
"GO:0008391""arachidonic acid monooxygenase…0.0026380.034555"Cyp4a10:0.716, Cyp2d22:-0.0859…[{"Cyp4a10",0.716}, {"Cyp2d22",-0.0859}, … {"Cyp4a31",2.49}]3.206-12.53592.4159e-70.000027"Cyp2d22:-0.741, Cyp4a14:1.88, …[{"Cyp2d22",-0.741}, {"Cyp4a14",1.88}, … {"Cyp4a10",0.793}]5.633-14.3910.000027
"GO:0008047""enzyme activator activity"0.0027490.035898"Krtcap2:0.353, Timp2:0.6559999…[{"Krtcap2",0.353}, {"Timp2",0.656}, … {"Dtx3l",0.897}]5.568-3.5480.0003280.013481"Apobec1:2.0, Arl1:0.721, Cd44:…[{"Apobec1",2.0}, {"Arl1",0.721}, … {"Alox5ap",1.56}]7.7850.00.013481
"GO:0008194""UDP-glycosyltransferase activi…0.0037990.04697"Ugt3a2:-0.982, Ugt2b37:1.76, B…[{"Ugt3a2",-0.982}, {"Ugt2b37",1.76}, … {"Ugt2b1",-2.29}]3.98-11.1850.0002740.011994"Ugt2a3:-0.9640000000000001, Ug…[{"Ugt2a3",-0.964}, {"Ugt2b5",-1.13}, … {"Ugt3a2",-0.54}]1.79-6.4040.011994
"GO:0016655""oxidoreductase activity, actin…0.0039010.047819"Akr1c12:-1.14, Akr1c6:-1.38, A…[{"Akr1c12",-1.14}, {"Akr1c6",-1.38}, … {"Aifm2",1.16}]2.61-5.540.0005250.020257"Akr1c6:-0.8140000000000001, Ai…[{"Akr1c6",-0.814}, {"Aifm2",0.658}, … {"Dcxr",-0.752}]4.348-1.5660.020257
" + ], + "text/plain": [ + "shape: (353, 15)\n", + "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n", + "│ GO ┆ term ┆ p_uncorre ┆ p_fdr_bh ┆ … ┆ parsed_te ┆ positive ┆ negative ┆ min │\n", + "│ --- ┆ --- ┆ cted ┆ --- ┆ ┆ rms_y ┆ effect t3 ┆ effect t3 ┆ p_fdr_bh │\n", + "│ str ┆ str ┆ --- ┆ f64 ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ ┆ ┆ f64 ┆ ┆ ┆ list[stru ┆ f64 ┆ f64 ┆ f64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ct[2]] ┆ ┆ ┆ │\n", + "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n", + "│ GO:000662 ┆ lipid ┆ 2.7545e-6 ┆ 3.4109e-5 ┆ … ┆ [{\"Scp2\", ┆ 58.609 ┆ -66.752 ┆ 3.4109e- │\n", + "│ 9 ┆ metabolic ┆ 1 ┆ 7 ┆ ┆ -0.752}, ┆ ┆ ┆ 57 │\n", + "│ ┆ process ┆ ┆ ┆ ┆ {\"Ces1g\", ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ -0.… ┆ ┆ ┆ │\n", + "│ GO:000760 ┆ sensory ┆ 6.1322e-4 ┆ 1.8984e-3 ┆ … ┆ [{\"Or1e32 ┆ 6.888 ┆ 0.0 ┆ 1.8984e- │\n", + "│ 8 ┆ perceptio ┆ 2 ┆ 8 ┆ ┆ \",2.9}, ┆ ┆ ┆ 38 │\n", + "│ ┆ n of ┆ ┆ ┆ ┆ {\"Or4f14b ┆ ┆ ┆ │\n", + "│ ┆ smell ┆ ┆ ┆ ┆ \",1.… ┆ ┆ ┆ │\n", + "│ GO:000663 ┆ fatty ┆ 7.0720e-3 ┆ 1.7514e-3 ┆ … ┆ [{\"Acsl1\" ┆ 17.256 ┆ -32.78 ┆ 1.7514e- │\n", + "│ 1 ┆ acid ┆ 7 ┆ 3 ┆ ┆ ,-0.815}, ┆ ┆ ┆ 33 │\n", + "│ ┆ metabolic ┆ ┆ ┆ ┆ {\"Fads1\", ┆ ┆ ┆ │\n", + "│ ┆ process ┆ ┆ ┆ ┆ 0.… ┆ ┆ ┆ │\n", + "│ GO:000718 ┆ G protein ┆ 2.8894e-3 ┆ 3.9755e-2 ┆ … ┆ [{\"Ccl5\", ┆ 50.685 ┆ -11.693 ┆ 3.9755e- │\n", + "│ 6 ┆ -coupled ┆ 2 ┆ 9 ┆ ┆ 3.04}, ┆ ┆ ┆ 29 │\n", + "│ ┆ receptor ┆ ┆ ┆ ┆ {\"Adgra3\" ┆ ┆ ┆ │\n", + "│ ┆ sig… ┆ ┆ ┆ ┆ ,-1.1… ┆ ┆ ┆ │\n", + "│ GO:000237 ┆ immune ┆ 1.4476e-3 ┆ 1.7925e-2 ┆ … ┆ [{\"Iigp1\" ┆ 162.701 ┆ -15.033 ┆ 1.0769e- │\n", + "│ 6 ┆ system ┆ 0 ┆ 7 ┆ ┆ ,1.23}, ┆ ┆ ┆ 41 │\n", + "│ ┆ process ┆ ┆ ┆ ┆ {\"Themis2 ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ \",3.… ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ GO:000402 ┆ aldehyde ┆ 0.002638 ┆ 0.034555 ┆ … ┆ [{\"Aldh7a ┆ 0.0 ┆ -3.715 ┆ 0.005015 │\n", + "│ 9 ┆ dehydroge ┆ ┆ ┆ ┆ 1\",-0.81} ┆ ┆ ┆ │\n", + "│ ┆ nase ┆ ┆ ┆ ┆ , {\"Aldh2 ┆ ┆ ┆ │\n", + "│ ┆ (NAD+) … ┆ ┆ ┆ ┆ \",-… ┆ ┆ ┆ │\n", + "│ GO:000839 ┆ arachidon ┆ 0.002638 ┆ 0.034555 ┆ … ┆ [{\"Cyp2d2 ┆ 5.633 ┆ -14.391 ┆ 0.000027 │\n", + "│ 1 ┆ ic acid ┆ ┆ ┆ ┆ 2\",-0.741 ┆ ┆ ┆ │\n", + "│ ┆ monooxyge ┆ ┆ ┆ ┆ }, {\"Cyp4 ┆ ┆ ┆ │\n", + "│ ┆ nase… ┆ ┆ ┆ ┆ a14… ┆ ┆ ┆ │\n", + "│ GO:000804 ┆ enzyme ┆ 0.002749 ┆ 0.035898 ┆ … ┆ [{\"Apobec ┆ 7.785 ┆ 0.0 ┆ 0.013481 │\n", + "│ 7 ┆ activator ┆ ┆ ┆ ┆ 1\",2.0}, ┆ ┆ ┆ │\n", + "│ ┆ activity ┆ ┆ ┆ ┆ {\"Arl1\",0 ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ .72… ┆ ┆ ┆ │\n", + "│ GO:000819 ┆ UDP-glyco ┆ 0.003799 ┆ 0.04697 ┆ … ┆ [{\"Ugt2a3 ┆ 1.79 ┆ -6.404 ┆ 0.011994 │\n", + "│ 4 ┆ syltransf ┆ ┆ ┆ ┆ \",-0.964} ┆ ┆ ┆ │\n", + "│ ┆ erase ┆ ┆ ┆ ┆ , {\"Ugt2b ┆ ┆ ┆ │\n", + "│ ┆ activi… ┆ ┆ ┆ ┆ 5\",… ┆ ┆ ┆ │\n", + "│ GO:001665 ┆ oxidoredu ┆ 0.003901 ┆ 0.047819 ┆ … ┆ [{\"Akr1c6 ┆ 4.348 ┆ -1.566 ┆ 0.020257 │\n", + "│ 5 ┆ ctase ┆ ┆ ┆ ┆ \",-0.814} ┆ ┆ ┆ │\n", + "│ ┆ activity, ┆ ┆ ┆ ┆ , {\"Aifm2 ┆ ┆ ┆ │\n", + "│ ┆ actin… ┆ ┆ ┆ ┆ \",0… ┆ ┆ ┆ │\n", + "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.HConcatChart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import altair as alt\n", + "import sys\n", + "from IPython.display import display\n", + "# we cannot use vegafusion here because it makes the point selection impossible since\n", + "# it prunes the required ext_gene column\n", + "#alt.data_transformers.enable(\"vegafusion\")\n", + "xlabel = f\"effect {label_x}\"\n", + "ylabel = f\"effect {label_y}\"\n", + "df_filtered = combined.filter((pl.col(effect_x_pos) != 0) & (pl.col(effect_y_pos) != 0) & (pl.col(effect_x_neg) != 0) & (pl.col(effect_y_neg) != 0))\n", + "df_filtered = df_filtered.with_columns(\n", + " pl.max_horizontal(abs(pl.col(effect_x_pos) - pl.col(effect_y_pos)), abs(pl.col(effect_x_neg) - pl.col(effect_y_neg))).alias(\"difference\")\n", + ")\n", + "combined_sorted = df_filtered.sort(\"difference\", descending=True)\n", + "combined_pd = combined_sorted.select(\n", + " pl.col(\"GO\", \"term\", \"min p_fdr_bh\", effect_x_pos, effect_y_pos, effect_x_neg, effect_y_neg)\n", + ").to_pandas()\n", + "# combined_pd = combined_pd[\"GO\"].astype(str)\n", + "\n", + "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n", + "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/go_terms/meta_compare_etoh_wt_mgl_vs_t3.tsv\", sep=\"\\t\", index=False)\n", + "\n", + "point_selector = alt.selection_point(fields=[\"term\"], empty=False)\n", + "\n", + "\n", + "def plot(df, effect_x, effect_y, title, xlabel, ylabel):\n", + " # Filter out rows where either effect_x or effect_y is zero because of logarithmic scale\n", + "\n", + " df = df_filtered.select(\n", + " pl.col(\"GO\", \"term\", \"min p_fdr_bh\", effect_x, effect_y)\n", + " ).to_pandas()\n", + "\n", + "\n", + " effects = df_filtered.select(pl.col(effect_x), pl.col(effect_y))\n", + " min_value = effects.min().min_horizontal()[0]\n", + " max_value = effects.max().max_horizontal()[0]\n", + "\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + "\n", + " points = alt.Chart(df).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n", + " alt.X(effect_x, title=xlabel, scale=alt.Scale(type='log', nice=False), axis=alt.Axis(grid=False)),\n", + " alt.Y(effect_y, title=ylabel, scale=alt.Scale(type='log', nice=False), axis=alt.Axis(grid=False)),\n", + " alt.Color(\"min p_fdr_bh\", scale=alt.Scale(scheme=\"viridis\")),\n", + " opacity=alt.value(0.5),\n", + " )\n", + "\n", + " line = alt.Chart(\n", + " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n", + " ).mark_line(color=\"lightgrey\").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " strokeDash=alt.value([5, 5]),\n", + " )\n", + " \n", + " text_background = alt.Chart(df).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5,\n", + " fill='white',\n", + " stroke='white',\n", + " strokeWidth=5,\n", + " ).encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"term\", alt.value(\"\")),\n", + " )\n", + "\n", + " text = alt.Chart(df).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5, \n", + " \n", + " ).encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"term\", alt.value(\"\")),\n", + " )\n", + " \n", + "\n", + " chart = alt.layer(line, points, text_background, text).add_params(\n", + " point_selector\n", + " ).properties(\n", + " title=title\n", + " ).interactive()\n", + " return chart\n", + "\n", + "# Update the plot function calls to include the logarithmic scale and filter out zero values\n", + "positive_chart = plot(combined_pd, effect_x_pos, effect_y_pos, \"Positive effect\", xlabel, ylabel)\n", + "negative_chart = plot(combined_pd, effect_x_neg, effect_y_neg, \"Negative effect\", xlabel, ylabel)\n", + "\n", + "final_chart = alt.hconcat(positive_chart, negative_chart).resolve_scale(\n", + " color='shared'\n", + ")\n", + "\n", + "display(final_chart)\n", + "\n", + "# final_chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.html\", inline=True)\n", + "final_chart.save(snakemake.output[1], inline=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/workflow/scripts/compare_pathways.py.ipynb b/workflow/scripts/compare_pathways.py.ipynb new file mode 100644 index 00000000..d3937ffc --- /dev/null +++ b/workflow/scripts/compare_pathways.py.ipynb @@ -0,0 +1,370 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import pyreadr\n", + "import polars as pl\n", + "import polars.selectors as cs\n", + "diffexp_x = pl.read_csv(snakemake.input[0], separator=\"\\t\").lazy()\n", + "diffexp_y = pl.read_csv(snakemake.input[1], separator=\"\\t\").lazy()\n", + "label_x = snakemake.params.labels[0]\n", + "label_y = snakemake.params.labels[1]\n", + "\n", + "\n", + "# diffexp_x = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/etoh_mgl_in_wt_vs_etho_in_wt_liver.pathways.tsv\", separator=\"\\t\").lazy()\n", + "# diffexp_y = pl.read_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/etoh_t3_in_wt_vs_etoh_in_wt_liver.pathways.tsv\", separator=\"\\t\").lazy()\n", + "# label_x = \"mgl\"\n", + "# label_y = \"t3\"\n", + "\n", + "effect_x = f\"effect {label_x}\"\n", + "effect_y = f\"effect {label_y}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "def prepare(df):\n", + " # Select necessary columns and filter\n", + " df = df.select([\n", + " cs.by_name(\"Name\", \"Combined p-value\", \"Combined FDR\", \"total perturbation accumulation\")\n", + " ])\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "prepared_diffexp_x = prepare(diffexp_x)\n", + "prepared_diffexp_y = prepare(diffexp_y)\n", + "combined = prepared_diffexp_x.join(\n", + " prepared_diffexp_y, on=[\"Name\"], suffix=\"_y\"\n", + ").with_columns(\n", + " pl.min_horizontal(\"Combined FDR\", \"Combined FDR_y\").alias(\"fdr_min\")\n", + ").filter(\n", + " pl.col(\"fdr_min\") <= 0.05\n", + ").rename(\n", + " {\n", + " \"total perturbation accumulation\": effect_x,\n", + " \"total perturbation accumulation_y\": effect_y,\n", + " \"fdr_min\": \"min fdr\",\n", + " }\n", + ").collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (426, 8)
NameCombined p-valueCombined FDReffect mglCombined p-value_yCombined FDR_yeffect t3min fdr
strf64f64f64f64f64f64f64
"Metabolism of RNA"5.0991e-1043.8804e-101-147.8280480.0000080.000235-47.9321923.8804e-101
"Translation"3.8445e-691.4628e-66-25.02360.0254090.101636-7.7713331.4628e-66
"rRNA processing in the nucleus…1.0766e-532.7309e-51-138.8629410.0001850.003123-37.7569922.7309e-51
"Amino acid and derivative meta…3.3796e-516.4297e-498.5169271.1433e-243.4757e-22-1.6486.4297e-49
"Major pathway of rRNA processi…6.1362e-519.3392e-49-138.4422310.000160.002854-36.965179.3392e-49
"FCERI mediated Ca+2 mobilizati…0.212740.2739341.5680.0026360.0267377.44150.026737
"DAP12 interactions"0.2329550.2944443.0382730.0059220.0467575.9825450.046757
"Interleukin-3, Interleukin-5 a…0.3445680.4009420.00.0031940.0289861.020.028986
"Collagen formation"0.4255250.483321.3033540.0044060.037203-15.3085370.037203
"G alpha (q) signalling events"0.7120480.7551753.18850.0064530.048449.23850.04844
" + ], + "text/plain": [ + "shape: (426, 8)\n", + "┌────────────┬────────────┬────────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ Name ┆ Combined ┆ Combined ┆ effect ┆ Combined ┆ Combined ┆ effect t3 ┆ min fdr │\n", + "│ --- ┆ p-value ┆ FDR ┆ mgl ┆ p-value_y ┆ FDR_y ┆ --- ┆ --- │\n", + "│ str ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ f64 ┆ f64 │\n", + "│ ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ │\n", + "╞════════════╪════════════╪════════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ Metabolism ┆ 5.0991e-10 ┆ 3.8804e-10 ┆ -147.8280 ┆ 0.000008 ┆ 0.000235 ┆ -47.93219 ┆ 3.8804e-1 │\n", + "│ of RNA ┆ 4 ┆ 1 ┆ 48 ┆ ┆ ┆ 2 ┆ 01 │\n", + "│ Translatio ┆ 3.8445e-69 ┆ 1.4628e-66 ┆ -25.0236 ┆ 0.025409 ┆ 0.101636 ┆ -7.771333 ┆ 1.4628e-6 │\n", + "│ n ┆ ┆ ┆ ┆ ┆ ┆ ┆ 6 │\n", + "│ rRNA ┆ 1.0766e-53 ┆ 2.7309e-51 ┆ -138.8629 ┆ 0.000185 ┆ 0.003123 ┆ -37.75699 ┆ 2.7309e-5 │\n", + "│ processing ┆ ┆ ┆ 41 ┆ ┆ ┆ 2 ┆ 1 │\n", + "│ in the ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ nucleus… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Amino acid ┆ 3.3796e-51 ┆ 6.4297e-49 ┆ 8.516927 ┆ 1.1433e-2 ┆ 3.4757e-2 ┆ -1.648 ┆ 6.4297e-4 │\n", + "│ and ┆ ┆ ┆ ┆ 4 ┆ 2 ┆ ┆ 9 │\n", + "│ derivative ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ meta… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Major ┆ 6.1362e-51 ┆ 9.3392e-49 ┆ -138.4422 ┆ 0.00016 ┆ 0.002854 ┆ -36.96517 ┆ 9.3392e-4 │\n", + "│ pathway of ┆ ┆ ┆ 31 ┆ ┆ ┆ ┆ 9 │\n", + "│ rRNA ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ processi… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ FCERI ┆ 0.21274 ┆ 0.273934 ┆ 1.568 ┆ 0.002636 ┆ 0.026737 ┆ 7.4415 ┆ 0.026737 │\n", + "│ mediated ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Ca+2 mobil ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ izati… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ DAP12 inte ┆ 0.232955 ┆ 0.294444 ┆ 3.038273 ┆ 0.005922 ┆ 0.046757 ┆ 5.982545 ┆ 0.046757 │\n", + "│ ractions ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Interleuki ┆ 0.344568 ┆ 0.400942 ┆ 0.0 ┆ 0.003194 ┆ 0.028986 ┆ 1.02 ┆ 0.028986 │\n", + "│ n-3, Inter ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ leukin-5 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ a… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Collagen ┆ 0.425525 ┆ 0.48332 ┆ 1.303354 ┆ 0.004406 ┆ 0.037203 ┆ -15.30853 ┆ 0.037203 │\n", + "│ formation ┆ ┆ ┆ ┆ ┆ ┆ 7 ┆ │\n", + "│ G alpha ┆ 0.712048 ┆ 0.755175 ┆ 3.1885 ┆ 0.006453 ┆ 0.04844 ┆ 9.2385 ┆ 0.04844 │\n", + "│ (q) ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ signalling ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ events ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└────────────┴────────────┴────────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "effects = combined.select(pl.col(effect_x, effect_y))\n", + "min_value = effects.min().min_horizontal()[0]\n", + "max_value = effects.max().max_horizontal()[0]\n", + "combined = combined.with_columns(\n", + " abs(pl.col(effect_x) - pl.col(effect_y)).alias(\"difference\")\n", + ")\n", + "combined_sorted = combined.sort(\"difference\", descending=True)\n", + "combined_pd = combined_sorted.select(\n", + " pl.col(\"Name\", \"min fdr\", effect_x, effect_y)\n", + ").to_pandas()\n", + "combined_pd.to_csv(snakemake.output[0], sep=\"\\t\", index=False)\n", + "# combined_pd.to_csv(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/meta_compare_etoh_wt_mgl_vs_t3.tsv\", sep=\"\\t\", index=False)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/.snakemake/conda/673ca4e70a0a7f8fe6fba3391c9fbb49_/lib/python3.12/site-packages/altair/utils/save.py:42: UserWarning: inline argument ignored for non HTML formats.\n", + " warnings.warn(\"inline argument ignored for non HTML formats.\", stacklevel=1)\n" + ] + } + ], + "source": [ + "import altair as alt\n", + "import sys\n", + "from IPython.display import display\n", + "\n", + "# we cannot use vegafusion here because it makes the point selection impossible since\n", + "# it prunes the required ext_gene column\n", + "#alt.data_transformers.enable(\"vegafusion\")\n", + "alt.data_transformers.disable_max_rows()\n", + "\n", + "\n", + "# Data transformation\n", + "alt.data_transformers.disable_max_rows()\n", + "\n", + "# Selektor für Punkte\n", + "point_selector = alt.selection_point(fields=[\"Name\"], empty=False)\n", + "\n", + "# Punkte\n", + "points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={\"content\": \"data\"}).encode(\n", + " alt.X(effect_x, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),\n", + " alt.Y(effect_y, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),\n", + " alt.Color(\"min fdr\", scale=alt.Scale(scheme=\"viridis\")),\n", + " opacity=alt.value(0.5),\n", + ")\n", + "\n", + "# Gestrichelte Linie\n", + "line = alt.Chart(\n", + " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})\n", + ").mark_line(color=\"lightgrey\").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " strokeDash=alt.value([5, 5]),\n", + ")\n", + "\n", + "# Gestrichelte Linie\n", + "x_axis = alt.Chart(\n", + " pl.DataFrame({effect_x: [0, 0], effect_y: [min_value, max_value]})\n", + ").mark_line(color=\"lightgrey\").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " strokeDash=alt.value([5, 5]),\n", + ")\n", + "\n", + "y_axis = alt.Chart(\n", + " pl.DataFrame({effect_x: [min_value, max_value], effect_y: [0, 0]})\n", + ").mark_line(color=\"lightgrey\").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " strokeDash=alt.value([5, 5]),\n", + ")\n", + "\n", + "text_background = alt.Chart(combined_pd).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5,\n", + " fill='white',\n", + " stroke='white',\n", + " strokeWidth=5,\n", + " ).encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"Name\", alt.value(\"\")),\n", + " )\n", + "\n", + "\n", + "# Text\n", + "text = alt.Chart(combined_pd).mark_text(\n", + " align=\"left\",\n", + " baseline=\"middle\",\n", + " dx=5,\n", + " dy=-5,\n", + ").encode(\n", + " x=effect_x,\n", + " y=effect_y,\n", + " text=alt.condition(point_selector, \"Name\", alt.value(\"\")),\n", + ")\n", + "\n", + "# Null-Linien für x und y\n", + "zero_lines = alt.Chart(pl.DataFrame({\"zero\": [0]})).mark_rule(color=\"black\").encode(\n", + " x=alt.X(\"zero\", axis=alt.Axis(title=\"\")), # keine Beschriftung der Achse\n", + " y=alt.Y(\"zero\", axis=alt.Axis(title=\"\")), # keine Beschriftung der Achse\n", + ")\n", + "\n", + "# Gesamtes Diagramm mit den Ebenen\n", + "chart = alt.layer(x_axis, y_axis, line, points, text_background, text).add_params(\n", + " point_selector\n", + ").interactive()\n", + "\n", + "# display(chart)\n", + "\n", + "chart.save(snakemake.output[1], inline=True)\n", + "# chart.save(\"/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/meta_comparison/pathways/etoh_wt_mgl_vs_t3.json\", inline=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}