nf-core · nictru · Mar 25, 2024 · Mar 23, 2024 · Mar 23, 2024 · Mar 23, 2024
diff --git a/bin/aggregate_synonyms.py b/bin/aggregate_synonyms.py
@@ -13,6 +13,8 @@
 df_affinities = pd.read_csv(args.input, index_col=0, header=0, sep="\t")
 df_genes = pd.read_csv(args.gene_map, sep="\t", index_col=0)
 
+df_affinities = df_affinities.drop(["NumPeaks", "AvgPeakDistance", "AvgPeakSize"], axis=1)
+
 conversion_dict = df_genes["gene_name"].to_dict()
 df_affinities.index = df_affinities.index.map(conversion_dict).str.upper()
 

diff --git a/bin/combine_rankings.py b/bin/combine_rankings.py
@@ -13,8 +13,12 @@
 df = pd.concat([df[['dcg']] for df in dfs])
 
 df = df.groupby(df.index).sum()
+
 df.sort_values(by=['dcg'], ascending=False, inplace=True)
 
 df['rank'] = range(1, len(df.index) + 1)
+df['dcg'] = 1 - (df['rank'] / len(df.index))
+
+df.drop(columns=['rank'], inplace=True)
 
 df.to_csv(args.output, sep='\t', index=True)
diff --git a/bin/combine_tables.py b/bin/combine_tables.py
@@ -8,7 +8,7 @@
 parser = argparse.ArgumentParser(description="Calculate statistics between two multiple files.")
 parser.add_argument("-i", "--input", type=str, nargs='+', help="List of input file paths", required=True)
 parser.add_argument("-o", "--output", type=str, help="Output file path", required=True)
-parser.add_argument("-m", "--method", type=str, choices=["mean", "sum", "ratio"], default="mean", help="Calculation method (mean, sum, ratio)")
+parser.add_argument("-m", "--method", type=str, choices=["mean", "sum", "ratio", "rank"], default="mean", help="Calculation method (mean, sum, ratio, rank)")
 args = parser.parse_args()
 
 # Check if input and output paths are provided
@@ -18,13 +18,16 @@
 # Read all input files into a list of dataframes
 dfs = [pd.read_csv(file, sep='\t', index_col=0) for file in args.input]
 
-if args.method == "sum":
+if args.method in ["sum", "rank"]:
     index_union = dfs[0].index
+    col_union = dfs[0].columns
     for df in dfs[1:]:
         index_union = index_union.union(df.index)
+        col_union = col_union.union(df.columns)
 
-    # Add NA values for missing rows
-    dfs = [df.reindex(index_union) for df in dfs]
+    # Add zero values for missing rows and columns
+    dfs = [df.reindex(index_union).fillna(0, inplace=False) for df in dfs]
+    dfs = [df.reindex(columns=col_union).fillna(0, inplace=False) for df in dfs]
 else:
     index_intersection = dfs[0].index
     for df in dfs[1:]:
@@ -49,6 +52,8 @@
 # Calculate the selected statistic
 if args.method == "mean":
     result = sum(dfs) / len(dfs)
+elif args.method == "rank":
+    result = 1 - (sum(dfs).rank(ascending=False) / len(dfs[0].index))
 elif args.method == "sum":
     result = sum(dfs)
 elif args.method == "ratio":

diff --git a/bin/dynamite_preprocess.py b/bin/dynamite_preprocess.py
@@ -25,8 +25,6 @@ def remove_version(gene_id):
 df_affinities = df_affinities.loc[gene_intersection]
 df_expression = df_expression.loc[gene_intersection]
 
-df_affinities = df_affinities.drop(["NumPeaks", "AvgPeakDistance", "AvgPeakSize"], axis=1)
-
 df_affinities["Expression"] = 0
 df_affinities.loc[df_expression["log2FoldChange"] > 0, "Expression"] = 1
 

diff --git a/bin/ranking.py b/bin/ranking.py
@@ -8,36 +8,45 @@
 parser = argparse.ArgumentParser(description='Create TF ranking')
 parser.add_argument('--input', type=str, help='Score file', required=True)
 parser.add_argument('--alpha', type=float, help='Alpha value', required=True)
-parser.add_argument('--output', type=str, help='Output file', required=True)
+parser.add_argument('--out_tfs', type=str, help='Output tf file', required=True)
+parser.add_argument('--out_tgs', type=str, help='Output gene file', required=True)
 
 args = parser.parse_args()
 
-df = pd.read_csv(args.input, sep='\t', header=0, index_col=0).T
-df = df.dropna(axis=1, how='all')
+df_genes = pd.read_csv(args.input, sep='\t', header=0, index_col=0)
 
 # Save whole content of the dataframe in a single, flattened list
-background = df.values.flatten().tolist()
+background = df_genes.values.flatten().tolist()
 background_median = st.median(background)
 
 def mann_whitney_u(background, foreground):
     _, p = stats.mannwhitneyu(background, foreground)
     return p
 
+df_ranking = pd.DataFrame(columns=['sum', 'mean', 'q95', 'q99', 'median', 'p-value'])
 # Transform df to have the following columns: sum, mean, q95, q99, median, p-value
-df['sum'] = df.sum(axis=1)
-df['mean'] = df.mean(axis=1)
-df['q95'] = df.quantile(0.95, axis=1)
-df['q99'] = df.quantile(0.99, axis=1)
-df['median'] = df.median(axis=1)
-df['p-value'] = df.apply(lambda x: mann_whitney_u(background, x), axis=1)
+df_ranking['sum'] = df_genes.sum()
+df_ranking['mean'] = df_genes.mean()
+df_ranking['q95'] = df_genes.quantile(0.95)
+df_ranking['q99'] = df_genes.quantile(0.99)
+df_ranking['median'] = df_genes.median()
+df_ranking['p-value'] = df_genes.apply(lambda x: mann_whitney_u(background, x))
 
-df = df[['sum', 'mean', 'q95', 'q99', 'median', 'p-value']]
-df = df[(df['median'] > background_median) & (df['p-value'] < args.alpha)]
+df_ranking = df_ranking[(df_ranking['median'] > background_median) & (df_ranking['p-value'] < args.alpha)]
 
-df.sort_values(by=['median'], ascending=False, inplace=True)
+df_ranking.sort_values(by=['median'], ascending=False, inplace=True)
 
-length = len(df.index)
-df['rank'] = range(1, length + 1)
-df['dcg'] = 1 - (df['rank'] - 1) / length
+length = len(df_ranking.index)
+df_ranking['rank'] = range(1, length + 1)
+df_ranking['dcg'] = 1 - (df_ranking['rank'] - 1) / length
 
-df.to_csv(args.output, sep='\t')
+df_ranking = df_ranking[['dcg']]
+df_ranking.to_csv(args.out_tfs, sep='\t')
+
+# Keep only the TFs that passed the significance filter
+significant_tfs = df_ranking.index
+df_genes = df_genes[significant_tfs]
+
+# Calculate gene-wise DCGs per TF
+df_genes = 1 - (df_genes.rank(ascending=False).astype(int) / len(df_genes.index))
+df_genes.to_csv(args.out_tgs, sep='\t')
diff --git a/conf/modules.config b/conf/modules.config
@@ -68,7 +68,15 @@ process {
         ]
     }
 
-    withName: COMBINE_RANKINGS {
+    withName: COMBINE_TFS_PER_ASSAY {
+        ext.extension = "tf_ranking.tsv"
+    }
+
+    withName: COMBINE_TGS_PER_ASSAY {
+        ext.extension = "tg_ranking.tsv"
+    }
+
+    withName: ".*:REPORT:CREATE" {
         publishDir = [
             path: { "${params.outdir}" },
             mode: params.publish_dir_mode,

diff --git a/modules/local/ranking/combine_rankings.nf b/modules/local/ranking/combine_rankings.nf
diff --git a/modules/local/ranking/ranking.nf b/modules/local/ranking/ranking.nf
@@ -12,13 +12,14 @@ process RANKING {
     val(alpha)
 
     output:
-    tuple val(meta), path("*.ranking.tsv"), emit: ranking
+    tuple val(meta), path("*.tf_ranking.tsv"), emit: tfs
+    tuple val(meta), path("*.tg_ranking.tsv"), emit: tgs
 
-    path  "versions.yml"                  , emit: versions
+    path  "versions.yml"                     , emit: versions
 
     script:
     """
-    ranking.py --input ${tf_tg_score} --alpha ${alpha} --output ${meta.id}.ranking.tsv
+    ranking.py --input ${tf_tg_score} --alpha ${alpha} --out_tfs ${meta.id}.tf_ranking.tsv --out_tgs ${meta.id}.tg_ranking.tsv
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/report/create/app/__init__.py b/modules/local/report/create/app/__init__.py
diff --git a/modules/local/report/create/app/templates/__init__.py b/modules/local/report/create/app/templates/__init__.py
diff --git a/modules/local/report/create/app/templates/base.html b/modules/local/report/create/app/templates/base.html
@@ -0,0 +1,28 @@
+<!doctype html>
+<html>
+  <head>
+    <title>Report</title>
+    <link rel="stylesheet" href="https://unpkg.com/spectre.css/dist/spectre.min.css" />
+    <link rel="stylesheet" href="https://unpkg.com/spectre.css/dist/spectre-exp.min.css" />
+    <link rel="stylesheet" href="https://unpkg.com/spectre.css/dist/spectre-icons.min.css" />
+    <script src="https://cdn.plot.ly/plotly-2.30.0.min.js" charset="utf-8"></script>
+    <link rel="stylesheet" href="styles.css" />
+  </head>
+
+  <body>
+    <div style="display: flex; align-items: center; justify-content: center; height: 150px">
+      <h1>Report</h1>
+      <a href="https://nf-co.re/tfactivity">
+        <img
+          src="https://raw.githubusercontent.com/nf-core/tfactivity/dev/assets/nf-core-tfactivity_logo_light.png"
+          width="250px"
+          style="max-height: 100px"
+        />
+      </a>
+    </div>
+    {% block tabs %}{% endblock %}
+    <div class="content">{% block content %}{% endblock %}</div>
+
+    {% block scripts %}{% endblock %}
+  </body>
+</html>
diff --git a/modules/local/report/create/app/templates/configuration.html b/modules/local/report/create/app/templates/configuration.html
@@ -0,0 +1,63 @@
+{% extends "base.html" %} {% block tabs %} {% from 'macros.html' import tabs %} {{ tabs(active="conf") }} {% endblock %}
+{% block content %}
+<div class="ranking-container">
+  <p>
+    Detailed information about <code>nf-core/tfactivity</code> and its parameters can be found
+    <a href="https://nf-co.re/tfactivity/dev/parameters">here</a>.
+  </p>
+  {% for key, values in schema.items() %}
+  <div class="card" style="width: 100%">
+    <div class="accordion">
+      <input type="checkbox" id="accordion-{{ key | lower }}" name="accordion-checkbox" hidden />
+      <label class="accordion-header c-hand" for="accordion-{{ key | lower }}">
+        <i class="icon icon-arrow-right mr-1"></i>
+        {{ key }}
+      </label>
+      <div class="accordion-body">
+        {{ values["description"] }}
+        <div style="overflow: scroll; max-height: inherit">
+          <table class="table table-striped table-hover" style="margin-bottom: 50px">
+            <thead>
+              <tr>
+                <th>Name</th>
+                <th>Value</th>
+              </tr>
+            </thead>
+            <tbody>
+              {% for param, properties in values["properties"].items() %}
+              <tr>
+                <td>
+                  <div class="popover popover-right">
+                    <button class="btn">{{param}}</button>
+                    <div class="popover-container">
+                      <div class="card">
+                        <div class="card-header">
+                          <div class="card-title">{{param}}</div>
+                          <div class="card-subtitle text-gray">
+                            {{properties["type"]}} {{ " (" + properties["format"] + ")" if "format" in properties }}
+                          </div>
+                        </div>
+                        <div class="card-body">{{properties["description"]}}</div>
+                      </div>
+                    </div>
+                  </div>
+                </td>
+                {% if param in params %}
+                <td><code>{{params[param]}}</code></td>
+                {% elif param in params["genomes"][params['genome']] %}
+                <td><code>{{params["genomes"][params['genome']][param]}}</code></td>
+                {% else %}
+                <td><i>Undefined</i></td>
+                {% endif %}
+              </tr>
+              {% endfor %}
+            </tbody>
+          </table>
+        </div>
+      </div>
+    </div>
+  </div>
+  {% endfor %}
+</div>
+
+{% endblock %}
diff --git a/modules/local/report/create/app/templates/macros.html b/modules/local/report/create/app/templates/macros.html
@@ -0,0 +1,78 @@
+{% macro tabs(active="tf") %}
+<ul class="tab tab-block">
+  <li class="tab-item {{ 'active' if active=='tf'}}">
+    <a href="index.html">Transcription factors</a>
+  </li>
+  <li class="tab-item {{ 'active' if active=='tg'}}">
+    <a href="target_genes.html">Target genes</a>
+  </li>
+  <li class="tab-item {{ 'active' if active=='snp'}}">
+    <a href="snps.html">SNPs</a>
+  </li>
+  <li class="tab-item {{ 'active' if active=='conf'}}">
+    <a href="configuration.html">Configuration</a>
+  </li>
+</ul>
+{% endmacro %} {% macro tfGeneral(tf, tgs, pairings, tfDiffExp, plotsEnabled=true) %}
+<h2>Top target genes (<a id="primary-{{tf}}-gprofiler" target="_blank">g:Profiler</a>)</h2>
+<div style="display: flex; flex-wrap: wrap">
+  {% for tg in tgs %}
+  <span class="chip" id="primary-{{tf}}-secondary-{{tg}}">{{ tg }}</span>
+  {% endfor %}
+</div>
+{% if plotsEnabled %}
+<div class="divider"></div>
+<h2>Log2fc</h2>
+
+<div id="primary-{{tf}}-log2fc"></div>
+<script>
+  Plotly.newPlot(document.getElementById('primary-{{tf}}-log2fc'), [{
+      y: {{ pairings | tojson}},
+      x: [{% for pairing in pairings %} {{ tfDiffExp[pairing] }} {% if not loop.last %}, {% endif %} {% endfor %}],
+      type: 'bar',
+      orientation: 'h'
+    }], {
+    margin: { t: 0 }
+  }, {responsive: true});
+</script>
+<div class="divider"></div>
+<h2>TPM</h2>
+<div id="primary-{{tf}}-tpm"></div>
+<script>
+  Plotly.newPlot(
+    document.getElementById("primary-{{tf}}-tpm"),
+    [
+      {
+        y: ["giraffes", "orangutans", "monkeys"],
+        x: [20, 14, 23],
+        type: "bar",
+        orientation: "h",
+      },
+    ],
+    {
+      margin: { t: 0 },
+    },
+    { responsive: true },
+  );
+</script>
+<div class="divider"></div>
+<h2>Mean expression</h2>
+<div id="primary-{{tf}}-meanExp"></div>
+<script>
+  Plotly.newPlot(
+    document.getElementById("primary-{{tf}}-meanExp"),
+    [
+      {
+        y: ["giraffes", "orangutans", "monkeys"],
+        x: [20, 14, 23],
+        type: "bar",
+        orientation: "h",
+      },
+    ],
+    {
+      margin: { t: 0 },
+    },
+    { responsive: true },
+  );
+</script>
+{% endif %} {% endmacro %}