Skip to content

Commit

Permalink
Use template in combine_tables
Browse files Browse the repository at this point in the history
  • Loading branch information
nictru committed Apr 21, 2024
1 parent ec03dae commit 96fad0a
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 53 deletions.
24 changes: 0 additions & 24 deletions bin/combine_rankings.py

This file was deleted.

11 changes: 1 addition & 10 deletions modules/local/combine_tables/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,5 @@ process COMBINE_TABLES {
script:
prefix = task.ext.prefix ?: "${meta.id}"
extension = task.ext.extension ?: "tsv"
"""
combine_tables.py --input ${files} --method ${method} --output ${prefix}.${extension}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pandas: \$(python -c "import pandas; print(pandas.__version__)")
numpy: \$(python -c "import numpy; print(numpy.__version__)")
END_VERSIONS
"""
template "combine_tables.py"
}
Original file line number Diff line number Diff line change
@@ -1,24 +1,36 @@
#!/usr/bin/env python3

import argparse
import numpy as np
import pandas as pd

# Define the command-line arguments
parser = argparse.ArgumentParser(description="Calculate statistics between two multiple files.")
parser.add_argument("-i", "--input", type=str, nargs='+', help="List of input file paths", required=True)
parser.add_argument("-o", "--output", type=str, help="Output file path", required=True)
parser.add_argument("-m", "--method", type=str, choices=["mean", "sum", "ratio", "rank"], default="mean", help="Calculation method (mean, sum, ratio)")
args = parser.parse_args()

# Check if input and output paths are provided
if not args.input or not args.output:
parser.error("Input and output paths are required.")
import platform

def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Format a (possibly nested) dictionary as a YAML-like string.

    Each key is written on its own line. A value that is itself a
    dictionary is rendered recursively beneath its key, one indentation
    level (a single space) deeper; any other value is formatted inline
    after the key.

    Args:
        data (dict): The dictionary to format.
        indent (int): The current indentation level.

    Returns:
        str: A string formatted as YAML (empty if ``data`` is empty).
    """
    # NOTE(review): the doubled backslashes below are intentional. This file
    # is loaded through the Nextflow "template" directive, which presumably
    # consumes one level of escaping before Python sees the code - confirm
    # before "simplifying" them to a single backslash.

    # The prefix depends only on the nesting level, so compute it once.
    spaces = " " * indent
    lines = []
    for key, value in data.items():
        if isinstance(value, dict):
            nested = format_yaml_like(value, indent + 1)
            lines.append(f"{spaces}{key}:\\n{nested}")
        else:
            lines.append(f"{spaces}{key}: {value}\\n")
    # Join once instead of growing a string with += on every iteration.
    return "".join(lines)

method = "$method"
if method not in ["mean", "sum", "ratio", "rank"]:
raise ValueError("Invalid method. Must be one of 'mean', 'sum', 'ratio', 'rank'.")

# Read all input files into a list of dataframes
dfs = [pd.read_csv(file, sep='\t', index_col=0) for file in args.input]
dfs = [pd.read_csv(file, sep='\\t', index_col=0) for file in "${files.join(' ')}".split()]

if args.method in ["sum", "rank"]:
if method in ["sum", "rank"]:
index_union = dfs[0].index
col_union = dfs[0].columns
for df in dfs[1:]:
Expand Down Expand Up @@ -50,13 +62,13 @@
raise ValueError("The input files must have the same column names.")

# Calculate the selected statistic
if args.method == "mean":
if method == "mean":
result = sum(dfs) / len(dfs)
elif args.method == "rank":
elif method == "rank":
result = 1 - (sum(dfs).rank(ascending=False) / len(dfs[0].index))
elif args.method == "sum":
elif method == "sum":
result = sum(dfs)
elif args.method == "ratio":
elif method == "ratio":
if len(dfs) != 2:
raise ValueError("The ratio method requires exactly two input files.")

Expand All @@ -73,4 +85,16 @@
print(f"Number of rows after dropping NA or inf values: {len(result)}")

# Write the result to a file
result.to_csv(args.output, sep='\t', index=True, quoting=0)
result.to_csv("${prefix}.${extension}", sep='\\t', index=True, quoting=0)

# Record the interpreter and library versions used by this run, keyed by the
# process name that the template engine substitutes into the placeholder.
tool_versions = {
    "python": platform.python_version(),
    "pandas": pd.__version__,
    "numpy": np.__version__,
}
versions = {"${task.process}": tool_versions}

# Persist the version report next to the result table.
with open("versions.yml", "w") as versions_file:
    versions_file.write(format_yaml_like(versions))

0 comments on commit 96fad0a

Please sign in to comment.