Skip to content

Commit

Permalink
Merge pull request #355 from nf-core/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
rannick authored Apr 24, 2023
2 parents 58afbbc + 613a4f2 commit c45b22c
Show file tree
Hide file tree
Showing 23 changed files with 600 additions and 117 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v2.3.0 = [2022/04/24]

### Added

- Shell specification to bash
- COSMIC password put into quotes
- Trimmed reads QC in MultiQC
- Add `ARRIBA_VISUALISATION` to processed affected by `--skip_vis`
- Option `fusionreport_filter` to in/activate fusionreport displaying of fusions detected by 2 or more tools

### Changed

- `Arriba` visualisation now runs for FusionInspector (combined tools) results, not only `Arriba` results
- Updated metro map with trimming options and placed `Arriba` visualisation after `FusionInspector`
- Exit with error when using squid in combination with any ensembl version different from 102

### Fixed

- Channel issue with indexing of files with using `--cram squid`
- `Arriba` references published in the correct folder

### Removed

## v2.2.0 - [2022/03/13]

### Added
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ In rnafusion the full-sized test includes reference building and fusion detectio
- [Samtool](https://github.com/samtools/samtools) sort
- [Samtool](https://github.com/samtools/samtools) index
- [Arriba](https://github.com/suhrig/arriba) fusion detection
- [Arriba](https://github.com/suhrig/arriba) visualisation
5. Pizzly subworkflow
- [Kallisto](https://pachterlab.github.io/kallisto/) quantification
- [Pizzly](https://github.com/pmelsted/pizzly) fusion detection
Expand All @@ -81,6 +80,7 @@ In rnafusion the full-sized test includes reference building and fusion detectio
- [Fusion-report](https://github.com/matq007/fusion-report)
10. FusionInspector subworkflow
- [FusionInspector](https://github.com/FusionInspector/FusionInspector)
- [Arriba](https://github.com/suhrig/arriba) visualisation
11. Stringtie subworkflow
- [StringTie](https://ccb.jhu.edu/software/stringtie/index.shtml)
12. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
Expand Down
197 changes: 197 additions & 0 deletions bin/megafusion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/env python

import argparse
import logging
import sys
from pathlib import Path
import pandas as pd
import ast

logger = logging.getLogger()

FUSIONINSPECTOR_MAP = {
"fusion": {"column": 0, "delimiter": "\t", "element": 0},
"chromosomeA": {"column": 7, "delimiter": ":", "element": 0},
"chromosomeB": {"column": 10, "delimiter": ":", "element": 0},
"posA": {"column": 7, "delimiter": ":", "element": 1},
"posB": {"column": 10, "delimiter": ":", "element": 1},
"strand1": {"column": 7, "delimiter": ":", "element": 2},
"strand2": {"column": 10, "delimiter": ":", "element": 2},
"geneA": {"column": 0, "delimiter": "--", "element": 0},
"geneB": {"column": 0, "delimiter": "--", "element": 1},
"split_reads": {"column": 1, "delimiter": "\t", "element": 0},
"discordant_pairs": {"column": 2, "delimiter": "\t", "element": 0},
"ffpm": {"column": 25, "delimiter": "\t", "element": 0},
}


def parse_args(argv=None):
"""Define and immediately parse command line arguments."""
parser = argparse.ArgumentParser(
description="Validate and transform a tabular samplesheet.",
epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv",
)
parser.add_argument(
"--fusioninspector",
metavar="FUSIONINSPECTOR",
type=Path,
help="FusionInspector output in TSV format.",
)
parser.add_argument(
"--fusionreport",
metavar="FUSIONREPORT",
type=Path,
help="Fusionreport output in TSV format.",
)
parser.add_argument("--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample")
parser.add_argument(
"--out",
metavar="OUT",
type=Path,
help="Output path.",
)
return parser.parse_args(argv)


def header_def(sample):
return '##fileformat=VCFv4.1\n\
##ALT=<ID=BND,Description="Break end">\n\
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n\
##INFO=<ID=CHRA,Number=1,Type=String,Description="Chromosome A">\n\
##INFO=<ID=CHRB,Number=1,Type=String,Description="Chromosome B">\n\
##INFO=<ID=GENEA,Number=.,Type=String,Description="Gene A">\n\
##INFO=<ID=GENEB,Number=.,Type=String,Description="Gene B">\n\
##INFO=<ID=ORIENTATION,Number=.,Type=String,Description="Strand1 and strand2 directions">\n\
##INFO=<ID=FOUND_DB,Number=.,Type=String,Description="Databases in which the fusion has been found">\n\
##INFO=<ID=ARRIBA,Number=.,Type=String,Description="Found by arriba">\n\
##INFO=<ID=FUSIONCATCHER,Number=.,Type=String,Description="Found by fusioncatcher">\n\
##INFO=<ID=PIZZLY,Number=.,Type=String,Description="Found by pizzly">\n\
##INFO=<ID=SQUID,Number=.,Type=String,Description="Found by squid">\n\
##INFO=<ID=STARFUSION,Number=.,Type=String,Description="Found by starfusion">\n\
##INFO=<ID=TOOL_HITS,Number=.,Type=Integer,Description="Number of tools that found the fusion">\n\
##INFO=<ID=SCORE,Number=.,Type=Float,Description="Score from fusionreport">\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n\
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of paired-ends that support the event">\n\
##FORMAT=<ID=RV,Number=1,Type=Integer,Description="Number of split reads that support the event">\n\
##FORMAT=<ID=FFPM,Number=1,Type=Float,Description="Fusion fragments per million total RNA-seq fragments">\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format(
sample
)


def read_fusioninspector(fusioninspector_file, col_num, delimiter, element):
with open(fusioninspector_file) as fusioninspector:
return [line.split()[col_num].split(delimiter)[element] for line in fusioninspector if not line.startswith("#")]


def build_fusioninspector_dataframe(file, map):
new_dict = {}
for key in FUSIONINSPECTOR_MAP:
new_dict[key] = read_fusioninspector(
file,
map[key]["column"],
map[key]["delimiter"],
map[key]["element"],
)
return pd.DataFrame.from_dict(new_dict).set_index("fusion")


def read_build_fusionreport(fusionreport_file):
with open(fusionreport_file) as fusionreport:
from_html = [line.split('rows": [')[1] for line in fusionreport if 'name="fusion_list' in line]
expression = from_html[0].split('], "tool')[0]
return pd.DataFrame.from_dict(ast.literal_eval(expression)).set_index("fusion")


def column_manipulation(df):
df["ALT"] = ""
df = df.reset_index()
df["FORMAT"] = "GT:DV:RV:FFPM"
df["ID"] = "."
df["QUAL"] = "."
df["FILTER"] = "PASS"
df["REF"] = "N"

for index, row in df.iterrows():
# ALT
if not row["strand1"] in ["+", "-"] or not row["strand2"] in ["+", "-"]:
df.loc[index, "ALT"] = "N[{}:{}[".format(df["chromosomeB"], row["posB"])
elif row["strand1"] == "-" and row["strand2"] == "-":
df.loc[index, "ALT"] = "[{}:{}[N".format(row["chromosomeB"], row["posB"])
elif row["strand1"] == "+" and row["strand2"] == "-":
df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"])
elif row["strand1"] == "-" and row["strand2"] == "+":
df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"])
else:
df.loc[index, "ALT"] = "N[{}:{}[".format(row["chromosomeB"], row["posB"])
# INFO
df.loc[index, "INFO"] = (
"SVTYPE=BND;CHRA={};CHRB={};GENEA={};GENEB={};ORIENTATION={},{};FOUND_DB={};"
"ARRIBA={};FUSIONCATCHER={};PIZZLY={};SQUID={};STARFUSION={};TOOL_HITS={};SCORE={}".format(
row["chromosomeA"],
row["chromosomeB"],
row["geneA"],
row["geneB"],
row["strand1"],
row["strand2"],
row["found_db"],
row["arriba"],
row["fusioncatcher"],
row["pizzly"],
row["squid"],
row["starfusion"],
row["tools_hits"],
row["score"],
)
)
# FORMAT
df.loc[index, "Sample"] = "./1:{}:{}:{}".format(row["split_reads"], row["discordant_pairs"], row["ffpm"])
return df


def write_vcf(df_to_print, header, out_file):
df_to_print[
[
"chromosomeA",
"posA",
"ID",
"REF",
"ALT",
"QUAL",
"FILTER",
"INFO",
"FORMAT",
"Sample",
]
].to_csv(
path_or_buf=out_file,
sep="\t",
header=None,
index=False,
)

with open(out_file, "r+") as f:
content = f.read()
f.seek(0, 0)
f.write(header.rstrip("\r\n") + "\n" + content)


def megafusion(fusioninspector_in_file, fusionreport_in_file, sample, out):
"""Convert fusion information from FusionInspector and fusion-report into a vcf file. Adapted from https://github.com/J35P312/MegaFusion"""
merged_df = build_fusioninspector_dataframe(fusioninspector_in_file, FUSIONINSPECTOR_MAP).join(
read_build_fusionreport(fusionreport_in_file), how="left"
)
write_vcf(column_manipulation(merged_df), header_def(sample), out)


def main(argv=None):
"""Coordinate argument parsing and program execution."""
args = parse_args(argv)
if not args.fusioninspector.is_file() or not args.fusionreport.is_file():
logger.error(f"The given input file {args.fusioninspector} or {args.fusionreport} was not found!")
sys.exit(2)
megafusion(args.fusioninspector, args.fusionreport, args.sample, args.out)


if __name__ == "__main__":
sys.exit(main())
1 change: 1 addition & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ process {
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
shell = ['/bin/bash', '-euo', 'pipefail']

errorStrategy = { task.exitStatus in [140,143,137,104,134,139] ? 'retry' : 'finish' }
maxRetries = 1
Expand Down
28 changes: 24 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,16 @@ process {
ext.prefix = { "${meta.id}.arriba" }
}

withName: ARRIBA_DOWNLOAD {
publishDir = [
path: { "${params.genomes_base}/arriba" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: ARRIBA_VISUALISATION {
ext.when = { !params.fusioninspector_only && (params.starfusion || params.all) }
publishDir = [
path: { "${params.outdir}/arriba_visualisation" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -57,6 +66,7 @@ process {

withName: FASTQC {
ext.args = '--quiet'
ext.when = { !params.skip_qc }
}

withName: FASTQC_FOR_TRIM {
Expand Down Expand Up @@ -89,6 +99,7 @@ process {
withName: FUSIONREPORT {
ext.when = { !params.skip_vis }
ext.args = "--export csv"
ext.args2 = { params.fusionreport_filter ? "--tool-cutoff 2" : "--tool-cutoff 1"}
publishDir = [
path: { "${params.outdir}/fusionreport/${meta.id}" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -129,6 +140,15 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
withName: MEGAFUSION {
ext.when = {!params.fusioninspector_only}
}



withName: MULTIQC {
ext.when = { !params.skip_qc }
}

withName: PICARD_COLLECTRNASEQMETRICS {
ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) }
Expand Down Expand Up @@ -207,10 +227,10 @@ process {
]
}

withName: SAMTOOLS_SORT_FOR_SQUID {
withName: SAMTOOLS_SORT_FOR_SQUID_CHIMERIC {
ext.prefix = { "${meta.id}_chimeric_sorted" }
publishDir = [
path: { "${params.outdir}/samtools_sort_for_squid" },
path: { "${params.outdir}/samtools_sort_for_squid_chimeric" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -225,11 +245,11 @@ process {
]
}

withName: SAMTOOLS_VIEW_FOR_SQUID {
withName: SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC {
ext.prefix = { "${meta.id}_chimeric" }
ext.args = { "--output-fmt bam" }
publishDir = [
path: { "${params.outdir}/samtools_view_for_squid" },
path: { "${params.outdir}/samtools_view_for_squid_chimeric" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand Down
Binary file modified docs/images/nf-core-rnafusion_metro_map.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit c45b22c

Please sign in to comment.