diff --git a/CHANGELOG.md b/CHANGELOG.md index 2569db82..ff835124 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.3.0 - [2022/04/24] + +### Added + +- Shell specification to bash +- COSMIC password put into quotes +- Trimmed reads QC in MultiQC +- Add `ARRIBA_VISUALISATION` to processes affected by `--skip_vis` +- Option `fusionreport_filter` to in/activate fusionreport displaying of fusions detected by 2 or more tools + +### Changed + +- `Arriba` visualisation now runs for FusionInspector (combined tools) results, not only `Arriba` results +- Updated metro map with trimming options and placed `Arriba` visualisation after `FusionInspector` +- Exit with error when using squid in combination with any ensembl version different from 102 + +### Fixed + +- Channel issue with indexing of files when using `--cram squid` +- `Arriba` references published in the correct folder + +### Removed + ## v2.2.0 - [2022/03/13] ### Added diff --git a/README.md b/README.md index f5ee4390..d5ccda0f 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ In rnafusion the full-sized test includes reference building and fusion detectio - [Samtool](https://github.com/samtools/samtools) sort - [Samtool](https://github.com/samtools/samtools) index - [Arriba](https://github.com/suhrig/arriba) fusion detection - - [Arriba](https://github.com/suhrig/arriba) visualisation 5. Pizzly subworkflow - [Kallisto](https://pachterlab.github.io/kallisto/) quantification - [Pizzly](https://github.com/pmelsted/pizzly) fusion detection @@ -81,6 +80,7 @@ In rnafusion the full-sized test includes reference building and fusion detectio - [Fusion-report](https://github.com/matq007/fusion-report) 10. 
#!/usr/bin/env python
"""Convert FusionInspector and fusion-report results into a single VCF file.

Adapted from https://github.com/J35P312/MegaFusion
"""

import argparse
import ast
import logging
import sys
from pathlib import Path

import pandas as pd

logger = logging.getLogger()

# For every output field: the whitespace-separated column of a FusionInspector
# row it is read from, the delimiter used to split that column, and the index
# of the piece that is kept after splitting.
FUSIONINSPECTOR_MAP = {
    "fusion": {"column": 0, "delimiter": "\t", "element": 0},
    "chromosomeA": {"column": 7, "delimiter": ":", "element": 0},
    "chromosomeB": {"column": 10, "delimiter": ":", "element": 0},
    "posA": {"column": 7, "delimiter": ":", "element": 1},
    "posB": {"column": 10, "delimiter": ":", "element": 1},
    "strand1": {"column": 7, "delimiter": ":", "element": 2},
    "strand2": {"column": 10, "delimiter": ":", "element": 2},
    "geneA": {"column": 0, "delimiter": "--", "element": 0},
    "geneB": {"column": 0, "delimiter": "--", "element": 1},
    "split_reads": {"column": 1, "delimiter": "\t", "element": 0},
    "discordant_pairs": {"column": 2, "delimiter": "\t", "element": 0},
    "ffpm": {"column": 25, "delimiter": "\t", "element": 0},
}

# Data frame columns written to the VCF body, in output order.
VCF_COLUMNS = [
    "chromosomeA",
    "posA",
    "ID",
    "REF",
    "ALT",
    "QUAL",
    "FILTER",
    "INFO",
    "FORMAT",
    "Sample",
]


def parse_args(argv=None):
    """Define and immediately parse command line arguments.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace`` with ``fusioninspector``,
        ``fusionreport`` and ``out`` as ``Path`` objects and ``sample`` as ``str``.
    """
    parser = argparse.ArgumentParser(
        description="Convert FusionInspector and fusion-report output into a VCF file.",
        epilog=(
            "Example: python megafusion.py --fusioninspector sample.FusionInspector.fusions.tsv "
            "--fusionreport index.html --sample sample --out sample.vcf"
        ),
    )
    # The inputs and the output are required: without them the script would
    # only fail later with an AttributeError/TypeError on None.
    parser.add_argument(
        "--fusioninspector",
        metavar="FUSIONINSPECTOR",
        type=Path,
        required=True,
        help="FusionInspector output in TSV format.",
    )
    parser.add_argument(
        "--fusionreport",
        metavar="FUSIONREPORT",
        type=Path,
        required=True,
        help="fusion-report report file containing the 'fusion_list' table data.",
    )
    # A sample name is a label, not a filesystem path, so keep it a plain string.
    parser.add_argument("--sample", metavar="SAMPLE", type=str, help="Sample name.", default="Sample")
    parser.add_argument(
        "--out",
        metavar="OUT",
        type=Path,
        required=True,
        help="Output path.",
    )
    return parser.parse_args(argv)


def header_def(sample):
    """Return the VCF header text whose last column is named after ``sample``.

    The returned string has no trailing newline.
    """
    # NOTE(review): the ##ALT/##INFO/##FORMAT meta-information lines carry no
    # definitions here; this looks like lost content -- confirm against the
    # upstream script before relying on the header.
    header_template = (
        "##fileformat=VCFv4.1\n"
        "##ALT=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##INFO=\n"
        "##FORMAT=\n"
        "##FORMAT=\n"
        "##FORMAT=\n"
        "##FORMAT=\n"
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}"
    )
    return header_template.format(sample)


def read_fusioninspector(fusioninspector_file, col_num, delimiter, element):
    """Extract one field from every data row of a FusionInspector file.

    Each row is split on whitespace, column ``col_num`` is split on
    ``delimiter`` and piece ``element`` is kept. Lines starting with ``#``
    and blank lines are skipped.

    Returns:
        A list with one string per data row.
    """
    with open(fusioninspector_file) as fusioninspector:
        return [
            line.split()[col_num].split(delimiter)[element]
            for line in fusioninspector
            if line.strip() and not line.startswith("#")
        ]


def build_fusioninspector_dataframe(file, map):
    """Build a data frame indexed by fusion name from a FusionInspector file.

    Args:
        file: Path to the FusionInspector output.
        map: Field map shaped like ``FUSIONINSPECTOR_MAP``; it must contain a
            ``"fusion"`` entry, which becomes the index.
    """
    # Iterate the map that was passed in rather than the module-level constant,
    # so the keys read and the specs used always belong to the same mapping.
    columns = {
        field: read_fusioninspector(file, spec["column"], spec["delimiter"], spec["element"])
        for field, spec in map.items()
    }
    return pd.DataFrame.from_dict(columns).set_index("fusion")


def read_build_fusionreport(fusionreport_file):
    """Parse the fusion table embedded in a fusion-report file.

    The row data is taken from the first line containing ``name="fusion_list``:
    everything between ``rows": [`` and ``], "tool`` is evaluated as Python
    literals.

    Returns:
        A data frame indexed by ``fusion`` (empty when the table has no rows).

    Raises:
        ValueError: If no line carrying the ``fusion_list`` table is found.
    """
    with open(fusionreport_file) as fusionreport:
        from_html = [line.split('rows": [')[1] for line in fusionreport if 'name="fusion_list' in line]
    if not from_html:
        raise ValueError("No 'fusion_list' table found in {}".format(fusionreport_file))
    expression = from_html[0].split('], "tool')[0]
    # Re-wrap in brackets so that one row yields a list of one dict (a bare
    # dict would be misread as column data) and zero rows yield an empty list.
    rows = ast.literal_eval("[" + expression + "]")
    if not rows:
        return pd.DataFrame(columns=["fusion"]).set_index("fusion")
    return pd.DataFrame(rows).set_index("fusion")


def _alt_field(strand1, strand2, chromosome_b, pos_b):
    """Return the breakend ALT string for the given strand pair and mate position."""
    mate = "{}:{}".format(chromosome_b, pos_b)
    if strand1 == "-" and strand2 == "-":
        return "[{}[N".format(mate)
    if {strand1, strand2} == {"+", "-"}:
        return "N]{}]".format(mate)
    # "+"/"+" and any unrecognised strand value share the same notation.
    return "N[{}[".format(mate)


def column_manipulation(df):
    """Add the VCF columns (ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, Sample).

    Args:
        df: Fusions indexed by ``fusion``, holding the FusionInspector fields
            joined with the fusion-report fields.

    Returns:
        A new data frame with a fresh integer index and the VCF columns added;
        the input frame is left untouched.
    """
    df = df.reset_index()
    df["FORMAT"] = "GT:DV:RV:FFPM"
    df["ID"] = "."
    df["QUAL"] = "."
    df["FILTER"] = "PASS"
    df["REF"] = "N"

    info_template = (
        "SVTYPE=BND;CHRA={};CHRB={};GENEA={};GENEB={};ORIENTATION={},{};FOUND_DB={};"
        "ARRIBA={};FUSIONCATCHER={};PIZZLY={};SQUID={};STARFUSION={};TOOL_HITS={};SCORE={}"
    )
    alt_values, info_values, sample_values = [], [], []
    for _, row in df.iterrows():
        # Always format the row's own mate chromosome (never the whole column).
        alt_values.append(_alt_field(row["strand1"], row["strand2"], row["chromosomeB"], row["posB"]))
        info_values.append(
            info_template.format(
                row["chromosomeA"],
                row["chromosomeB"],
                row["geneA"],
                row["geneB"],
                row["strand1"],
                row["strand2"],
                row["found_db"],
                row["arriba"],
                row["fusioncatcher"],
                row["pizzly"],
                row["squid"],
                row["starfusion"],
                row["tools_hits"],
                row["score"],
            )
        )
        sample_values.append("./1:{}:{}:{}".format(row["split_reads"], row["discordant_pairs"], row["ffpm"]))

    # Assigning whole columns also creates them when there are no rows, so an
    # empty fusion list still produces a valid, header-only VCF.
    df["ALT"] = alt_values
    df["INFO"] = info_values
    df["Sample"] = sample_values
    return df


def write_vcf(df_to_print, header, out_file):
    """Write ``header`` followed by the tab-separated VCF records to ``out_file``."""
    # Write the header first and stream the records after it, instead of
    # writing the records, reading them back and rewriting the whole file.
    with open(out_file, "w") as vcf:
        vcf.write(header.rstrip("\r\n") + "\n")
        df_to_print[VCF_COLUMNS].to_csv(vcf, sep="\t", header=False, index=False)


def megafusion(fusioninspector_in_file, fusionreport_in_file, sample, out):
    """Convert fusion information from FusionInspector and fusion-report into a vcf file.

    Adapted from https://github.com/J35P312/MegaFusion
    """
    # Left join: every FusionInspector fusion is kept, annotated with its
    # fusion-report fields when the report lists that fusion.
    merged_df = build_fusioninspector_dataframe(fusioninspector_in_file, FUSIONINSPECTOR_MAP).join(
        read_build_fusionreport(fusionreport_in_file), how="left"
    )
    write_vcf(column_manipulation(merged_df), header_def(sample), out)


def main(argv=None):
    """Coordinate argument parsing and program execution."""
    args = parse_args(argv)
    if not args.fusioninspector.is_file() or not args.fusionreport.is_file():
        logger.error(
            "The given input file %s or %s was not found!",
            args.fusioninspector,
            args.fusionreport,
        )
        sys.exit(2)
    megafusion(args.fusioninspector, args.fusionreport, args.sample, args.out)


if __name__ == "__main__":
    sys.exit(main())
null : filename } + ] + } + withName: ARRIBA_VISUALISATION { + ext.when = { !params.fusioninspector_only && (params.starfusion || params.all) } publishDir = [ path: { "${params.outdir}/arriba_visualisation" }, mode: params.publish_dir_mode, @@ -57,6 +66,7 @@ process { withName: FASTQC { ext.args = '--quiet' + ext.when = { !params.skip_qc } } withName: FASTQC_FOR_TRIM { @@ -89,6 +99,7 @@ process { withName: FUSIONREPORT { ext.when = { !params.skip_vis } ext.args = "--export csv" + ext.args2 = { params.fusionreport_filter ? "--tool-cutoff 2" : "--tool-cutoff 1"} publishDir = [ path: { "${params.outdir}/fusionreport/${meta.id}" }, mode: params.publish_dir_mode, @@ -129,6 +140,15 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + withName: MEGAFUSION { + ext.when = {!params.fusioninspector_only} + } + + + + withName: MULTIQC { + ext.when = { !params.skip_qc } + } withName: PICARD_COLLECTRNASEQMETRICS { ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } @@ -207,10 +227,10 @@ process { ] } - withName: SAMTOOLS_SORT_FOR_SQUID { + withName: SAMTOOLS_SORT_FOR_SQUID_CHIMERIC { ext.prefix = { "${meta.id}_chimeric_sorted" } publishDir = [ - path: { "${params.outdir}/samtools_sort_for_squid" }, + path: { "${params.outdir}/samtools_sort_for_squid_chimeric" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -225,11 +245,11 @@ process { ] } - withName: SAMTOOLS_VIEW_FOR_SQUID { + withName: SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC { ext.prefix = { "${meta.id}_chimeric" } ext.args = { "--output-fmt bam" } publishDir = [ - path: { "${params.outdir}/samtools_view_for_squid" }, + path: { "${params.outdir}/samtools_view_for_squid_chimeric" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/docs/images/nf-core-rnafusion_metro_map.png b/docs/images/nf-core-rnafusion_metro_map.png index c477719c..314b61a2 100644 Binary files a/docs/images/nf-core-rnafusion_metro_map.png and b/docs/images/nf-core-rnafusion_metro_map.png differ diff --git a/docs/images/nf-core-rnafusion_metro_map.svg b/docs/images/nf-core-rnafusion_metro_map.svg index 2e6d773b..38eb709b 100644 --- a/docs/images/nf-core-rnafusion_metro_map.svg +++ b/docs/images/nf-core-rnafusion_metro_map.svg @@ -26,13 +26,13 @@ inkscape:pagecheckerboard="0" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="5.1406553" - inkscape:cx="265.04403" - inkscape:cy="305.99212" + inkscape:zoom="1.1491182" + inkscape:cx="311.10813" + inkscape:cy="413.79557" inkscape:window-width="1440" - inkscape:window-height="876" + inkscape:window-height="847" inkscape:window-x="0" - inkscape:window-y="24" + inkscape:window-y="25" inkscape:window-maximized="0" inkscape:current-layer="layer1" inkscape:snap-grids="false"> @@ -63,7 +63,8 @@ id="flowPara10551" style="font-size:32px;line-height:1.25;stroke-width:2.10014px" /> + id="g14717" + transform="translate(-12.764633,0.557051)"> + + + + Arriba SQUID pizzly kallisto + hardtrimming + fastptrimming FusionCatcher - Arribavisualisation + STAR-Fusion FusionInspector + x="231.811" + y="71.951057">FusionInspector CollectWgsMetrics - SQUID annotate + x="154.09494" + y="44.439342" + id="tspan61753-2">SQUIDannotate + + + StringTie + Arribavisualisation diff --git a/docs/output.md b/docs/output.md index 8ed07454..b7afedf1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -20,9 +20,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [StringTie](#stringtie) - StringTie assembly - [FusionCatcher](#fusioncatcher) - Fusion catcher fusion detection - [Samtools](#samtools) - SAM/BAM file manipulation -- [Arriba visualisation](#arriba-visualisation) - Arriba visualisation report - [Fusion-report](#fusion-report) - 
Summary of the findings of each tool and comparison to COSMIC, Mitelman and FusionGBD databases - [FusionInspector](#fusionInspector) - IGV-based visualisation tool for fusions filtered by fusion-report +- [Arriba visualisation](#arriba-visualisation) - Arriba visualisation report for FusionInspector fusions - [Qualimap](#qualimap) - Quality control of alignment - [Picard](#picard) - Collect metrics - [FastQC](#fastqc) - Raw read quality control @@ -287,7 +287,7 @@ Samtools view is used to convert the chimeric SAM output from STAR_FOR_SQUID to #### Samtools sort -Samtools sort is used to sort BAM files from STAR_FOR_ARRIBA (for arriba visualisation) and the chimeric BAM from STAR_FOR_SQUID +Samtools sort is used to sort BAM files from STAR_FOR_STARFUSION (for arriba visualisation) and the chimeric BAM from STAR_FOR_SQUID
Output files @@ -323,6 +323,8 @@ Samtools index is used to index BAM files from STAR_FOR_ARRIBA (for arriba visua
+The score is explained [on the original fusion-report github page](https://matq007.github.io/fusion-report/#/score). + ### FusionInspector
diff --git a/docs/usage.md b/docs/usage.md index fe0f1912..5eb23ccd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -155,14 +155,29 @@ nextflow run nf-core/rnafusion \ 2. hard trimming In this case, only reads fed to fusioncatcher are trimmed. This is a harsh workaround in case of high read-through. The recommended trimming is thus the fastp_trim one. The trimming is done at 75 bp from the tails. Example usage: -````bash +```bash nextflow run nf-core/rnafusion \ -- -- ... \ --input \ --genomes_base \ --outdir \ --trim -`` +``` + +#### Filter fusions detected by 2 or more tools + +```bash +nextflow run nf-core/rnafusion \ + -- -- ... \ + --input \ + --genomes_base \ + --outdir \ + --fusioninspector_filter \ + --fusionreport_filter +``` + +`--fusioninspector_filter` feeds only fusions detected by 2 or more tools to fusioninspector for closer analysis (false by default). +`--fusionreport_filter` displays only fusions detected by 2 or more tools in fusionreport html index (true by default). #### Adding custom fusions to consider as well as the detected set: whitelist @@ -192,7 +207,7 @@ nextflow run nf-core/rnafusion \ --fusioninspector_fusions \ --input \ --outdir -```` +``` The custom fusion file should have the following format: @@ -201,6 +216,32 @@ GENE1--GENE2 GENE3--GENE4 ``` +#### Skipping QC + +```bash +nextflow run nf-core/rnafusion \ +--skip_qc \ +--all OR <--tool> \ +--input \ +--genomes_base \ +--outdir +``` + +This will skip all QC-related processes. + +#### Skipping visualisation + +```bash +nextflow run nf-core/rnafusion \ +--skip_vis \ +--all OR <--tool> \ +--input \ +--genomes_base \ +--outdir +``` + +This will skip all visualisation processes, including `fusion-report`, `FusionInspector` and `Arriba` visualisation. + #### Optional manual feed-in of fusion files It is possible to give the output of each tool manually using the argument: `--_fusions PATH/TO/FUSION/FILE`: this feature need more testing, don't hesitate to open an issue if you encounter problems. 
diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf index 1b5f87c8..b55666ca 100644 --- a/modules/local/arriba/visualisation/main.nf +++ b/modules/local/arriba/visualisation/main.nf @@ -9,7 +9,6 @@ process ARRIBA_VISUALISATION { input: tuple val(meta), path(bam), path(bai), path(fusions) - path reference path gtf path protein_domains path cytobands diff --git a/modules/local/fusioninspector/main.nf b/modules/local/fusioninspector/main.nf index 5de9f096..6f59a590 100644 --- a/modules/local/fusioninspector/main.nf +++ b/modules/local/fusioninspector/main.nf @@ -2,7 +2,7 @@ process FUSIONINSPECTOR { tag "$meta.id" label 'process_high' - conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::trinity=2.13.2 bioconda::samtools=1.9 bioconda::star=2.7.8a" + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::samtools=1.9 bioconda::star=2.7.8a" container 'docker.io/trinityctat/starfusion:1.12.0' input: @@ -10,8 +10,9 @@ process FUSIONINSPECTOR { path reference output: - path "*" , emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv + path "*" , emit: output + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -39,6 +40,7 @@ process FUSIONINSPECTOR { stub: """ touch FusionInspector.log + touch FusionInspector.fusions.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf index 7e686ce6..5dca9b2a 100644 --- a/modules/local/fusionreport/download/main.nf +++ b/modules/local/fusionreport/download/main.nf @@ -17,7 +17,7 @@ process FUSIONREPORT_DOWNLOAD { script: def args = task.ext.args ?: '' """ - fusion_report download --cosmic_usr $username --cosmic_passwd $passwd $args ./ + fusion_report download --cosmic_usr "$username" --cosmic_passwd "$passwd" $args ./ 
// Convert the FusionInspector TSV and the fusion-report report of one sample
// into a VCF file by running bin/megafusion.py.
process MEGAFUSION {
    tag "$meta.id"
    label 'process_single'

    // megafusion.py imports pandas, so the environment has to provide it;
    // a bare python environment/image is not enough.
    // NOTE(review): confirm the pandas 1.5.2 image tags exist in both registries.
    conda "conda-forge::pandas=1.5.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
        'quay.io/biocontainers/pandas:1.5.2' }"

    input:
    // tsv: FusionInspector fusions table; report: fusion-report output for the same sample
    tuple val(meta), path(tsv), path(report)

    output:
    path "versions.yml"           , emit: versions
    tuple val(meta), path("*vcf") , emit: vcf

    when:
    task.ext.when == null || task.ext.when

    script:
    // The prefix doubles as the sample column name in the VCF header.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    megafusion.py --fusioninspector $tsv --fusionreport $report --sample ${prefix} --out ${prefix}.vcf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.vcf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
    END_VERSIONS
    """
}
[ id:'test', single_end:false ] + - tsv: + type: path + description: Path to FusionInspector tsv output + pattern: "*" + - report: + type: path + description: Path to fusionreport report + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: File containing the summary of all fusions as vcf file + pattern: "*.tsv" + +authors: + - "@rannick" diff --git a/nextflow.config b/nextflow.config index 81028ad0..b82db937 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { // Filtering fusioninspector_filter = false + fusionreport_filter = true // Trimming trim = false @@ -255,7 +256,7 @@ manifest { description = """Nextflow rnafusion analysis pipeline, part of the nf-core community.""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '2.2.0' + version = '2.3.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index be0436d0..7f517b3e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -179,6 +179,12 @@ "fa_icon": "far fa-file-code", "description": "Path to fusionreport references" }, + "fusionreport_filter": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "default": true, + "description": "Display fusions identified with 2 tools or more" + }, "pizzly": { "type": "boolean", "fa_icon": "far fa-file-code", diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf index 6653974e..0712f787 100644 --- a/subworkflows/local/arriba_workflow.nf +++ b/subworkflows/local/arriba_workflow.nf @@ -1,9 +1,8 @@ include { ARRIBA } from '../../modules/nf-core/arriba/main' -include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA} from '../../modules/nf-core/samtools/index/main' 
-include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA} from '../../modules/nf-core/samtools/view/main' +include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main' workflow ARRIBA_WORKFLOW { take: @@ -41,11 +40,6 @@ workflow ARRIBA_WORKFLOW { ch_arriba_fusions = ARRIBA.out.fusions ch_arriba_fusion_fail = ARRIBA.out.fusions_fail.map{ meta, file -> return file} } - bam_indexed_arriba_fusions = bam_indexed.join(ch_arriba_fusions) - ARRIBA_VISUALISATION(bam_indexed_arriba_fusions, params.arriba_ref, ch_gtf, params.arriba_ref_protein_domain, params.arriba_ref_cytobands) - ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions) - - ch_arriba_visualisation = ARRIBA_VISUALISATION.out.pdf if (params.cram.contains('arriba') ){ SAMTOOLS_VIEW_FOR_ARRIBA(bam_indexed, ch_fasta, []) @@ -61,13 +55,11 @@ workflow ARRIBA_WORKFLOW { .map { meta, reads, fusions -> [ meta, fusions ] } ch_arriba_fusion_fail = ch_dummy_file - ch_arriba_visualisation = ch_dummy_file } emit: fusions = ch_arriba_fusions fusions_fail = ch_arriba_fusion_fail versions = ch_versions.ifEmpty(null) - pdf = ch_arriba_visualisation } diff --git a/subworkflows/local/fusioninspector_workflow.nf b/subworkflows/local/fusioninspector_workflow.nf index c714b993..388f42a6 100644 --- a/subworkflows/local/fusioninspector_workflow.nf +++ b/subworkflows/local/fusioninspector_workflow.nf @@ -1,11 +1,16 @@ -include { FUSIONINSPECTOR } from '../../modules/local/fusioninspector/main' +include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' +include { MEGAFUSION } from '../../modules/local/megafusion/main' +include { FUSIONINSPECTOR } from '../../modules/local/fusioninspector/main' workflow FUSIONINSPECTOR_WORKFLOW { 
take: reads fusion_list fusion_list_filtered + report + bam_sorted_indexed + ch_gtf main: ch_versions = Channel.empty() @@ -27,7 +32,15 @@ workflow FUSIONINSPECTOR_WORKFLOW { FUSIONINSPECTOR( reads_fusion, index) ch_versions = ch_versions.mix(FUSIONINSPECTOR.out.versions) + fusion_data = FUSIONINSPECTOR.out.tsv.join(report) + MEGAFUSION(fusion_data) + ch_versions = ch_versions.mix(MEGAFUSION.out.versions) + if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only && !params.skip_vis) { + bam_sorted_indexed_fusions = bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv) + ARRIBA_VISUALISATION(bam_sorted_indexed_fusions, ch_gtf, params.arriba_ref_protein_domain, params.arriba_ref_cytobands) + ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions) + } emit: versions = ch_versions.ifEmpty(null) diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index 748b8ab1..478986a4 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -13,6 +13,7 @@ workflow FUSIONREPORT_WORKFLOW { main: ch_versions = Channel.empty() + ch_report = Channel.empty() if (!params.fusioninspector_only) { reads_fusions = reads @@ -26,6 +27,7 @@ workflow FUSIONREPORT_WORKFLOW { ch_fusion_list = FUSIONREPORT.out.fusion_list ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered ch_versions = ch_versions.mix(FUSIONREPORT.out.versions) + ch_report = FUSIONREPORT.out.report } else { ch_fusion_list = reads.combine(Channel.value(file(params.fusioninspector_fusions, checkIfExists:true))) .map { meta, reads, fusions -> [ meta, fusions ] } @@ -37,5 +39,6 @@ workflow FUSIONREPORT_WORKFLOW { versions = ch_versions.ifEmpty(null) fusion_list = ch_fusion_list fusion_list_filtered = ch_fusion_list_filtered + report = ch_report.ifEmpty(null) } diff --git a/subworkflows/local/squid_workflow.nf b/subworkflows/local/squid_workflow.nf index 2079da61..c4f29425 100644 --- 
a/subworkflows/local/squid_workflow.nf +++ b/subworkflows/local/squid_workflow.nf @@ -1,5 +1,7 @@ -include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_SQUID } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC } from '../../modules/nf-core/samtools/view/main' include { SQUID } from '../../modules/local/squid/detect/main' @@ -24,33 +26,43 @@ workflow SQUID_WORKFLOW { .map { meta, reads, fusions -> [ meta, fusions ] } } else { - STAR_FOR_SQUID( reads, ch_starindex_ensembl_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') - ch_versions = ch_versions.mix(STAR_FOR_SQUID.out.versions ) + STAR_FOR_SQUID(reads, ch_starindex_ensembl_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') + ch_versions = ch_versions.mix(STAR_FOR_SQUID.out.versions) STAR_FOR_SQUID.out.sam .map { meta, sam -> return [meta, sam, []] - }.set { chimeric_sam_indexed } + }.set { chimeric_sam } - SAMTOOLS_VIEW_FOR_SQUID ( chimeric_sam_indexed, ch_fasta, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID.out.versions ) - SAMTOOLS_SORT_FOR_SQUID ( SAMTOOLS_VIEW_FOR_SQUID.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_SQUID.out.versions ) - bam_sorted = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_SORT_FOR_SQUID.out.bam ) + SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC 
(chimeric_sam, ch_fasta, []) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.versions) + + SAMTOOLS_SORT_FOR_SQUID_CHIMERIC (SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.versions) + + bam_chimeric = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam) if (params.cram.contains('squid')){ - SAMTOOLS_VIEW_FOR_SQUID_CRAM ( STAR_FOR_SQUID.out.bam_sorted, ch_fasta, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions ) - SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC ( chimeric_sam_indexed, ch_fasta, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions ) + SAMTOOLS_INDEX_FOR_SQUID(STAR_FOR_SQUID.out.bam_sorted) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID.out.versions) + SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.versions) + + bam_sorted_indexed = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_SQUID.out.bai) + chimeric_sorted_indexed = SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam.join(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.bai) + + SAMTOOLS_VIEW_FOR_SQUID_CRAM (bam_sorted_indexed, ch_fasta, []) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions) + SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC (chimeric_sorted_indexed, ch_fasta, []) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions) } - SQUID ( bam_sorted ) + SQUID (bam_chimeric) ch_versions = ch_versions.mix(SQUID.out.versions) - SQUID_ANNOTATE ( SQUID.out.fusions, ch_gtf ) + SQUID_ANNOTATE (SQUID.out.fusions, ch_gtf) ch_versions = ch_versions.mix(SQUID_ANNOTATE.out.versions) ch_squid_fusions = SQUID_ANNOTATE.out.fusions_annotated diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index 1849d7e1..1656ec7a 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ 
b/subworkflows/local/starfusion_workflow.nf @@ -13,6 +13,8 @@ workflow STARFUSION_WORKFLOW { main: ch_versions = Channel.empty() ch_align = Channel.empty() + bam_sorted_indexed = Channel.empty() + ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", checkIfExists: true) if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) { @@ -24,10 +26,11 @@ workflow STARFUSION_WORKFLOW { ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions) ch_align = STAR_FOR_STARFUSION.out.bam_sorted + SAMTOOLS_INDEX_FOR_STARFUSION(STAR_FOR_STARFUSION.out.bam_sorted) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION.out.versions) + bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai) + if (params.cram.contains('starfusion')){ - SAMTOOLS_INDEX_FOR_STARFUSION(STAR_FOR_STARFUSION.out.bam_sorted) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION.out.versions) - bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai) SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] ) ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions) } @@ -46,10 +49,10 @@ workflow STARFUSION_WORKFLOW { ch_star_stats = Channel.empty() } emit: - fusions = ch_starfusion_fusions - star_stats = ch_star_stats - bam_sorted = ch_align - versions = ch_versions.ifEmpty(null) - + fusions = ch_starfusion_fusions + star_stats = ch_star_stats + bam_sorted = ch_align + versions = ch_versions.ifEmpty(null) + ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty(null) } diff --git a/subworkflows/local/trim_workflow.nf b/subworkflows/local/trim_workflow.nf index 7420c20e..01baeec7 100644 --- a/subworkflows/local/trim_workflow.nf +++ b/subworkflows/local/trim_workflow.nf @@ -10,6 +10,7 @@ workflow TRIM_WORKFLOW { main: ch_versions = Channel.empty() + ch_reports = Channel.empty() if (params.trim) { @@ -20,6 +21,7 @@ workflow TRIM_WORKFLOW { 
ch_reads_all = reads ch_reads_fusioncatcher = REFORMAT.out.reads_out + ch_reports = FASTQC_FOR_TRIM.out.zip.collect{it[1]}.ifEmpty([]) } else if (params.fastp_trim) { FASTP(reads, params.adapter_fasta, false, false) @@ -30,6 +32,11 @@ workflow TRIM_WORKFLOW { ch_reads_all = FASTP.out.reads ch_reads_fusioncatcher = ch_reads_all + ch_reports = ch_reports.mix( + FASTQC_FOR_FASTP.out.zip.collect{it[1]}.ifEmpty([]), + FASTP.out.json.collect{meta, json -> json}, + FASTP.out.html.collect{meta, html -> html} + ) } else { ch_reads_all = reads @@ -39,6 +46,7 @@ workflow TRIM_WORKFLOW { emit: ch_reads_all ch_reads_fusioncatcher + ch_reports versions = ch_versions.ifEmpty(null) } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index bf521135..45a9e623 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -39,7 +39,7 @@ if (params.fasta[0,1] == "s3") { else { for (param in checkPathParamList) if ((param.toString())!= file(param).toString() && !params.build_references) { exit 1, "Problem with ${param}: ABSOLUTE PATHS are required! Check for trailing '/' at the end of paths too." 
} } -if ((params.squid || params.all) && params.ensembl_version == 105) { exit 1, 'Ensembl version 105 is not supported by squid' } +if ((params.squid || params.all) && params.ensembl_version != 102) { exit 1, 'Ensembl version is not supported by squid' } ch_fasta = file(params.fasta) ch_gtf = file(params.gtf) @@ -227,7 +227,10 @@ workflow RNAFUSION { FUSIONINSPECTOR_WORKFLOW ( ch_reads_all, FUSIONREPORT_WORKFLOW.out.fusion_list, - FUSIONREPORT_WORKFLOW.out.fusion_list_filtered + FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, + FUSIONREPORT_WORKFLOW.out.report, + STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + ch_chrgtf ) ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions.first().ifEmpty(null)) @@ -266,6 +269,9 @@ workflow RNAFUSION { ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_reports.ifEmpty([])) + + MULTIQC ( ch_multiqc_files.collect(),