diff --git a/CHANGELOG.md b/CHANGELOG.md index d1e27f7698..08a95781ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1339](https://github.com/nf-core/sarek/pull/1339) - Update sentieon-modules - [#1344](https://github.com/nf-core/sarek/pull/1344) - Enable CRAM QC, when starting from variantcalling +- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local modules +- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local subworkflows - [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11` ### Fixed diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index e948e1eea5..e54138538c 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -164,6 +164,12 @@ process { } withName: 'UNTAR_CHR_DIR' { + ext.prefix = 'chr_dir' ext.when = { params.tools && params.tools.split(',').contains('controlfreec')} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/" }, + saveAs: { (params.save_reference || params.build_only_index) && !it.equals('versions.yml') ? it : null } + ] } } diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config index 655a227f8a..65e97512fc 100644 --- a/conf/modules/prepare_intervals.config +++ b/conf/modules/prepare_intervals.config @@ -33,7 +33,7 @@ process { ] } - withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' { + withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' { ext.prefix = {"${meta.id}"} publishDir = [ mode: params.publish_dir_mode, diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 88160ccbf4..6a3c9c5a47 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -9,6 +9,7 @@ process CREATE_INTERVALS_BED { input: path(intervals) + val(nucleotides_per_second) output: path("*.bed") , emit: bed @@ -27,7 +28,7 @@ process CREATE_INTERVALS_BED { t = \$5 # runtime estimate if (t == "") { # no runtime estimate in this row, assume default value - t = (\$3 - \$2) / ${params.nucleotides_per_second} + t = (\$3 - \$2) / ${nucleotides_per_second} } if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { # start a new chunk diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index f561ea420c..f8717e6b66 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -59,7 +59,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { cram, allele_files, loci_files, - intervals_bed_combined, + (wes ? 
intervals_bed_combined : []), // No intervals needed if not WES fasta, gc_file, rt_file diff --git a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf index 64f45508ab..22802cfb58 100644 --- a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf @@ -21,7 +21,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT { ch_versions = Channel.empty() - if (!params.wes) intervals_bed = [] // No intervals needed if not WES ASCAT(cram_pair, allele_files, loci_files, intervals_bed, fasta, gc_file, rt_file) ch_versions = ch_versions.mix(ASCAT.out.versions) diff --git a/subworkflows/local/channel_align_create_csv/main.nf b/subworkflows/local/channel_align_create_csv/main.nf index 692ffa0ef4..965b36d33d 100644 --- a/subworkflows/local/channel_align_create_csv/main.nf +++ b/subworkflows/local/channel_align_create_csv/main.nf @@ -4,20 +4,22 @@ workflow CHANNEL_ALIGN_CREATE_CSV { take: - bam_indexed // channel: [mandatory] meta, bam, bai + bam_indexed // channel: [mandatory] meta, bam, bai + outdir // + save_output_as_bam // main: // Creating csv files to restart from this step - bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai -> + bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, bam, bai -> patient = meta.patient sample = meta.sample sex = meta.sex status = meta.status - bam = "${params.outdir}/preprocessing/mapped/${sample}/${bam.name}" - bai = "${params.outdir}/preprocessing/mapped/${sample}/${bai.name}" + bam = "${outdir}/preprocessing/mapped/${sample}/${bam.name}" + bai = "${outdir}/preprocessing/mapped/${sample}/${bai.name}" - type = params.save_output_as_bam ? "bam" : "cram" - type_index = params.save_output_as_bam ? "bai" : "crai" + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? "bai" : "crai" ["mapped.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${bam},${bai}\n"] } diff --git a/subworkflows/local/channel_applybqsr_create_csv/main.nf b/subworkflows/local/channel_applybqsr_create_csv/main.nf index 2396574ced..faa9aa5293 100644 --- a/subworkflows/local/channel_applybqsr_create_csv/main.nf +++ b/subworkflows/local/channel_applybqsr_create_csv/main.nf @@ -5,19 +5,21 @@ workflow CHANNEL_APPLYBQSR_CREATE_CSV { take: cram_recalibrated_index // channel: [mandatory] meta, cram, crai + outdir // + save_output_as_bam // main: // Creating csv files to restart from this step - cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index -> + cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index -> patient = meta.patient sample = meta.sample sex = meta.sex status = meta.status - file = "${params.outdir}/preprocessing/recalibrated/${sample}/${file.name}" - index = "${params.outdir}/preprocessing/recalibrated/${sample}/${index.name}" + file = "${outdir}/preprocessing/recalibrated/${sample}/${file.name}" + index = "${outdir}/preprocessing/recalibrated/${sample}/${index.name}" - type = params.save_output_as_bam ? "bam" : "cram" - type_index = params.save_output_as_bam ? "bai" : "crai" + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? 
"bai" : "crai" ["recalibrated.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${file},${index}\n"] } diff --git a/subworkflows/local/channel_baserecalibrator_create_csv/main.nf b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf index 8bbfacc85c..88ff0cfc38 100644 --- a/subworkflows/local/channel_baserecalibrator_create_csv/main.nf +++ b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf @@ -4,11 +4,11 @@ workflow CHANNEL_BASERECALIBRATOR_CREATE_CSV { take: - cram_table_bqsr // channel: [mandatory] meta, cram, crai, table - tools - skip_tools - save_output_as_bam - outdir + cram_table_bqsr // channel: [mandatory] meta, cram, crai, table + tools // + skip_tools // + outdir // + save_output_as_bam // main: // Creating csv files to restart from this step diff --git a/subworkflows/local/channel_markduplicates_create_csv/main.nf b/subworkflows/local/channel_markduplicates_create_csv/main.nf index 06e9a9826b..3cec161c02 100644 --- a/subworkflows/local/channel_markduplicates_create_csv/main.nf +++ b/subworkflows/local/channel_markduplicates_create_csv/main.nf @@ -4,10 +4,10 @@ workflow CHANNEL_MARKDUPLICATES_CREATE_CSV { take: - cram_markduplicates // channel: [mandatory] meta, cram, crai - csv_subfolder - outdir - save_output_as_bam + cram_markduplicates // channel: [mandatory] meta, cram, crai + csv_subfolder // + outdir // + save_output_as_bam // main: // Creating csv files to restart from this step diff --git a/subworkflows/local/channel_variant_calling_create_csv/main.nf b/subworkflows/local/channel_variant_calling_create_csv/main.nf index b8de11bf8b..9de3fa58ba 100644 --- a/subworkflows/local/channel_variant_calling_create_csv/main.nf +++ b/subworkflows/local/channel_variant_calling_create_csv/main.nf @@ -4,15 +4,16 @@ workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { take: - vcf_to_annotate // channel: [mandatory] meta, vcf + vcf_to_annotate // channel: [mandatory] meta, vcf + outdir // main: // Creating csv files to restart from this step - vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf -> + vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${outdir}/csv"){ meta, vcf -> patient = meta.patient sample = meta.id variantcaller = meta.variantcaller - vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" + vcf = "${outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" ["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"] } } diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf index bf23ff13d4..6b75d2c6b8 100644 --- a/subworkflows/local/post_variantcalling/main.nf +++ b/subworkflows/local/post_variantcalling/main.nf @@ -13,7 +13,7 @@ workflow POST_VARIANTCALLING { main: versions = Channel.empty() - if(concatenate_vcfs){ + if (concatenate_vcfs){ CONCATENATE_GERMLINE_VCFS(vcfs) vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index f9b9e62c95..5ceb16f8aa 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -28,12 +28,12 @@ include { UNZIP as UNZIP_RT } from '../../../modules/nf- workflow PREPARE_GENOME { take: - ascat_alleles // channel: [optional] ascat allele files - ascat_loci // channel: [optional] ascat loci files - ascat_loci_gc 
// channel: [optional] ascat gc content file - ascat_loci_rt // channel: [optional] ascat replictiming file + ascat_alleles // params.ascat_alleles + ascat_loci // params.ascat_loci + ascat_loci_gc // params.ascat_loci_gc + ascat_loci_rt // params.ascat_loci_rt bcftools_annotations // channel: [optional] bcftools annotations file - chr_dir // channel: [optional] chromosome files + chr_dir // params.chr_dir dbsnp // channel: [optional] dbsnp fasta // channel: [mandatory] fasta fasta_fai // channel: [optional] fasta_fai @@ -53,7 +53,7 @@ workflow PREPARE_GENOME { GATK4_CREATESEQUENCEDICTIONARY(fasta) MSISENSORPRO_SCAN(fasta) - SAMTOOLS_FAIDX(fasta, [['id':null], []]) + SAMTOOLS_FAIDX(fasta, [ [ id:fasta.baseName ], [] ] ) // the following are flattened and mapped in case the user supplies more than one value for the param // written for KNOWN_INDELS, but preemptively applied to the rest @@ -66,40 +66,41 @@ workflow PREPARE_GENOME { TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] }) - // prepare ascat reference files - allele_files = ascat_alleles - if (params.ascat_alleles && params.ascat_alleles.endsWith('.zip')) { - UNZIP_ALLELES(ascat_alleles.map{ it -> [[id:it[0].baseName], it]}) + // prepare ascat and controlfreec reference files + if (!ascat_alleles) allele_files = Channel.empty() + else if (ascat_alleles.endsWith(".zip")) { + UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) allele_files = UNZIP_ALLELES.out.unzipped_archive.map{ it[1] } versions = versions.mix(UNZIP_ALLELES.out.versions) - } + } else allele_files = Channel.fromPath(ascat_alleles).collect() - loci_files = ascat_loci - if (params.ascat_loci && params.ascat_loci.endsWith('.zip')) { - UNZIP_LOCI(ascat_loci.map{ it -> [[id:it[0].baseName], it]}) + if (!ascat_loci) loci_files = Channel.empty() + else if (ascat_loci.endsWith(".zip")) { + UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) loci_files = UNZIP_LOCI.out.unzipped_archive.map{ it[1] } versions = versions.mix(UNZIP_LOCI.out.versions) - } - gc_file = ascat_loci_gc - if (params.ascat_loci_gc && params.ascat_loci_gc.endsWith('.zip')) { - UNZIP_GC(ascat_loci_gc.map{ it -> [[id:it[0].baseName], it]}) + } else loci_files = Channel.fromPath(ascat_loci).collect() + + if (!ascat_loci_gc) gc_file = Channel.value([]) + else if (ascat_loci_gc.endsWith(".zip")) { + UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) gc_file = UNZIP_GC.out.unzipped_archive.map{ it[1] } versions = versions.mix(UNZIP_GC.out.versions) - } - rt_file = ascat_loci_rt - if (params.ascat_loci_rt && params.ascat_loci_rt.endsWith('.zip')) { - UNZIP_RT(ascat_loci_rt.map{ it -> [[id:it[0].baseName], it]}) + } else gc_file = Channel.fromPath(ascat_loci_gc).collect() + + if (!ascat_loci_rt) rt_file = Channel.value([]) + else if (ascat_loci_rt.endsWith(".zip")) { + UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) rt_file = UNZIP_RT.out.unzipped_archive.map{ it[1] } versions = versions.mix(UNZIP_RT.out.versions) - } + } else rt_file = Channel.fromPath(ascat_loci_rt).collect() - - chr_files = chr_dir - if (params.chr_dir && params.chr_dir.endsWith('tar.gz')) { - UNTAR_CHR_DIR(chr_dir.map{ it -> [ [ id:'chr_dir' ], it ] }) + if (!chr_dir) chr_files = Channel.value([]) + else if (chr_dir.endsWith(".tar.gz")) { + 
UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] } versions = versions.mix(UNTAR_CHR_DIR.out.versions) - } + } else chr_files = Channel.fromPath(chr_dir).collect() // Gather versions of all tools used versions = versions.mix(SAMTOOLS_FAIDX.out.versions) @@ -116,7 +117,7 @@ workflow PREPARE_GENOME { versions = versions.mix(TABIX_PON.out.versions) emit: - bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // bcftools_annotations.vcf.gz.tbi + bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: bcftools_annotations.vcf.gz.tbi bwa = BWAMEM1_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwa/* bwamem2 = BWAMEM2_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwamem2/* hashtable = DRAGMAP_HASHTABLE.out.hashmap.map{ meta, index -> [index] }.collect() // path: dragmap/* @@ -128,11 +129,12 @@ workflow PREPARE_GENOME { known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi - allele_files - chr_files - gc_file - loci_files - rt_file - versions // channel: [ versions.yml ] + allele_files // path: allele_files + chr_files // path: chr_files + gc_file // path: gc_file + loci_files // path: loci_files + rt_file // path: rt_file + + versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index f4079e3e81..574d234a0d 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -17,6 +17,9 @@ workflow PREPARE_INTERVALS { fasta_fai // mandatory [ fasta_fai ] intervals // [ params.intervals ] no_intervals // [ params.no_intervals ] + nucleotides_per_second + outdir + step main: versions = Channel.empty() @@ -26,21 +29,21 @@ workflow PREPARE_INTERVALS { intervals_combined = Channel.empty() // Single bed file containing all intervals if (no_intervals) { - file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" - file("${params.outdir}/no_intervals.bed.gz").text = "no_intervals\n" - file("${params.outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n" - - intervals_bed = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] } - intervals_bed_gz_tbi = Channel.fromPath(file("${params.outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] } - intervals_combined = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] } - } else if (params.step != 'annotate' && params.step != 'controlfreec') { + file("${outdir}/no_intervals.bed").text = "no_intervals\n" + file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n" + file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n" + + intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] } + intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] } + intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] } + } else if (step != 'annotate' && step != 'controlfreec') { // If no interval/target file 
is provided, then generated intervals from FASTA file if (!intervals) { BUILD_INTERVALS(fasta_fai.map{it -> [ [ id:it.baseName ], it ] }) intervals_combined = BUILD_INTERVALS.out.bed - CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }).bed + CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }, nucleotides_per_second) intervals_bed = CREATE_INTERVALS_BED.out.bed @@ -48,7 +51,9 @@ workflow PREPARE_INTERVALS { versions = versions.mix(CREATE_INTERVALS_BED.out.versions) } else { intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] } - intervals_bed = CREATE_INTERVALS_BED(file(intervals)).bed + CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second) + + intervals_bed = CREATE_INTERVALS_BED.out.bed versions = versions.mix(CREATE_INTERVALS_BED.out.versions) @@ -74,7 +79,7 @@ workflow PREPARE_INTERVALS { else { start = fields[1].toInteger() end = fields[2].toInteger() - duration += (end - start) / params.nucleotides_per_second + duration += (end - start) / nucleotides_per_second } } [ duration, intervalFile ] diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 6784b4616b..985924efa8 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -1,7 +1,34 @@ workflow SAMPLESHEET_TO_CHANNEL{ take: - ch_from_samplesheet + ch_from_samplesheet // + aligner // + ascat_alleles // + ascat_loci // + ascat_loci_rt // + bcftools_annotations // + bcftools_annotations_tbi // + bcftools_header_lines // + build_only_index // + dbsnp // + fasta // + germline_resource // + intervals // + joint_germline // + joint_mutect2 // + known_indels // + known_snps // + no_intervals // + pon // + sentieon_dnascope_emit_mode // + sentieon_haplotyper_emit_mode // + seq_center // + seq_platform // + skip_tools // + step // + tools // + umi_read_structure // + wes // main: ch_from_samplesheet.dump(tag:"ch_from_samplesheet") @@ -18,31 +45,31 @@ workflow SAMPLESHEET_TO_CHANNEL{ (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items if (meta.lane && fastq_2) { meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def CN = seq_center ? "CN:${seq_center}\\t" : '' def flowcell = flowcellLaneFromFastq(fastq_1) // Don't use a random element for ID, it breaks resuming - def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] - if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] + if (step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] else { - error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains fastq files but step is `$step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } // start from BAM } else if (meta.lane && bam) { - if (params.step != 'mapping' && !bai) { + if (step != 'mapping' && !bai) { error("BAM index (bai) should be provided.") } meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + def CN = seq_center ? "CN:${seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] - if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] + if (step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] else { error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } @@ -51,61 +78,61 @@ workflow SAMPLESHEET_TO_CHANNEL{ } else if (table && cram) { meta = meta + [id: meta.sample, data_type: 'cram'] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] + if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains cram files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } // recalibration when skipping MarkDuplicates } else if (table && bam) { meta = meta + [id: meta.sample, data_type: 'bam'] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] + if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] else { - error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains bam files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } // prepare_recalibration or variant_calling } else if (cram) { meta = meta + [id: meta.sample, data_type: 'cram'] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] + if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains cram files but step is `$step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` } else if (bam) { meta = meta + [id: meta.sample, data_type: 'bam'] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] + if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] else { - error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains bam files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } // annotation } else if (vcf) { meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] - if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] + if (step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] else { - error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Samplesheet contains vcf files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } } else { error("Missing or unknown field in csv file header. Please check your samplesheet") } } - if (params.step != 'annotate' && params.tools && !params.build_only_index) { + if (step != 'annotate' && tools && !build_only_index) { // Two checks for ensuring that the pipeline stops with a meaningful error message if // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples - if (!params.build_only_index) { + if (!build_only_index) { def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] def tools_tumor_asked = [] tools_tumor.each{ tool -> - if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + if (tools.split(',').contains(tool)) tools_tumor_asked.add(tool) } if (!tools_tumor_asked.isEmpty()) { error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) @@ -117,7 +144,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] def requested_tools_requiring_normal_samples = [] tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> - if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) } if (!requested_tools_requiring_normal_samples.isEmpty()) { error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) @@ -126,81 +153,76 @@ workflow SAMPLESHEET_TO_CHANNEL{ } // Fails when wrongfull extension for intervals file - if (params.wes && !params.step == 'annotate') { - if (params.intervals && !params.intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") + if (wes && !step == 'annotate') { + if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") - } else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") + } else if (intervals && !intervals.endsWith("bed") && !intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") - if (params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))) { + if (step == 'mapping' && aligner.contains("dragmap") && !(skip_tools && skip_tools.split(',').contains("baserecalibrator"))) { log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") } - if (params.step == 'mapping' && params.aligner.contains("sentieon-bwamem") && params.umi_read_structure) { + if (step == 'mapping' && aligner.contains("sentieon-bwamem") && umi_read_structure) { error("Sentieon BWA is currently not compatible with FGBio UMI handeling. 
Please choose a different aligner.") } - if (params.tools && params.tools.split(',').contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) { + if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !(sentieon_haplotyper_emit_mode.contains('gvcf')))) { error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") } // Fails or warns when missing files or params for ascat - if (params.tools && params.tools.split(',').contains('ascat')) { - if (!params.ascat_alleles) { + if (tools && tools.split(',').contains('ascat')) { + if (!ascat_alleles) { error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") } - if (!params.ascat_loci) { + if (!ascat_loci) { error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") } - if (!params.ascat_loci_gc && !params.ascat_loci_rt) { + if (!ascat_loci_gc && !ascat_loci_rt) { log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") } - if (params.wes) { + if (wes) { log.warn("Default reference files not suited for running ASCAT on WES data. It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") } } // Warns when missing files or params for mutect2 - if (params.tools && params.tools.split(',').contains('mutect2')) { - if (!params.pon) { + if (tools && tools.split(',').contains('mutect2')) { + if (!pon) { log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") } - if (!params.germline_resource) { + if (!germline_resource) { log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") } - if (params.pon && params.pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { + if (pon && pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") } } // Fails when missing resources for baserecalibrator // Warns when missing resources for haplotypecaller - if (!params.dbsnp && !params.known_indels) { - if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('baserecalibrator')))) { + if (!dbsnp && !known_indels) { + if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { error("Base quality score recalibration requires at least one resource file. 
Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") } - if (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) { + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { log.warn "If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-" } } - if (params.joint_germline && (!params.tools || !(params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope')))) { + if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") } if ( - params.tools && + tools && ( - params.tools.split(',').contains('haplotypecaller') || - params.tools.split(',').contains('sentieon_haplotyper') || - params.tools.split(',').contains('sentieon_dnascope') + tools.split(',').contains('haplotypecaller') || + tools.split(',').contains('sentieon_haplotyper') || + tools.split(',').contains('sentieon_dnascope') ) && - params.joint_germline && - ( - !params.dbsnp || - !params.known_indels || - !params.known_snps || - params.no_intervals - ) - ) { + joint_germline && + ( !dbsnp || !known_indels || !known_snps || no_intervals ) + ) { log.warn("""If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, \ but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), \ no variant recalibration will be done. 
For recalibration you must provide all of these resources.\nFor more information \ @@ -209,41 +231,33 @@ workflow SAMPLESHEET_TO_CHANNEL{ As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""") } - if (params.tools && - params.tools.split(',').contains('sentieon_dnascope') && - params.joint_germline && - ( - !params.sentieon_dnascope_emit_mode || - !params.sentieon_dnascope_emit_mode.split(',').contains('gvcf') - ) - ) { + if (tools && + tools.split(',').contains('sentieon_dnascope') && joint_germline && + ( !sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf') ) + ) { error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") } - if (params.tools && - params.tools.split(',').contains('sentieon_haplotyper') && - params.joint_germline && - ( - !params.sentieon_haplotyper_emit_mode || - !params.sentieon_haplotyper_emit_mode.split(',').contains('gvcf') - ) - ) { + if (tools && + tools.split(',').contains('sentieon_haplotyper') && joint_germline && + ( !sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf') ) + ) { error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") } // Fails when --joint_mutect2 is used without enabling mutect2 - if (params.joint_mutect2 && (!params.tools || !params.tools.split(',').contains('mutect2'))) { + if (joint_mutect2 && (!tools || !tools.split(',').contains('mutect2'))) { error("The mutect2 should be specified as one of the tools when doing joint somatic variant calling with Mutect2. (The mutect2 could be specified by adding `--tools mutect2` to the nextflow command.)") } // Fails when missing tools for variant_calling or annotate - if ((params.step == 'variant_calling' || params.step == 'annotate') && !params.tools) { - error("Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/sarek/parameters#tools") + if ((step == 'variant_calling' || step == 'annotate') && !tools) { + error("Please specify at least one tool when using `--step ${step}`.\nhttps://nf-co.re/sarek/parameters#tools") } // Fails when missing sex information for CNV tools - if (params.tools && (params.tools.split(',').contains('ascat') || params.tools.split(',').contains('controlfreec'))) { + if (tools && (tools.split(',').contains('ascat') || tools.split(',').contains('controlfreec'))) { input_sample.map{ if (it[0].sex == 'NA' ) { error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") @@ -252,7 +266,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ } // Fails when bcftools annotate is used but no files are supplied - if (params.tools && params.tools.split(',').contains('bcfann') && !(params.bcftools_annotations && params.bcftools_annotations_tbi && params.bcftools_header_lines)) { + if (tools && tools.split(',').contains('bcfann') && !(bcftools_annotations && bcftools_annotations_tbi && bcftools_header_lines)) { error("Please specify --bcftools_annotations, --bcftools_annotations_tbi, and --bcftools_header_lines, when using BCFTools annotations") } diff --git a/tests/test_controlfreec.yml b/tests/test_controlfreec.yml index dad942b50e..464aeb9860 100644 --- a/tests/test_controlfreec.yml +++ b/tests/test_controlfreec.yml @@ -7,8 +7,6 @@ - 
copy_number_calling files: - path: results/multiqc - - path: results/untar/chr_dir/chr21.fasta - md5sum: 69bd44ef67566a76d6cbb8aa4a25ae35 - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt contains: [ @@ -92,7 +90,6 @@ md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/untar/chr_dir - path: results/variant_calling/controlfreec/sample4_vs_sample3/GC_profile.sample4_vs_sample3.cpn md5sum: d41d8cd98f00b204e9800998ecf8427e # This is the md5sum of an empty file. Are all these files suppose to be empty? - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt diff --git a/tests/test_intervals.yml b/tests/test_intervals.yml index f0cc2495c3..0196376807 100644 --- a/tests/test_intervals.yml +++ b/tests/test_intervals.yml @@ -138,8 +138,6 @@ # binary changes md5sums on reruns - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changes md5sums on reruns - - path: results/reference/intervals - should_exist: false - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 17094 1534 168 1046782 12429 197 0 0.635998", "1.0 0.999991 1171"] diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 7a90bd6757..88baa30567 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -83,14 +83,9 @@ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) */ // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ascat_alleles = params.ascat_alleles ? Channel.fromPath(params.ascat_alleles).collect() : Channel.empty() -ascat_loci = params.ascat_loci ? Channel.fromPath(params.ascat_loci).collect() : Channel.empty() -ascat_loci_gc = params.ascat_loci_gc ? Channel.fromPath(params.ascat_loci_gc).collect() : Channel.value([]) -ascat_loci_rt = params.ascat_loci_rt ? Channel.fromPath(params.ascat_loci_rt).collect() : Channel.value([]) bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] -chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : Channel.value([]) dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() @@ -238,7 +233,35 @@ workflow SAREK { // Parse samplesheet // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? 
Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart") - SAMPLESHEET_TO_CHANNEL(ch_from_samplesheet) + SAMPLESHEET_TO_CHANNEL( + ch_from_samplesheet, + params.aligner, + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_rt, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bcftools_header_lines, + params.build_only_index, + params.dbsnp, + params.fasta, + params.germline_resource, + params.intervals, + params.joint_germline, + params.joint_mutect2, + params.known_indels, + params.known_snps, + params.no_intervals, + params.pon, + params.sentieon_dnascope_emit_mode, + params.sentieon_haplotyper_emit_mode, + params.seq_center, + params.seq_platform, + params.skip_tools, + params.step, + params.tools, + params.umi_read_structure, + params.wes) input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample @@ -283,12 +306,12 @@ workflow SAREK { // Build indices if needed PREPARE_GENOME( - ascat_alleles, - ascat_loci, - ascat_loci_gc, - ascat_loci_rt, + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, bcftools_annotations, - chr_dir, + params.chr_dir, dbsnp, fasta, fasta_fai, @@ -342,7 +365,7 @@ workflow SAREK { known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() // Build intervals if needed - PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals) + PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) // Intervals for speed up preprocessing/variant calling by spread/gather // [interval.bed] all intervals in one file @@ -552,7 +575,8 @@ workflow SAREK { BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) // Create CSV to restart from this step - params.save_output_as_bam ? CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai) : CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.alignment_index) + if (params.save_output_as_bam) CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) + else CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.alignment_index, params.outdir, params.save_output_as_bam) // Gather used softwares versions versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) @@ -673,7 +697,8 @@ workflow SAREK { // Create CSV to restart from this step csv_subfolder = (params.tools && params.tools.split(',').contains('sentieon_dedup')) ? 'sentieon_dedup' : 'markduplicates' - params.save_output_as_bam ? 
CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.alignment_index, csv_subfolder, params.outdir, params.save_output_as_bam) : CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) + if (params.save_output_as_bam) CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.alignment_index, csv_subfolder, params.outdir, params.save_output_as_bam) + else CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) } if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { @@ -760,7 +785,7 @@ workflow SAREK { cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) // Create CSV to restart from this step - CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_md_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.save_output_as_bam, params.outdir) + CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_md_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.outdir, params.save_output_as_bam) } } @@ -837,7 +862,7 @@ workflow SAREK { csv_recalibration = params.save_output_as_bam ? CRAM_TO_BAM_RECAL.out.alignment_index : cram_variant_calling // Create CSV to restart from this step - CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration) + CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration, params.outdir, params.save_output_as_bam) } else if (params.step == 'recalibrate') { // cram_variant_calling contains either: @@ -1052,7 +1077,7 @@ workflow SAREK { reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary }) - CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate) + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate, params.outdir) // Gather used variant calling softwares versions versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions)
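
---

Some reviewer notes on the recurring patterns in this patch follow; each comes with a minimal sketch rather than the pipeline's exact code.

The central refactor of PR #1359 is visible in every `CHANNEL_*_CREATE_CSV` hunk above: reads of `params.*` are hoisted out of the local subworkflow and passed in through `take:`, so a subworkflow's behaviour is fully determined by its declared inputs. A minimal sketch of the resulting shape, using the hypothetical name `CHANNEL_EXAMPLE_CREATE_CSV` (the conventions are those of the real subworkflows above):

```nextflow
// Hypothetical subworkflow illustrating the params-to-take refactor.
workflow CHANNEL_EXAMPLE_CREATE_CSV {
    take:
    bam_indexed        // channel: [mandatory] meta, bam, bai
    outdir             // value: the caller passes params.outdir
    save_output_as_bam // value: the caller passes params.save_output_as_bam

    main:
    // collectFile merges one csv entry per sample into a single restart file;
    // keepHeader/skip keep exactly one header line in the concatenated output
    bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, bam, bai ->
        def type = save_output_as_bam ? "bam" : "cram"
        def path = "${outdir}/preprocessing/mapped/${meta.sample}/${bam.name}"
        ["mapped.csv", "patient,sample,${type}\n${meta.patient},${meta.sample},${path}\n"]
    }
}
```

The caller then supplies everything explicitly, e.g. `CHANNEL_EXAMPLE_CREATE_CSV(bam_bai, params.outdir, params.save_output_as_bam)`, which is exactly what `workflows/sarek.nf` now does for the real subworkflows in this patch.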
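The `modules/local/create_intervals_bed` hunk shows the same idea at the process level: the script used to interpolate `params.nucleotides_per_second` straight into the awk program, and now interpolates a `val` input instead. A trimmed sketch of the shape; the awk body here is only a stand-in for the module's real runtime-based chunking logic:

```nextflow
// Sketch only: the real module estimates per-interval runtime and groups
// intervals into chunks; this stand-in just splits the bed per contig.
process CREATE_INTERVALS_BED {
    input:
    path(intervals)
    val(nucleotides_per_second) // was read from params inside the script

    output:
    path("*.bed"), emit: bed

    script:
    // Groovy resolves ${nucleotides_per_second} before the shell runs, so awk
    // receives a plain number; \$1..\$3 are escaped so awk sees its own fields.
    """
    awk -v FS='\\t' -v OFS='\\t' -v nps=${nucleotides_per_second} \\
        '{ t = (\$3 - \$2) / nps; print \$1, \$2, \$3 > (\$1 ".bed") }' ${intervals}
    """
}
```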
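`PREPARE_GENOME` now receives the raw param values (`ascat_alleles`, `chr_dir`, and friends, as the updated `take:` comments note) and builds the channels itself. Worth flagging for review is the deliberate asymmetry in how "no file given" is encoded. The param names below are illustrative, but the semantics are standard Nextflow:

```nextflow
// Channel.value([]) : an always-available empty value channel. A process
//                     taking it as an optional path input still runs and
//                     simply stages no file (gc_file, rt_file, chr_files).
// Channel.empty()   : emits nothing, so any process wired to it never runs
//                     (allele_files and loci_files, which ASCAT cannot run
//                     without).
optional_file = params.some_optional_file ? Channel.fromPath(params.some_optional_file).collect() : Channel.value([])
required_file = params.some_required_file ? Channel.fromPath(params.some_required_file).collect() : Channel.empty()
```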
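Last, the `conf/modules` hunks lean on two config techniques that are easy to miss: a single `withName` block can target several processes via `|` alternation, and `saveAs` can gate publishing per file. The sketch below combines the selector from the `prepare_intervals.config` hunk with the `publishDir` body from the `UNTAR_CHR_DIR` hunk purely for illustration; the patch does not show the full `publishDir` block for the interval processes.

```nextflow
// Illustrative combination of two separate hunks from this patch.
process {
    // One block configures both the split and the combined bgzip/tabix
    // processes, thanks to the '|' alternation in the selector.
    withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' {
        ext.prefix = { "${meta.id}" }
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/reference/" },
            // publish only when the user asked to keep reference files, and
            // never publish the versions.yml bookkeeping file
            saveAs: { (params.save_reference || params.build_only_index) && !it.equals('versions.yml') ? it : null }
        ]
    }
}
```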