Merge pull request nf-core#1359 from maxulysse/no_params
remove params from subworkflows
maxulysse authored Jan 10, 2024
2 parents 0fe1ab3 + eb52c68 commit 8b492b4
Showing 18 changed files with 225 additions and 171 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- [#1339](https://github.com/nf-core/sarek/pull/1339) - Update sentieon-modules
- [#1344](https://github.com/nf-core/sarek/pull/1344) - Enable CRAM QC, when starting from variantcalling
- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local modules
- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local subworkflows
- [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11`

### Fixed
6 changes: 6 additions & 0 deletions conf/modules/prepare_genome.config
@@ -164,6 +164,12 @@ process {
}

withName: 'UNTAR_CHR_DIR' {
ext.prefix = 'chr_dir'
ext.when = { params.tools && params.tools.split(',').contains('controlfreec')}
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/reference/" },
saveAs: { (params.save_reference || params.build_only_index) && !it.equals('versions.yml') ? it : null }
]
}
}
2 changes: 1 addition & 1 deletion conf/modules/prepare_intervals.config
@@ -33,7 +33,7 @@ process {
]
}

withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' {
withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' {
ext.prefix = {"${meta.id}"}
publishDir = [
mode: params.publish_dir_mode,
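
The updated selector uses alternation, so a single config block now covers both the split and the combined interval tabix processes. A minimal sketch of the idea, with hypothetical process names:

    process {
        // One block configures every process whose name matches either alternative.
        withName: 'PROC_A|PROC_B' {
            ext.prefix = { "${meta.id}" }
        }
    }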
3 changes: 2 additions & 1 deletion modules/local/create_intervals_bed/main.nf
@@ -9,6 +9,7 @@ process CREATE_INTERVALS_BED {

input:
path(intervals)
val(nucleotides_per_second)

output:
path("*.bed") , emit: bed
@@ -27,7 +28,7 @@
t = \$5 # runtime estimate
if (t == "") {
# no runtime estimate in this row, assume default value
t = (\$3 - \$2) / ${params.nucleotides_per_second}
t = (\$3 - \$2) / ${nucleotides_per_second}
}
if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
# start a new chunk
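
With `nucleotides_per_second` promoted to a `val` input, the module no longer reads `params` internally; the caller resolves the parameter once and passes it in. A minimal sketch of the call site (the `ch_intervals` channel name is illustrative; the real invocation appears in the `prepare_intervals` subworkflow below):

    // The params lookup happens at the workflow level, not inside the module.
    CREATE_INTERVALS_BED(ch_intervals, params.nucleotides_per_second)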
2 changes: 1 addition & 1 deletion subworkflows/local/bam_variant_calling_somatic_all/main.nf
@@ -59,7 +59,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
cram,
allele_files,
loci_files,
intervals_bed_combined,
(wes ? intervals_bed_combined : []), // No intervals needed if not WES
fasta,
gc_file,
rt_file
subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
@@ -21,7 +21,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT {

ch_versions = Channel.empty()

if (!params.wes) intervals_bed = [] // No intervals needed if not WES
ASCAT(cram_pair, allele_files, loci_files, intervals_bed, fasta, gc_file, rt_file)

ch_versions = ch_versions.mix(ASCAT.out.versions)
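
Instead of consulting `params.wes` inside the subworkflow, the caller now decides whether intervals apply, passing an empty list for WGS runs (see the ternary in `bam_variant_calling_somatic_all` above). A sketch of the pattern, with `intervals_for_ascat` as an illustrative local name:

    // WGS run: no target intervals, so an empty list is handed to ASCAT.
    intervals_for_ascat = wes ? intervals_bed_combined : []
    ASCAT(cram_pair, allele_files, loci_files, intervals_for_ascat, fasta, gc_file, rt_file)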
14 changes: 8 additions & 6 deletions subworkflows/local/channel_align_create_csv/main.nf
@@ -4,20 +4,22 @@

workflow CHANNEL_ALIGN_CREATE_CSV {
take:
bam_indexed // channel: [mandatory] meta, bam, bai
bam_indexed // channel: [mandatory] meta, bam, bai
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai ->
bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, bam, bai ->
patient = meta.patient
sample = meta.sample
sex = meta.sex
status = meta.status
bam = "${params.outdir}/preprocessing/mapped/${sample}/${bam.name}"
bai = "${params.outdir}/preprocessing/mapped/${sample}/${bai.name}"
bam = "${outdir}/preprocessing/mapped/${sample}/${bam.name}"
bai = "${outdir}/preprocessing/mapped/${sample}/${bai.name}"

type = params.save_output_as_bam ? "bam" : "cram"
type_index = params.save_output_as_bam ? "bai" : "crai"
type = save_output_as_bam ? "bam" : "cram"
type_index = save_output_as_bam ? "bai" : "crai"

["mapped.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${bam},${bai}\n"]
}
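
The subworkflow is now self-contained: `outdir` and `save_output_as_bam` arrive through `take:` rather than being read from `params`. A hedged sketch of the corresponding call site (the same convention applies to the sibling `CHANNEL_*_CREATE_CSV` subworkflows below):

    // Hypothetical caller: the former params reads become explicit arguments.
    CHANNEL_ALIGN_CREATE_CSV(bam_indexed, params.outdir, params.save_output_as_bam)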
12 changes: 7 additions & 5 deletions subworkflows/local/channel_applybqsr_create_csv/main.nf
@@ -5,19 +5,21 @@
workflow CHANNEL_APPLYBQSR_CREATE_CSV {
take:
cram_recalibrated_index // channel: [mandatory] meta, cram, crai
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index ->
patient = meta.patient
sample = meta.sample
sex = meta.sex
status = meta.status
file = "${params.outdir}/preprocessing/recalibrated/${sample}/${file.name}"
index = "${params.outdir}/preprocessing/recalibrated/${sample}/${index.name}"
file = "${outdir}/preprocessing/recalibrated/${sample}/${file.name}"
index = "${outdir}/preprocessing/recalibrated/${sample}/${index.name}"

type = params.save_output_as_bam ? "bam" : "cram"
type_index = params.save_output_as_bam ? "bai" : "crai"
type = save_output_as_bam ? "bam" : "cram"
type_index = save_output_as_bam ? "bai" : "crai"

["recalibrated.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${file},${index}\n"]
}
10 changes: 5 additions & 5 deletions subworkflows/local/channel_baserecalibrator_create_csv/main.nf
@@ -4,11 +4,11 @@

workflow CHANNEL_BASERECALIBRATOR_CREATE_CSV {
take:
cram_table_bqsr // channel: [mandatory] meta, cram, crai, table
tools
skip_tools
save_output_as_bam
outdir
cram_table_bqsr // channel: [mandatory] meta, cram, crai, table
tools //
skip_tools //
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
8 changes: 4 additions & 4 deletions subworkflows/local/channel_markduplicates_create_csv/main.nf
@@ -4,10 +4,10 @@

workflow CHANNEL_MARKDUPLICATES_CREATE_CSV {
take:
cram_markduplicates // channel: [mandatory] meta, cram, crai
csv_subfolder
outdir
save_output_as_bam
cram_markduplicates // channel: [mandatory] meta, cram, crai
csv_subfolder //
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
7 changes: 4 additions & 3 deletions subworkflows/local/channel_variant_calling_create_csv/main.nf
@@ -4,15 +4,16 @@

workflow CHANNEL_VARIANT_CALLING_CREATE_CSV {
take:
vcf_to_annotate // channel: [mandatory] meta, vcf
vcf_to_annotate // channel: [mandatory] meta, vcf
outdir //

main:
// Creating csv files to restart from this step
vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf ->
vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${outdir}/csv"){ meta, vcf ->
patient = meta.patient
sample = meta.id
variantcaller = meta.variantcaller
vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}"
vcf = "${outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}"
["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"]
}
}
2 changes: 1 addition & 1 deletion subworkflows/local/post_variantcalling/main.nf
@@ -13,7 +13,7 @@ workflow POST_VARIANTCALLING {
main:
versions = Channel.empty()

if(concatenate_vcfs){
if (concatenate_vcfs){
CONCATENATE_GERMLINE_VCFS(vcfs)

vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
72 changes: 37 additions & 35 deletions subworkflows/local/prepare_genome/main.nf
@@ -28,12 +28,12 @@ include { UNZIP as UNZIP_RT } from '../../../modules/nf-

workflow PREPARE_GENOME {
take:
ascat_alleles // channel: [optional] ascat allele files
ascat_loci // channel: [optional] ascat loci files
ascat_loci_gc // channel: [optional] ascat gc content file
ascat_loci_rt // channel: [optional] ascat replictiming file
ascat_alleles // params.ascat_alleles
ascat_loci // params.ascat_loci
ascat_loci_gc // params.ascat_loci_gc
ascat_loci_rt // params.ascat_loci_rt
bcftools_annotations // channel: [optional] bcftools annotations file
chr_dir // channel: [optional] chromosome files
chr_dir // params.chr_dir
dbsnp // channel: [optional] dbsnp
fasta // channel: [mandatory] fasta
fasta_fai // channel: [optional] fasta_fai
@@ -53,7 +53,7 @@ workflow PREPARE_GENOME {

GATK4_CREATESEQUENCEDICTIONARY(fasta)
MSISENSORPRO_SCAN(fasta)
SAMTOOLS_FAIDX(fasta, [['id':null], []])
SAMTOOLS_FAIDX(fasta, [ [ id:fasta.baseName ], [] ] )

// the following are flattened and mapped in case the user supplies more than one value for the param
// written for KNOWN_INDELS, but preemptively applied to the rest
@@ -66,40 +66,41 @@
TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } )
TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] })

// prepare ascat reference files
allele_files = ascat_alleles
if (params.ascat_alleles && params.ascat_alleles.endsWith('.zip')) {
UNZIP_ALLELES(ascat_alleles.map{ it -> [[id:it[0].baseName], it]})
// prepare ascat and controlfreec reference files
if (!ascat_alleles) allele_files = Channel.empty()
else if (ascat_alleles.endsWith(".zip")) {
UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
allele_files = UNZIP_ALLELES.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_ALLELES.out.versions)
}
} else allele_files = Channel.fromPath(ascat_alleles).collect()

loci_files = ascat_loci
if (params.ascat_loci && params.ascat_loci.endsWith('.zip')) {
UNZIP_LOCI(ascat_loci.map{ it -> [[id:it[0].baseName], it]})
if (!ascat_loci) loci_files = Channel.empty()
else if (ascat_loci.endsWith(".zip")) {
UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
loci_files = UNZIP_LOCI.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_LOCI.out.versions)
}
gc_file = ascat_loci_gc
if (params.ascat_loci_gc && params.ascat_loci_gc.endsWith('.zip')) {
UNZIP_GC(ascat_loci_gc.map{ it -> [[id:it[0].baseName], it]})
} else loci_files = Channel.fromPath(ascat_loci).collect()

if (!ascat_loci_gc) gc_file = Channel.value([])
else if (ascat_loci_gc.endsWith(".zip")) {
UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
gc_file = UNZIP_GC.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_GC.out.versions)
}
rt_file = ascat_loci_rt
if (params.ascat_loci_rt && params.ascat_loci_rt.endsWith('.zip')) {
UNZIP_RT(ascat_loci_rt.map{ it -> [[id:it[0].baseName], it]})
} else gc_file = Channel.fromPath(ascat_loci_gc).collect()

if (!ascat_loci_rt) rt_file = Channel.value([])
else if (ascat_loci_rt.endsWith(".zip")) {
UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
rt_file = UNZIP_RT.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_RT.out.versions)
}
} else rt_file = Channel.fromPath(ascat_loci_rt).collect()


chr_files = chr_dir
if (params.chr_dir && params.chr_dir.endsWith('tar.gz')) {
UNTAR_CHR_DIR(chr_dir.map{ it -> [ [ id:'chr_dir' ], it ] })
if (!chr_dir) chr_files = Channel.value([])
else if (chr_dir.endsWith(".tar.gz")) {
UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] }
versions = versions.mix(UNTAR_CHR_DIR.out.versions)
}
} else chr_files = Channel.fromPath(chr_dir).collect()
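
Every optional reference above now follows the same three-way staging pattern: absent → empty placeholder, archive → decompression module, plain path → staged directly. A generic sketch with illustrative names (`ref`, `UNZIP_REF`, `ref_ch` are not from the PR):

    // Three-way staging for an optional reference input.
    if (!ref) ref_ch = Channel.empty()               // not provided
    else if (ref.endsWith('.zip')) {
        // Archive: unzip first, then take the unpacked path.
        UNZIP_REF(Channel.fromPath(file(ref)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
        ref_ch = UNZIP_REF.out.unzipped_archive.map{ it[1] }
    }
    else ref_ch = Channel.fromPath(ref).collect()    // plain path: stage as-is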

// Gather versions of all tools used
versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
@@ -116,7 +117,7 @@
versions = versions.mix(TABIX_PON.out.versions)

emit:
bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // bcftools_annotations.vcf.gz.tbi
bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: bcftools_annotations.vcf.gz.tbi
bwa = BWAMEM1_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwa/*
bwamem2 = BWAMEM2_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwamem2/*
hashtable = DRAGMAP_HASHTABLE.out.hashmap.map{ meta, index -> [index] }.collect() // path: dragmap/*
@@ -128,11 +129,12 @@
known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list
pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi
allele_files
chr_files
gc_file
loci_files
rt_file

versions // channel: [ versions.yml ]
allele_files // path: allele_files
chr_files // path: chr_files
gc_file // path: gc_file
loci_files // path: loci_files
rt_file // path: rt_file

versions // channel: [ versions.yml ]
}
27 changes: 16 additions & 11 deletions subworkflows/local/prepare_intervals/main.nf
@@ -17,6 +17,9 @@ workflow PREPARE_INTERVALS {
fasta_fai // mandatory [ fasta_fai ]
intervals // [ params.intervals ]
no_intervals // [ params.no_intervals ]
nucleotides_per_second
outdir
step

main:
versions = Channel.empty()
@@ -26,29 +29,31 @@
intervals_combined = Channel.empty() // Single bed file containing all intervals

if (no_intervals) {
file("${params.outdir}/no_intervals.bed").text = "no_intervals\n"
file("${params.outdir}/no_intervals.bed.gz").text = "no_intervals\n"
file("${params.outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n"

intervals_bed = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] }
intervals_bed_gz_tbi = Channel.fromPath(file("${params.outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] }
intervals_combined = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] }
} else if (params.step != 'annotate' && params.step != 'controlfreec') {
file("${outdir}/no_intervals.bed").text = "no_intervals\n"
file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n"
file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n"

intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] }
intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] }
intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] }
} else if (step != 'annotate' && step != 'controlfreec') {
// If no interval/target file is provided, then generated intervals from FASTA file
if (!intervals) {
BUILD_INTERVALS(fasta_fai.map{it -> [ [ id:it.baseName ], it ] })

intervals_combined = BUILD_INTERVALS.out.bed

CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }).bed
CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }, nucleotides_per_second)

intervals_bed = CREATE_INTERVALS_BED.out.bed

versions = versions.mix(BUILD_INTERVALS.out.versions)
versions = versions.mix(CREATE_INTERVALS_BED.out.versions)
} else {
intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] }
intervals_bed = CREATE_INTERVALS_BED(file(intervals)).bed
CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second)

intervals_bed = CREATE_INTERVALS_BED.out.bed

versions = versions.mix(CREATE_INTERVALS_BED.out.versions)

@@ -74,7 +79,7 @@
else {
start = fields[1].toInteger()
end = fields[2].toInteger()
duration += (end - start) / params.nucleotides_per_second
duration += (end - start) / nucleotides_per_second
}
}
[ duration, intervalFile ]
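
With `nucleotides_per_second`, `outdir`, and `step` added to `take:`, the last `params` reads leave this subworkflow too. A hedged sketch of the invocation, assuming it is called from the top-level workflow with the argument order shown above:

    PREPARE_INTERVALS(
        fasta_fai,                      // mandatory [ fasta_fai ]
        params.intervals,
        params.no_intervals,
        params.nucleotides_per_second,
        params.outdir,
        params.step
    )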