Merge pull request nf-core#1359 from maxulysse/no_params
remove params from subworkflows
maxulysse authored Jan 10, 2024
2 parents 0fe1ab3 + eb52c68 commit 8b492b4
Showing 18 changed files with 225 additions and 171 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- [#1339](https://github.com/nf-core/sarek/pull/1339) - Update sentieon-modules
- [#1344](https://github.com/nf-core/sarek/pull/1344) - Enable CRAM QC, when starting from variantcalling
- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local modules
- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local subworkflows
- [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11`

### Fixed
6 changes: 6 additions & 0 deletions conf/modules/prepare_genome.config
@@ -164,6 +164,12 @@ process {
}

withName: 'UNTAR_CHR_DIR' {
ext.prefix = 'chr_dir'
ext.when = { params.tools && params.tools.split(',').contains('controlfreec')}
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/reference/" },
saveAs: { (params.save_reference || params.build_only_index) && !it.equals('versions.yml') ? it : null }
]
}
}
2 changes: 1 addition & 1 deletion conf/modules/prepare_intervals.config
@@ -33,7 +33,7 @@ process {
]
}

withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' {
withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' {
ext.prefix = {"${meta.id}"}
publishDir = [
mode: params.publish_dir_mode,
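
The updated selector uses alternation, so a single config block now covers both the split and the combined interval tabix processes. A minimal sketch of the idea, with hypothetical process names:

    process {
        // One block configures every process whose name matches either alternative.
        withName: 'PROC_A|PROC_B' {
            ext.prefix = { "${meta.id}" }
        }
    }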
3 changes: 2 additions & 1 deletion modules/local/create_intervals_bed/main.nf
@@ -9,6 +9,7 @@ process CREATE_INTERVALS_BED {

input:
path(intervals)
val(nucleotides_per_second)

output:
path("*.bed") , emit: bed
@@ -27,7 +28,7 @@
t = \$5 # runtime estimate
if (t == "") {
# no runtime estimate in this row, assume default value
t = (\$3 - \$2) / ${params.nucleotides_per_second}
t = (\$3 - \$2) / ${nucleotides_per_second}
}
if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
# start a new chunk
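
With `nucleotides_per_second` promoted to a `val` input, the module no longer reads `params` internally; the caller resolves the parameter once and passes it in. A minimal sketch of the call site (the `ch_intervals` channel name is illustrative; the real invocation appears in the `prepare_intervals` subworkflow below):

    // The params lookup happens at the workflow level, not inside the module.
    CREATE_INTERVALS_BED(ch_intervals, params.nucleotides_per_second)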
2 changes: 1 addition & 1 deletion subworkflows/local/bam_variant_calling_somatic_all/main.nf
@@ -59,7 +59,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
cram,
allele_files,
loci_files,
intervals_bed_combined,
(wes ? intervals_bed_combined : []), // No intervals needed if not WES
fasta,
gc_file,
rt_file
subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
@@ -21,7 +21,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT {

ch_versions = Channel.empty()

if (!params.wes) intervals_bed = [] // No intervals needed if not WES
ASCAT(cram_pair, allele_files, loci_files, intervals_bed, fasta, gc_file, rt_file)

ch_versions = ch_versions.mix(ASCAT.out.versions)
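
Instead of consulting `params.wes` inside the subworkflow, the caller now decides whether intervals apply, passing an empty list for WGS runs (see the ternary in `bam_variant_calling_somatic_all` above). A sketch of the pattern, with `intervals_for_ascat` as an illustrative local name:

    // WGS run: no target intervals, so an empty list is handed to ASCAT.
    intervals_for_ascat = wes ? intervals_bed_combined : []
    ASCAT(cram_pair, allele_files, loci_files, intervals_for_ascat, fasta, gc_file, rt_file)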
14 changes: 8 additions & 6 deletions subworkflows/local/channel_align_create_csv/main.nf
@@ -4,20 +4,22 @@

workflow CHANNEL_ALIGN_CREATE_CSV {
take:
bam_indexed // channel: [mandatory] meta, bam, bai
bam_indexed // channel: [mandatory] meta, bam, bai
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai ->
bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, bam, bai ->
patient = meta.patient
sample = meta.sample
sex = meta.sex
status = meta.status
bam = "${params.outdir}/preprocessing/mapped/${sample}/${bam.name}"
bai = "${params.outdir}/preprocessing/mapped/${sample}/${bai.name}"
bam = "${outdir}/preprocessing/mapped/${sample}/${bam.name}"
bai = "${outdir}/preprocessing/mapped/${sample}/${bai.name}"

type = params.save_output_as_bam ? "bam" : "cram"
type_index = params.save_output_as_bam ? "bai" : "crai"
type = save_output_as_bam ? "bam" : "cram"
type_index = save_output_as_bam ? "bai" : "crai"

["mapped.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${bam},${bai}\n"]
}
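
The subworkflow is now self-contained: `outdir` and `save_output_as_bam` arrive through `take:` rather than being read from `params`. A hedged sketch of the corresponding call site (the same convention applies to the sibling `CHANNEL_*_CREATE_CSV` subworkflows below):

    // Hypothetical caller: the former params reads become explicit arguments.
    CHANNEL_ALIGN_CREATE_CSV(bam_indexed, params.outdir, params.save_output_as_bam)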
12 changes: 7 additions & 5 deletions subworkflows/local/channel_applybqsr_create_csv/main.nf
@@ -5,19 +5,21 @@
workflow CHANNEL_APPLYBQSR_CREATE_CSV {
take:
cram_recalibrated_index // channel: [mandatory] meta, cram, crai
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index ->
patient = meta.patient
sample = meta.sample
sex = meta.sex
status = meta.status
file = "${params.outdir}/preprocessing/recalibrated/${sample}/${file.name}"
index = "${params.outdir}/preprocessing/recalibrated/${sample}/${index.name}"
file = "${outdir}/preprocessing/recalibrated/${sample}/${file.name}"
index = "${outdir}/preprocessing/recalibrated/${sample}/${index.name}"

type = params.save_output_as_bam ? "bam" : "cram"
type_index = params.save_output_as_bam ? "bai" : "crai"
type = save_output_as_bam ? "bam" : "cram"
type_index = save_output_as_bam ? "bai" : "crai"

["recalibrated.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${file},${index}\n"]
}
10 changes: 5 additions & 5 deletions subworkflows/local/channel_baserecalibrator_create_csv/main.nf
@@ -4,11 +4,11 @@

workflow CHANNEL_BASERECALIBRATOR_CREATE_CSV {
take:
cram_table_bqsr // channel: [mandatory] meta, cram, crai, table
tools
skip_tools
save_output_as_bam
outdir
cram_table_bqsr // channel: [mandatory] meta, cram, crai, table
tools //
skip_tools //
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
8 changes: 4 additions & 4 deletions subworkflows/local/channel_markduplicates_create_csv/main.nf
@@ -4,10 +4,10 @@

workflow CHANNEL_MARKDUPLICATES_CREATE_CSV {
take:
cram_markduplicates // channel: [mandatory] meta, cram, crai
csv_subfolder
outdir
save_output_as_bam
cram_markduplicates // channel: [mandatory] meta, cram, crai
csv_subfolder //
outdir //
save_output_as_bam //

main:
// Creating csv files to restart from this step
7 changes: 4 additions & 3 deletions subworkflows/local/channel_variant_calling_create_csv/main.nf
@@ -4,15 +4,16 @@

workflow CHANNEL_VARIANT_CALLING_CREATE_CSV {
take:
vcf_to_annotate // channel: [mandatory] meta, vcf
vcf_to_annotate // channel: [mandatory] meta, vcf
outdir //

main:
// Creating csv files to restart from this step
vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf ->
vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${outdir}/csv"){ meta, vcf ->
patient = meta.patient
sample = meta.id
variantcaller = meta.variantcaller
vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}"
vcf = "${outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}"
["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"]
}
}
2 changes: 1 addition & 1 deletion subworkflows/local/post_variantcalling/main.nf
@@ -13,7 +13,7 @@ workflow POST_VARIANTCALLING {
main:
versions = Channel.empty()

if(concatenate_vcfs){
if (concatenate_vcfs){
CONCATENATE_GERMLINE_VCFS(vcfs)

vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
72 changes: 37 additions & 35 deletions subworkflows/local/prepare_genome/main.nf
@@ -28,12 +28,12 @@ include { UNZIP as UNZIP_RT } from '../../../modules/nf-

workflow PREPARE_GENOME {
take:
ascat_alleles // channel: [optional] ascat allele files
ascat_loci // channel: [optional] ascat loci files
ascat_loci_gc // channel: [optional] ascat gc content file
ascat_loci_rt // channel: [optional] ascat replictiming file
ascat_alleles // params.ascat_alleles
ascat_loci // params.ascat_loci
ascat_loci_gc // params.ascat_loci_gc
ascat_loci_rt // params.ascat_loci_rt
bcftools_annotations // channel: [optional] bcftools annotations file
chr_dir // channel: [optional] chromosome files
chr_dir // params.chr_dir
dbsnp // channel: [optional] dbsnp
fasta // channel: [mandatory] fasta
fasta_fai // channel: [optional] fasta_fai
@@ -53,7 +53,7 @@ workflow PREPARE_GENOME {

GATK4_CREATESEQUENCEDICTIONARY(fasta)
MSISENSORPRO_SCAN(fasta)
SAMTOOLS_FAIDX(fasta, [['id':null], []])
SAMTOOLS_FAIDX(fasta, [ [ id:fasta.baseName ], [] ] )

// the following are flattened and mapped in case the user supplies more than one value for the param
// written for KNOWN_INDELS, but preemptively applied to the rest
@@ -66,40 +66,41 @@
TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } )
TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] })

// prepare ascat reference files
allele_files = ascat_alleles
if (params.ascat_alleles && params.ascat_alleles.endsWith('.zip')) {
UNZIP_ALLELES(ascat_alleles.map{ it -> [[id:it[0].baseName], it]})
// prepare ascat and controlfreec reference files
if (!ascat_alleles) allele_files = Channel.empty()
else if (ascat_alleles.endsWith(".zip")) {
UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
allele_files = UNZIP_ALLELES.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_ALLELES.out.versions)
}
} else allele_files = Channel.fromPath(ascat_alleles).collect()

loci_files = ascat_loci
if (params.ascat_loci && params.ascat_loci.endsWith('.zip')) {
UNZIP_LOCI(ascat_loci.map{ it -> [[id:it[0].baseName], it]})
if (!ascat_loci) loci_files = Channel.empty()
else if (ascat_loci.endsWith(".zip")) {
UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
loci_files = UNZIP_LOCI.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_LOCI.out.versions)
}
gc_file = ascat_loci_gc
if (params.ascat_loci_gc && params.ascat_loci_gc.endsWith('.zip')) {
UNZIP_GC(ascat_loci_gc.map{ it -> [[id:it[0].baseName], it]})
} else loci_files = Channel.fromPath(ascat_loci).collect()

if (!ascat_loci_gc) gc_file = Channel.value([])
else if (ascat_loci_gc.endsWith(".zip")) {
UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
gc_file = UNZIP_GC.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_GC.out.versions)
}
rt_file = ascat_loci_rt
if (params.ascat_loci_rt && params.ascat_loci_rt.endsWith('.zip')) {
UNZIP_RT(ascat_loci_rt.map{ it -> [[id:it[0].baseName], it]})
} else gc_file = Channel.fromPath(ascat_loci_gc).collect()

if (!ascat_loci_rt) rt_file = Channel.value([])
else if (ascat_loci_rt.endsWith(".zip")) {
UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
rt_file = UNZIP_RT.out.unzipped_archive.map{ it[1] }
versions = versions.mix(UNZIP_RT.out.versions)
}
} else rt_file = Channel.fromPath(ascat_loci_rt).collect()


chr_files = chr_dir
if (params.chr_dir && params.chr_dir.endsWith('tar.gz')) {
UNTAR_CHR_DIR(chr_dir.map{ it -> [ [ id:'chr_dir' ], it ] })
if (!chr_dir) chr_files = Channel.value([])
else if (chr_dir.endsWith(".tar.gz")) {
UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] }
versions = versions.mix(UNTAR_CHR_DIR.out.versions)
}
} else chr_files = Channel.fromPath(chr_dir).collect()
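
Every optional reference above now follows the same three-way staging pattern: absent → empty placeholder, archive → decompression module, plain path → staged directly. A generic sketch with illustrative names (`ref`, `UNZIP_REF`, `ref_ch` are not from the PR):

    // Three-way staging for an optional reference input.
    if (!ref) ref_ch = Channel.empty()               // not provided
    else if (ref.endsWith('.zip')) {
        // Archive: unzip first, then take the unpacked path.
        UNZIP_REF(Channel.fromPath(file(ref)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
        ref_ch = UNZIP_REF.out.unzipped_archive.map{ it[1] }
    }
    else ref_ch = Channel.fromPath(ref).collect()    // plain path: stage as-is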

// Gather versions of all tools used
versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
@@ -116,7 +117,7 @@
versions = versions.mix(TABIX_PON.out.versions)

emit:
bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // bcftools_annotations.vcf.gz.tbi
bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: bcftools_annotations.vcf.gz.tbi
bwa = BWAMEM1_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwa/*
bwamem2 = BWAMEM2_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwamem2/*
hashtable = DRAGMAP_HASHTABLE.out.hashmap.map{ meta, index -> [index] }.collect() // path: dragmap/*
@@ -128,11 +129,12 @@
known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list
pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi
allele_files
chr_files
gc_file
loci_files
rt_file

versions // channel: [ versions.yml ]
allele_files // path: allele_files
chr_files // path: chr_files
gc_file // path: gc_file
loci_files // path: loci_files
rt_file // path: rt_file

versions // channel: [ versions.yml ]
}
27 changes: 16 additions & 11 deletions subworkflows/local/prepare_intervals/main.nf
@@ -17,6 +17,9 @@ workflow PREPARE_INTERVALS {
fasta_fai // mandatory [ fasta_fai ]
intervals // [ params.intervals ]
no_intervals // [ params.no_intervals ]
nucleotides_per_second
outdir
step

main:
versions = Channel.empty()
@@ -26,29 +29,31 @@
intervals_combined = Channel.empty() // Single bed file containing all intervals

if (no_intervals) {
file("${params.outdir}/no_intervals.bed").text = "no_intervals\n"
file("${params.outdir}/no_intervals.bed.gz").text = "no_intervals\n"
file("${params.outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n"

intervals_bed = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] }
intervals_bed_gz_tbi = Channel.fromPath(file("${params.outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] }
intervals_combined = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] }
} else if (params.step != 'annotate' && params.step != 'controlfreec') {
file("${outdir}/no_intervals.bed").text = "no_intervals\n"
file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n"
file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n"

intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] }
intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] }
intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] }
} else if (step != 'annotate' && step != 'controlfreec') {
// If no interval/target file is provided, then generated intervals from FASTA file
if (!intervals) {
BUILD_INTERVALS(fasta_fai.map{it -> [ [ id:it.baseName ], it ] })

intervals_combined = BUILD_INTERVALS.out.bed

CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }).bed
CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }, nucleotides_per_second)

intervals_bed = CREATE_INTERVALS_BED.out.bed

versions = versions.mix(BUILD_INTERVALS.out.versions)
versions = versions.mix(CREATE_INTERVALS_BED.out.versions)
} else {
intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] }
intervals_bed = CREATE_INTERVALS_BED(file(intervals)).bed
CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second)

intervals_bed = CREATE_INTERVALS_BED.out.bed

versions = versions.mix(CREATE_INTERVALS_BED.out.versions)

@@ -74,7 +79,7 @@
else {
start = fields[1].toInteger()
end = fields[2].toInteger()
duration += (end - start) / params.nucleotides_per_second
duration += (end - start) / nucleotides_per_second
}
}
[ duration, intervalFile ]
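
With `nucleotides_per_second`, `outdir`, and `step` added to `take:`, the last `params` reads leave this subworkflow too. A hedged sketch of the invocation, assuming it is called from the top-level workflow with the argument order shown above:

    PREPARE_INTERVALS(
        fasta_fai,                      // mandatory [ fasta_fai ]
        params.intervals,
        params.no_intervals,
        params.nucleotides_per_second,
        params.outdir,
        params.step
    )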