From ade6b495112f8920db3f22c02554acdf4675a43b Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 09:57:57 +0000 Subject: [PATCH 01/24] simplify hc filtering logic --- .../bam_variant_calling_germline_all/main.nf | 32 +++++++++++----- .../main.nf | 38 ++++--------------- 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 82a4991727..3703b2d5ed 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -119,22 +119,16 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { dbsnp, dbsnp_tbi, dbsnp_vqsr, - known_sites_indels, - known_sites_indels_tbi, - known_indels_vqsr, - known_sites_snps, - known_sites_snps_tbi, - known_snps_vqsr, - intervals, - intervals_bed_combined_haplotypec, - ((skip_tools && skip_tools.split(',').contains('haplotypecaller_filter') || joint_germline))) + intervals) vcf_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.vcf + tbi_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions) if (joint_germline) { BAM_JOINT_CALLING_GERMLINE_GATK( - BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.genotype_intervals, + BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.gvcf_tbi_intervals, fasta, fasta_fai, dict, @@ -150,6 +144,24 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_GATK.out.genotype_vcf versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_GATK.out.versions) + } else { + + // If single sample track, check if filtering should be done + if (!skip_haplotypecaller_filter) { + + VCF_VARIANT_FILTERING_GATK( + vcf_haplotypecaller.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true), + fasta, + fasta_fai, + dict.map{ meta, dict -> [ dict ] }, + intervals_bed_combined_haplotypec, + known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), + known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) + + vcf_haplotypecaller = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf + + versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions) + } } } diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf index 9eee7766e4..5b5da39909 100644 --- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -5,7 +5,6 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools/main' -include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' include { GATK4_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/haplotypecaller/main' include { GATK4_MERGEVCFS as MERGE_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/mergevcfs/main' @@ -18,20 +17,12 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { dbsnp // channel: [optional] dbsnp_tbi // channel: [optional] dbsnp_vqsr // channel: [optional] - known_sites_indels // channel: [optional] - known_sites_indels_tbi // channel: [optional] - known_indels_vqsr // channel: [optional] - known_sites_snps // channel: [optional] - known_sites_snps_tbi // channel: [optional] - known_snps_vqsr // channel: [optional] intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals - intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals - skip_haplotypecaller_filter // boolean: [mandatory] [default: false] skip haplotypecaller filter main: versions = Channel.empty() - vcf = Channel.empty() + vcf = Channel.empty() realigned_bam = Channel.empty() // Combine cram and intervals for spread and gather strategy @@ -42,7 +33,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi) // For joint genotyping - genotype_intervals = GATK4_HAPLOTYPECALLER.out.vcf + gvcf_tbi_intervals = GATK4_HAPLOTYPECALLER.out.vcf .join(GATK4_HAPLOTYPECALLER.out.tbi, failOnMismatch: true) .join(cram_intervals, failOnMismatch: true) .map{ meta, gvcf, tbi, cram, crai, intervals, dragstr_model -> [ meta, gvcf, tbi, intervals ] } @@ -87,33 +78,18 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { realigned_bam = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai - if (!skip_haplotypecaller_filter) { - - VCF_VARIANT_FILTERING_GATK( - haplotypecaller_vcf.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true), - fasta, - fasta_fai, - dict.map{ meta, dict -> [ dict ] }, - intervals_bed_combined, - known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), - known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) - - vcf = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf - - versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions) - - } else vcf = haplotypecaller_vcf - versions = versions.mix(GATK4_HAPLOTYPECALLER.out.versions) versions = versions.mix(MERGE_HAPLOTYPECALLER.out.versions) // Remove no longer necessary field: num_intervals - vcf = vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } + vcf = haplotypecaller_vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } + tbi = haplotypecaller_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } emit: - genotype_intervals // For joint genotyping + gvcf_tbi_intervals // For joint genotyping realigned_bam // Optional - vcf // vcf filtered or not + vcf // vcf + tbi // tbi versions } From eabf99bec045f87514df337977d86476c23d70db Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 10:23:59 +0000 Subject: [PATCH 02/24] pull params out of sw --- subworkflows/local/bam_variant_calling_germline_all/main.nf | 4 +++- workflows/sarek.nf | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 3703b2d5ed..6ce8e45aa2 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -13,6 +13,7 @@ include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main' include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main' include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main' +include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' workflow BAM_VARIANT_CALLING_GERMLINE_ALL { take: @@ -38,6 +39,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { known_sites_snps_tbi known_snps_vqsr joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants + skip_haplotypecaller_filter // boolean: [mandatory] [default: false] whether to filter haplotypecaller single sample vcfs sentieon_haplotyper_emit_mode // channel: [mandatory] value channel with string main: @@ -150,7 +152,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { if (!skip_haplotypecaller_filter) { VCF_VARIANT_FILTERING_GATK( - vcf_haplotypecaller.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true), + vcf_haplotypecaller.join(tbi_haplotypecaller, failOnDuplicate: true, failOnMismatch: true), fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 669dac1bd5..df4d6b758c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1112,6 +1112,7 @@ workflow SAREK { known_sites_snps_tbi, known_snps_vqsr, params.joint_germline, + params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), // true if filtering should be skipped params.sentieon_haplotyper_emit_mode) // TUMOR ONLY VARIANT CALLING From db7cb23ea29e3c4d600ae7b071ce022f041cf191 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 10:27:12 +0000 Subject: [PATCH 03/24] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2af6c6497..2b2b0b0bd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md` - [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller - [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.) +- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor Haplotyecaller subworkflows ## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare From 114f1b2fd74055a3413760ba0de40c068bea872b Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 11:40:52 +0000 Subject: [PATCH 04/24] only join on file name to make this more stable --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 6c53e15b60..2edd71bad3 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -39,8 +39,11 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { gendb_input = input .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } - .groupTuple(by:[0, 3]) - .map{ meta, gvcf, tbi, intervals -> [ meta, gvcf, tbi, intervals, [], [] ] } + .groupTuple(by:3) //join on interval file + .map{ meta_list, gvcf, tbi, intervals -> + // meta is now a list of [meta1, meta2] but they are all the same. So take the first element. + [ meta_list[0], gvcf, tbi, intervals, [], [] ] + } // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport GATK4_GENOMICSDBIMPORT(gendb_input, false, false, false) From 9e24c7ae42a3769f941aee095c721fca7068c6a7 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 15:42:49 +0000 Subject: [PATCH 05/24] add stub tests --- conf/modules/joint_germline.config | 6 +++ conf/test/tools_germline.config | 3 ++ modules/nf-core/gatk4/applyvqsr/main.nf | 12 ++++++ .../nf-core/gatk4/variantrecalibrator/main.nf | 14 +++++++ .../csv/3.0/recalibrated_tumoronly_joint.csv | 3 +- tests/test_joint_germline.yml | 38 ++++++++++++++++++- 6 files changed, 73 insertions(+), 3 deletions(-) diff --git a/conf/modules/joint_germline.config b/conf/modules/joint_germline.config index c03e629b22..ead7c9a86e 100644 --- a/conf/modules/joint_germline.config +++ b/conf/modules/joint_germline.config @@ -68,11 +68,17 @@ process { withName: 'GATK4_APPLYVQSR_SNP' { ext.prefix = { "${meta.id}_SNP" } ext.args = '--truth-sensitivity-filter-level 99.9 -mode SNP' + publishDir = [ + enabled: false + ] } withName: 'GATK4_APPLYVQSR_INDEL' { ext.prefix = { "${meta.id}_INDEL" } ext.args = '--truth-sensitivity-filter-level 99.9 -mode INDEL' + publishDir = [ + enabled: false + ] } withName: 'MERGE_VQSR' { diff --git a/conf/test/tools_germline.config b/conf/test/tools_germline.config index edbcef73b8..31cb79cd93 100644 --- a/conf/test/tools_germline.config +++ b/conf/test/tools_germline.config @@ -15,6 +15,9 @@ params { fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] known_indels = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'] + known_indels_vqsr = "--resource:1000G,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.hg38.vcf.gz" + known_snps = params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'] + known_snps_vqsr = "--resource:hapmap,known=false,training=true,truth=true,prior=10.0 hapmap_3.3.hg38.vcf.gz" nucleotides_per_second = 20 step = 'variant_calling' tools = null diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf index 06010cc29f..381af40fb1 100644 --- a/modules/nf-core/gatk4/applyvqsr/main.nf +++ b/modules/nf-core/gatk4/applyvqsr/main.nf @@ -47,4 +47,16 @@ process GATK4_APPLYVQSR { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf index 6b4c2ece71..adfd1063ed 100644 --- a/modules/nf-core/gatk4/variantrecalibrator/main.nf +++ b/modules/nf-core/gatk4/variantrecalibrator/main.nf @@ -53,4 +53,18 @@ process GATK4_VARIANTRECALIBRATOR { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.recal + touch ${prefix}.idx + touch ${prefix}.tranches + touch ${prefix}plots.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/tests/csv/3.0/recalibrated_tumoronly_joint.csv b/tests/csv/3.0/recalibrated_tumoronly_joint.csv index f3ded832ec..daf3fb3c9b 100644 --- a/tests/csv/3.0/recalibrated_tumoronly_joint.csv +++ b/tests/csv/3.0/recalibrated_tumoronly_joint.csv @@ -1,3 +1,2 @@ patient,sex,status,sample,cram,crai -test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai -test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai +test,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 45dc2b06f0..a85cc8de4c 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -1,5 +1,5 @@ - name: Run joint germline variant calling with haplotypecaller - command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results + command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --known_snps_vqsr false --known_indels_vqsr false tags: - germline - joint_germline @@ -30,3 +30,39 @@ - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi - path: results/haplotypecaller should_exist: false +- name: Run joint germline variant calling with haplotypecaller + command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run + tags: + - germline + - joint_germline + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d2dffdbd2b4f1f26a06637592d24dab3 + - path: results/multiqc + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt + # Not stable enough + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary + # Not stable enough + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count + # Not stable enough + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual + # Not stable enough + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz + - path: results/variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz.tbi + - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz + - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi + - path: results/haplotypecaller + should_exist: false From d4de7e7afecbc3c59e89ddedc5a6752b627ea09c Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 15:43:46 +0000 Subject: [PATCH 06/24] rename test --- tests/test_joint_germline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index a85cc8de4c..c8467a6bb0 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -30,7 +30,7 @@ - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi - path: results/haplotypecaller should_exist: false -- name: Run joint germline variant calling with haplotypecaller +- name: Run joint germline variant calling with haplotypecaller with Stub for VQSR command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run tags: - germline From 21392d11f502193260a5eedbda0046d24656581f Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 15:44:14 +0000 Subject: [PATCH 07/24] add tags --- tests/test_joint_germline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index c8467a6bb0..f2afd9fe30 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -36,6 +36,7 @@ - germline - joint_germline - variant_calling + - vqsr files: - path: results/csv/variantcalled.csv md5sum: d2dffdbd2b4f1f26a06637592d24dab3 From ae75902cd67ba0f90c47c70ab670d4e275d4a508 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 15:45:00 +0000 Subject: [PATCH 08/24] revert local changes to csv --- tests/csv/3.0/recalibrated_tumoronly_joint.csv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/csv/3.0/recalibrated_tumoronly_joint.csv b/tests/csv/3.0/recalibrated_tumoronly_joint.csv index daf3fb3c9b..f3ded832ec 100644 --- a/tests/csv/3.0/recalibrated_tumoronly_joint.csv +++ b/tests/csv/3.0/recalibrated_tumoronly_joint.csv @@ -1,2 +1,3 @@ patient,sex,status,sample,cram,crai -test,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai From 1e087765fa0f27647226d92c16667c318778cd0d Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 15:57:43 +0000 Subject: [PATCH 09/24] update modules --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 03e831b920..94a7fa1894 100644 --- a/modules.json +++ b/modules.json @@ -173,7 +173,7 @@ }, "gatk4/applyvqsr": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0", "installed_by": ["modules"] }, "gatk4/baserecalibrator": { @@ -283,7 +283,7 @@ }, "gatk4/variantrecalibrator": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0", "installed_by": ["modules"] }, "manta/germline": { From 03a2f1fe9f7e3836b599eefb121db8c1bceac64c Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 16:29:49 +0000 Subject: [PATCH 10/24] fix duplicate entries in vcf --- conf/modules/joint_germline.config | 9 +----- .../bam_joint_calling_germline_gatk/main.nf | 29 +++++++++---------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/conf/modules/joint_germline.config b/conf/modules/joint_germline.config index ead7c9a86e..5905c482fd 100644 --- a/conf/modules/joint_germline.config +++ b/conf/modules/joint_germline.config @@ -74,15 +74,8 @@ process { } withName: 'GATK4_APPLYVQSR_INDEL' { - ext.prefix = { "${meta.id}_INDEL" } + ext.prefix = { "joint_germline_recalibrated" } ext.args = '--truth-sensitivity-filter-level 99.9 -mode INDEL' - publishDir = [ - enabled: false - ] - } - - withName: 'MERGE_VQSR' { - ext.prefix = "joint_germline_recalibrated" publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/"}, diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 2edd71bad3..b908c99bd7 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -85,7 +85,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { fai, dict.map{ meta, dict -> [ dict ] }) - //Prepare INDELs and SNPs separately for ApplyVQSR + //Prepare SNPs and INDELs for ApplyVQSR + // Step 1. : ApplyVQSR to SNPs + // Step 2. : Use ApplyVQSR_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html // Join results of variant recalibration into a single channel tuple // Rework meta for variantscalled.csv and annotation tools @@ -94,33 +96,28 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { .join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true) .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } - // Join results of variant recalibration into a single channel tuple - // Rework meta for variantscalled.csv and annotation tools - vqsr_input_indel = vqsr_input.join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true) - .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true) - .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true) - .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } - GATK4_APPLYVQSR_SNP( vqsr_input_snp, fasta, fai, dict.map{ meta, dict -> [ dict ] }) + // Join results of ApplyVQSR_SNP and use as input for Indels to avoid duplicate entries in the result + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta - meta.subMap('id') + [ id:'joint_variant_calling' ], vcf, tbi ]} + .join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + GATK4_APPLYVQSR_INDEL( vqsr_input_indel, fasta, fai, dict.map{ meta, dict -> [ dict ] }) - vqsr_snp_vcf = GATK4_APPLYVQSR_SNP.out.vcf - vqsr_indel_vcf = GATK4_APPLYVQSR_INDEL.out.vcf - - //Merge VQSR outputs into final VCF - MERGE_VQSR(vqsr_snp_vcf.mix(vqsr_indel_vcf).groupTuple(), dict) - - genotype_vcf = MERGE_GENOTYPEGVCFS.out.vcf.mix(MERGE_VQSR.out.vcf) - genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(MERGE_VQSR.out.tbi) + genotype_vcf = MERGE_GENOTYPEGVCFS.out.vcf.mix(GATK4_APPLYVQSR_INDEL.out.vcf) + genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(GATK4_APPLYVQSR_INDEL.out.tbi) versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions) From 3dca26fd6bc5dbd682abf4f3dd6a7f69c4da320c Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 16:43:38 +0000 Subject: [PATCH 11/24] update changelof --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b2b0b0bd7..4e77df1a17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1153](https://github.com/nf-core/sarek/pull/1153) - Add input validation for Sentieon & FGBio UMI incompatibility - [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint - [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster +- [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs ### Changed @@ -21,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1157](https://github.com/nf-core/sarek/pull/1157) - Move all vep args from `ext.args` to `params.vep_custom_args` to allow easier modifications - [#1059](https://github.com/nf-core/sarek/pull/1059) - Add `nf-validation` for samplesheet validation - [#1160](https://github.com/nf-core/sarek/pull/1160) - Updating tiddit to v3.6.1 +- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor single sample filtering of Haplotypecaller generated VCFs ([#1053](https://github.com/nf-core/sarek/pull/1053)) ### Fixed @@ -30,7 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md` - [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller - [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.) -- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor Haplotyecaller subworkflows +- [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)), + fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137)) ## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare From b4a42ae96f82522a0e51236bca5441843268f061 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sat, 5 Aug 2023 21:23:32 +0000 Subject: [PATCH 12/24] fix stub test --- tests/test_joint_germline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index f2afd9fe30..571a9063db 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -31,7 +31,7 @@ - path: results/haplotypecaller should_exist: false - name: Run joint germline variant calling with haplotypecaller with Stub for VQSR - command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run + command: nextflow run main.nf -profile test_cache,tools_germline,docker --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run tags: - germline - joint_germline From 8cb9232bcc9a86252cfeabd73458e81de3d78171 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sun, 6 Aug 2023 10:21:47 +0000 Subject: [PATCH 13/24] test fix for samplesheet --- workflows/sarek.nf | 157 +++++++++++++++++++++++---------------------- 1 file changed, 81 insertions(+), 76 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index df4d6b758c..d038b30033 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -77,101 +77,106 @@ if (params.input) { ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart") } -ch_from_samplesheet -.map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> - [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] -}.tap{ ch_with_patient_sample } -.groupTuple() -.map { patient_sample, ch_items -> - [ patient_sample, ch_items.size() ] -}.combine(ch_with_patient_sample, by: 0) -.map { patient_sample, num_lanes, ch_items -> - (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items - if (meta.lane && fastq_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - - def flowcell = flowcellLaneFromFastq(fastq_1) - // Don't use a random element for ID, it breaks resuming - def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - - meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] - - if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] - else { - error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") +input_sample = ch_from_samplesheet + .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] } - - // start from BAM - } else if (meta.lane && bam) { - if (params.step != 'mapping' && !bai) { - error("BAM index (bai) should be provided.") + .tap{ ch_with_patient_sample } // save the channel + .groupTuple() //group by patient_sample to get all lanes + .map { patient_sample, ch_items -> + // get number of lanes per sample + [ patient_sample, ch_items.size() ] } - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes + .map { patient_sample, num_lanes, ch_items -> - meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } + def flowcell = flowcellLaneFromFastq(fastq_1) + // Don't use a random element for ID, it breaks resuming + def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - // recalibration - } else if (table && cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] - else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } + if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] + else { + error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } - // recalibration when skipping MarkDuplicates - } else if (table && bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] + // start from BAM + } else if (meta.lane && bam) { + if (params.step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] - else { - error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] - // prepare_recalibration or variant_calling - } else if (cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] + if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] - else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } + // recalibration + } else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } - // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` - } else if (bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] + // recalibration when skipping MarkDuplicates + } else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } - // annotation - } else if (vcf) { - meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + // prepare_recalibration or variant_calling + } else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] - if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] - else { - error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` + } else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // annotation + } else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] + else { + error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } else { + error("Missing or unknown field in csv file header. Please check your samplesheet") + } } - } else { - error("Missing or unknown field in csv file header. Please check your samplesheet") - } -}.set { input_sample } if (params.step != 'annotate' && params.tools && !params.build_only_index) { // Two checks for ensuring that the pipeline stops with a meaningful error message if // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. - ch_from_samplesheet.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples + input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples if (!params.build_only_index) { def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] def tools_tumor_asked = [] @@ -183,7 +188,7 @@ if (params.step != 'annotate' && params.tools && !params.build_only_index) { } } } - ch_from_samplesheet.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples + input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] def requested_tools_requiring_normal_samples = [] tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> From 32fb0f4c542a67e2b05aad4be089e78de56c8beb Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sun, 6 Aug 2023 17:47:11 +0000 Subject: [PATCH 14/24] add view for debugging [skip actions] --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index b908c99bd7..489ea578dc 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -36,7 +36,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // Map input for GenomicsDBImport // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel // Group by [0, 3] to avoid a list of metas and make sure that any intervals - + input.view() gendb_input = input .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } .groupTuple(by:3) //join on interval file From bcbc77334a2c2f6ba4af16cadef2e1fc41af6ba8 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sun, 6 Aug 2023 18:57:13 +0000 Subject: [PATCH 15/24] add intervals name to avoid random joining --- .../local/bam_joint_calling_germline_gatk/main.nf | 1 - .../bam_variant_calling_haplotypecaller/main.nf | 15 +++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 489ea578dc..0c29298fba 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -36,7 +36,6 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // Map input for GenomicsDBImport // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel // Group by [0, 3] to avoid a list of metas and make sure that any intervals - input.view() gendb_input = input .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } .groupTuple(by:3) //join on interval file diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf index 5b5da39909..6e825a784b 100644 --- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -28,7 +28,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { // Combine cram and intervals for spread and gather strategy cram_intervals = cram.combine(intervals) // Move num_intervals to meta map - .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] } + .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.simpleName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] } GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi) @@ -39,21 +39,28 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { .map{ meta, gvcf, tbi, cram, crai, intervals, dragstr_model -> [ meta, gvcf, tbi, intervals ] } // Figuring out if there is one or more vcf(s) from the same sample - haplotypecaller_vcf = GATK4_HAPLOTYPECALLER.out.vcf.branch{ + haplotypecaller_vcf = GATK4_HAPLOTYPECALLER.out.vcf.map{ + meta, vcf -> [ meta - meta.subMap('interval_name'), vcf] + } + .branch{ // Use meta.num_intervals to asses number of intervals intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 } // Figuring out if there is one or more tbi(s) from the same sample - haplotypecaller_tbi = GATK4_HAPLOTYPECALLER.out.tbi.branch{ + haplotypecaller_tbi = GATK4_HAPLOTYPECALLER.out.tbi.map{ + meta, tbi -> [ meta - meta.subMap('interval_name'), tbi] + }.branch{ // Use meta.num_intervals to asses number of intervals intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 } // Figuring out if there is one or more bam(s) from the same sample - haplotypecaller_bam = GATK4_HAPLOTYPECALLER.out.bam.branch{ + haplotypecaller_bam = GATK4_HAPLOTYPECALLER.out.bam.map{ + meta, bam -> [ meta - meta.subMap('interval_name'), bam] + }.branch{ // Use meta.num_intervals to asses number of intervals intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 From 992cb7f8b60b19c2e2e95eae5032cd83d845f510 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Sun, 6 Aug 2023 19:08:20 +0000 Subject: [PATCH 16/24] add comments --- subworkflows/local/bam_variant_calling_haplotypecaller/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf index 6e825a784b..1dbef4c613 100644 --- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -28,6 +28,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { // Combine cram and intervals for spread and gather strategy cram_intervals = cram.combine(intervals) // Move num_intervals to meta map + // Add interval_name to allow correct merging with interval files .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.simpleName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] } GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi) From a73b0b3a86542f3f3b33890e4414707a63827af7 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 15 Aug 2023 17:02:19 +0200 Subject: [PATCH 17/24] remove docker tag --- tests/test_joint_germline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 9a43cc26e3..6f503671b8 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -65,7 +65,7 @@ should_exist: false - name: Run joint germline variant calling with haplotypecaller with Stub for VQSR - command: nextflow run main.nf -profile test_cache,tools_germline,docker --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run + command: nextflow run main.nf -profile test_cache,tools_germline --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run tags: - germline - joint_germline From 1fceca9e47496d195a73a4d4817d38a9f5640e9d Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 10:57:18 +0200 Subject: [PATCH 18/24] add ugly channel magic beast --- .../bam_joint_calling_germline_gatk/main.nf | 43 ++++++++++++++++++- tests/test_joint_germline.yml | 2 +- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 0c29298fba..57abdcd9cc 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -115,8 +115,47 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { fai, dict.map{ meta, dict -> [ dict ] }) - genotype_vcf = MERGE_GENOTYPEGVCFS.out.vcf.mix(GATK4_APPLYVQSR_INDEL.out.vcf) - genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(GATK4_APPLYVQSR_INDEL.out.tbi) + + // The following is an ugly monster to achieve the following: + // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR + // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS + + // Remap both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements + merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]} + merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]} + + vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]} + vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]} + + // Join on metamap + // If both --> meta, vcf_merged, vcf_bqsr + // If not VQSR --> meta, vcf_merged, [] + // if the second is empty, use the first + genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{ + meta, joint_vcf, recal_vcf -> + + new_id = "joint_variant_calling" + vcf_out = joint_vcf + if(recal_vcf){ + new_id = "recalibrated_joint_variant_calling" + vcf_out = recal_vcf + } + + [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out] + } + + genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{ + meta, joint_tbi, recal_tbi -> + + new_id = "joint_variant_calling" + tbi_out = joint_tbi + if(recal_tbi){ + new_id = "recalibrated_joint_variant_calling" + tbi_out = recal_tbi + } + + [[id:new_id], tbi_out] + } versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 6f503671b8..1eeaa6a792 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -37,7 +37,7 @@ - germline - joint_germline - variant_calling - files: + files: - path: results/csv/variantcalled.csv md5sum: d2dffdbd2b4f1f26a06637592d24dab3 - path: results/multiqc From 9b192e7be25d5c927cb469174154d628c2f6133b Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 16 Aug 2023 09:00:20 +0000 Subject: [PATCH 19/24] [automated] Fix linting with Prettier --- tests/test_joint_germline.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 1eeaa6a792..00826ddc5d 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -101,4 +101,3 @@ - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi - path: results/haplotypecaller should_exist: false - From 8f33e7f1626a0184aec25e15f1aa15b91bfa945e Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 11:03:42 +0200 Subject: [PATCH 20/24] add missing keys --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 57abdcd9cc..452c7add6c 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -154,7 +154,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { tbi_out = recal_tbi } - [[id:new_id], tbi_out] + [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] } versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) From d70294d9dec15cd83d810f997e63c1105ac3b7ec Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 15:19:47 +0200 Subject: [PATCH 21/24] update md5sums --- tests/test_joint_germline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 00826ddc5d..084f6dff12 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -73,7 +73,7 @@ - vqsr files: - path: results/csv/variantcalled.csv - md5sum: d2dffdbd2b4f1f26a06637592d24dab3 + md5sum: 1a7e405250ac5f253197ebf4672b1f98 - path: results/multiqc - path: results/preprocessing/recalibrated/test/test.recal.cram should_exist: false From 7b2f17f8332a174e82d5332bf42b55842d96c007 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 16:06:47 +0200 Subject: [PATCH 22/24] simplify logic + fix output paths --- .../bam_joint_calling_germline_gatk/main.nf | 33 ++++++------------- tests/test_joint_germline.yml | 8 ++--- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 452c7add6c..dae4c621fd 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -93,7 +93,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { vqsr_input_snp = vqsr_input.join(VARIANTRECALIBRATOR_SNP.out.recal, failOnDuplicate: true) .join(VARIANTRECALIBRATOR_SNP.out.idx, failOnDuplicate: true) .join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true) - .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } GATK4_APPLYVQSR_SNP( vqsr_input_snp, @@ -103,11 +103,11 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // Join results of ApplyVQSR_SNP and use as input for Indels to avoid duplicate entries in the result // Rework meta for variantscalled.csv and annotation tools - vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta - meta.subMap('id') + [ id:'joint_variant_calling' ], vcf, tbi ]} + vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]} .join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true) .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true) .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true) - .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } GATK4_APPLYVQSR_INDEL( vqsr_input_indel, @@ -120,12 +120,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS - // Remap both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements - merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]} - merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]} - - vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]} - vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]} + // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements + vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} // Join on metamap // If both --> meta, vcf_merged, vcf_bqsr @@ -134,27 +131,17 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{ meta, joint_vcf, recal_vcf -> - new_id = "joint_variant_calling" - vcf_out = joint_vcf - if(recal_vcf){ - new_id = "recalibrated_joint_variant_calling" - vcf_out = recal_vcf - } + vcf_out = recal_vcf ?: joint_vcf - [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out] + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out] } genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{ meta, joint_tbi, recal_tbi -> - new_id = "joint_variant_calling" - tbi_out = joint_tbi - if(recal_tbi){ - new_id = "recalibrated_joint_variant_calling" - tbi_out = recal_tbi - } + tbi_out = recal_tbi ?: joint_tbi - [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] } versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index 084f6dff12..ba19cf5aa2 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -79,13 +79,13 @@ should_exist: false - path: results/preprocessing/recalibrated/test/test.recal.cram.crai should_exist: false - - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt + - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt # Not stable enough - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.FILTER.summary # Not stable enough - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.TsTv.count # Not stable enough - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.TsTv.qual # Not stable enough - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz # binary changes md5sums on reruns From 4aab5585fc0027252de422722175904b42626054 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 16:53:47 +0200 Subject: [PATCH 23/24] fix output paths --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index dae4c621fd..50c75b0c04 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -120,6 +120,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS + merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} @@ -144,6 +147,8 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] } + genotype_vcf.view() + versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions) versions = versions.mix(VARIANTRECALIBRATOR_SNP.out.versions) From 89163ced41f416e8819dbee38872127fc114c58f Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 16 Aug 2023 17:19:22 +0200 Subject: [PATCH 24/24] remove view statement --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 -- tests/test_joint_germline.yml | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index 50c75b0c04..f0d9148c07 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -147,8 +147,6 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] } - genotype_vcf.view() - versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions) versions = versions.mix(VARIANTRECALIBRATOR_SNP.out.versions) diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml index ba19cf5aa2..6bd7b4532f 100644 --- a/tests/test_joint_germline.yml +++ b/tests/test_joint_germline.yml @@ -73,7 +73,7 @@ - vqsr files: - path: results/csv/variantcalled.csv - md5sum: 1a7e405250ac5f253197ebf4672b1f98 + md5sum: 8513cd4aef3f54e2a72940461617c6c7 - path: results/multiqc - path: results/preprocessing/recalibrated/test/test.recal.cram should_exist: false