From ade6b495112f8920db3f22c02554acdf4675a43b Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 09:57:57 +0000
Subject: [PATCH 01/24] simplify hc filtering logic

---
 .../bam_variant_calling_germline_all/main.nf  | 32 +++++++++++-----
 .../main.nf                                   | 38 ++++---------------
 2 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index 82a4991727..3703b2d5ed 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -119,22 +119,16 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
             dbsnp,
             dbsnp_tbi,
             dbsnp_vqsr,
-            known_sites_indels,
-            known_sites_indels_tbi,
-            known_indels_vqsr,
-            known_sites_snps,
-            known_sites_snps_tbi,
-            known_snps_vqsr,
-            intervals,
-            intervals_bed_combined_haplotypec,
-            ((skip_tools && skip_tools.split(',').contains('haplotypecaller_filter') || joint_germline)))
+            intervals)
 
         vcf_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.vcf
+        tbi_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.tbi
+
         versions = versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions)
 
         if (joint_germline) {
             BAM_JOINT_CALLING_GERMLINE_GATK(
-                BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.genotype_intervals,
+                BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.gvcf_tbi_intervals,
                 fasta,
                 fasta_fai,
                 dict,
@@ -150,6 +144,24 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
 
             vcf_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_GATK.out.genotype_vcf
             versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_GATK.out.versions)
+        } else {
+
+            // If single sample track, check if filtering should be done
+            if (!skip_haplotypecaller_filter) {
+
+                VCF_VARIANT_FILTERING_GATK(
+                    vcf_haplotypecaller.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true),
+                    fasta,
+                    fasta_fai,
+                    dict.map{ meta, dict -> [ dict ] },
+                    intervals_bed_combined_haplotypec,
+                    known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
+                    known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())
+
+                vcf_haplotypecaller = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf
+
+                versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
+            }
         }
     }
 
diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
index 9eee7766e4..5b5da39909 100644
--- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
+++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
@@ -5,7 +5,6 @@
 // A when clause condition is defined in the conf/modules.config to determine if the module should be run
 
 include { BAM_MERGE_INDEX_SAMTOOLS                            } from '../bam_merge_index_samtools/main'
-include { VCF_VARIANT_FILTERING_GATK                          } from '../vcf_variant_filtering_gatk/main'
 include { GATK4_HAPLOTYPECALLER                               } from '../../../modules/nf-core/gatk4/haplotypecaller/main'
 include { GATK4_MERGEVCFS            as MERGE_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/mergevcfs/main'
 
@@ -18,20 +17,12 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
     dbsnp                        // channel: [optional]
     dbsnp_tbi                    // channel: [optional]
     dbsnp_vqsr                   // channel: [optional]
-    known_sites_indels           // channel: [optional]
-    known_sites_indels_tbi       // channel: [optional]
-    known_indels_vqsr            // channel: [optional]
-    known_sites_snps             // channel: [optional]
-    known_sites_snps_tbi         // channel: [optional]
-    known_snps_vqsr              // channel: [optional]
     intervals                    // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals
-    intervals_bed_combined       // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals
-    skip_haplotypecaller_filter  // boolean: [mandatory] [default: false] skip haplotypecaller filter
 
     main:
     versions = Channel.empty()
 
-    vcf = Channel.empty()
+    vcf           = Channel.empty()
     realigned_bam = Channel.empty()
 
     // Combine cram and intervals for spread and gather strategy
@@ -42,7 +33,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
     GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi)
 
     // For joint genotyping
-    genotype_intervals = GATK4_HAPLOTYPECALLER.out.vcf
+    gvcf_tbi_intervals = GATK4_HAPLOTYPECALLER.out.vcf
         .join(GATK4_HAPLOTYPECALLER.out.tbi, failOnMismatch: true)
         .join(cram_intervals, failOnMismatch: true)
         .map{ meta, gvcf, tbi, cram, crai, intervals, dragstr_model -> [ meta, gvcf, tbi, intervals ] }
@@ -87,33 +78,18 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
 
     realigned_bam = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai
 
-    if (!skip_haplotypecaller_filter) {
-
-        VCF_VARIANT_FILTERING_GATK(
-            haplotypecaller_vcf.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true),
-            fasta,
-            fasta_fai,
-            dict.map{ meta, dict -> [ dict ] },
-            intervals_bed_combined,
-            known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
-            known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())
-
-        vcf = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf
-
-        versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
-
-    } else vcf = haplotypecaller_vcf
-
     versions = versions.mix(GATK4_HAPLOTYPECALLER.out.versions)
     versions = versions.mix(MERGE_HAPLOTYPECALLER.out.versions)
 
     // Remove no longer necessary field: num_intervals
-    vcf = vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
+    vcf = haplotypecaller_vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
+    tbi = haplotypecaller_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] }
 
     emit:
-    genotype_intervals // For joint genotyping
+    gvcf_tbi_intervals // For joint genotyping
     realigned_bam      // Optional
-    vcf                // vcf filtered or not
+    vcf                // vcf
+    tbi                // tbi
 
     versions
 }

From eabf99bec045f87514df337977d86476c23d70db Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 10:23:59 +0000
Subject: [PATCH 02/24] pull params out of sw

---
 subworkflows/local/bam_variant_calling_germline_all/main.nf | 4 +++-
 workflows/sarek.nf                                          | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index 3703b2d5ed..6ce8e45aa2 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -13,6 +13,7 @@ include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling
 include { BAM_VARIANT_CALLING_MPILEUP             } from '../bam_variant_calling_mpileup/main'
 include { BAM_VARIANT_CALLING_SINGLE_STRELKA      } from '../bam_variant_calling_single_strelka/main'
 include { BAM_VARIANT_CALLING_SINGLE_TIDDIT       } from '../bam_variant_calling_single_tiddit/main'
+include { VCF_VARIANT_FILTERING_GATK              } from '../vcf_variant_filtering_gatk/main'
 
 workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
     take:
@@ -38,6 +39,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
     known_sites_snps_tbi
     known_snps_vqsr
     joint_germline                    // boolean: [mandatory] [default: false] joint calling of germline variants
+    skip_haplotypecaller_filter       // boolean: [mandatory] [default: false] whether to skip filtering of haplotypecaller single sample vcfs
     sentieon_haplotyper_emit_mode     // channel: [mandatory] value channel with string
 
     main:
@@ -150,7 +152,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
             if (!skip_haplotypecaller_filter) {
 
                 VCF_VARIANT_FILTERING_GATK(
-                    vcf_haplotypecaller.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true),
+                    vcf_haplotypecaller.join(tbi_haplotypecaller, failOnDuplicate: true, failOnMismatch: true),
                     fasta,
                     fasta_fai,
                     dict.map{ meta, dict -> [ dict ] },
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 669dac1bd5..df4d6b758c 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -1112,6 +1112,7 @@ workflow SAREK {
             known_sites_snps_tbi,
             known_snps_vqsr,
             params.joint_germline,
+            params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), // true if filtering should be skipped
             params.sentieon_haplotyper_emit_mode)
 
         // TUMOR ONLY VARIANT CALLING

From db7cb23ea29e3c4d600ae7b071ce022f041cf191 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 10:27:12 +0000
Subject: [PATCH 03/24] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2af6c6497..2b2b0b0bd7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
 - [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
 - [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.)
+- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor Haplotyecaller subworkflows
 
 ## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare
 

From 114f1b2fd74055a3413760ba0de40c068bea872b Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 11:40:52 +0000
Subject: [PATCH 04/24] only join on file name to make this more stable

---
 subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 6c53e15b60..2edd71bad3 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -39,8 +39,11 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
 
     gendb_input = input
         .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] }
-        .groupTuple(by:[0, 3])
-        .map{ meta, gvcf, tbi, intervals -> [ meta, gvcf, tbi, intervals, [], [] ] }
+        .groupTuple(by:3) //join on interval file
+        .map{ meta_list, gvcf, tbi, intervals ->
+            // meta is now a list of [meta1, meta2] but they are all the same. So take the first element.
+            [ meta_list[0], gvcf, tbi, intervals, [], [] ]
+        }
 
     // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport
     GATK4_GENOMICSDBIMPORT(gendb_input, false, false, false)

From 9e24c7ae42a3769f941aee095c721fca7068c6a7 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 15:42:49 +0000
Subject: [PATCH 05/24] add stub tests

---
 conf/modules/joint_germline.config            |  6 +++
 conf/test/tools_germline.config               |  3 ++
 modules/nf-core/gatk4/applyvqsr/main.nf       | 12 ++++++
 .../nf-core/gatk4/variantrecalibrator/main.nf | 14 +++++++
 .../csv/3.0/recalibrated_tumoronly_joint.csv  |  3 +-
 tests/test_joint_germline.yml                 | 38 ++++++++++++++++++-
 6 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/conf/modules/joint_germline.config b/conf/modules/joint_germline.config
index c03e629b22..ead7c9a86e 100644
--- a/conf/modules/joint_germline.config
+++ b/conf/modules/joint_germline.config
@@ -68,11 +68,17 @@ process {
     withName: 'GATK4_APPLYVQSR_SNP' {
         ext.prefix = { "${meta.id}_SNP" }
         ext.args = '--truth-sensitivity-filter-level 99.9 -mode SNP'
+        publishDir = [
+            enabled: false
+        ]
     }
 
     withName: 'GATK4_APPLYVQSR_INDEL' {
         ext.prefix    = { "${meta.id}_INDEL" }
         ext.args      = '--truth-sensitivity-filter-level 99.9 -mode INDEL'
+        publishDir = [
+            enabled: false
+        ]
     }
 
     withName: 'MERGE_VQSR' {
diff --git a/conf/test/tools_germline.config b/conf/test/tools_germline.config
index edbcef73b8..31cb79cd93 100644
--- a/conf/test/tools_germline.config
+++ b/conf/test/tools_germline.config
@@ -15,6 +15,9 @@ params {
     fasta                  = params.test_data['homo_sapiens']['genome']['genome_21_fasta']
     intervals              = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed']
     known_indels           = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz']
+    known_indels_vqsr      = "--resource:1000G,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.hg38.vcf.gz"
+    known_snps             = params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz']
+    known_snps_vqsr        = "--resource:hapmap,known=false,training=true,truth=true,prior=10.0 hapmap_3.3.hg38.vcf.gz"
     nucleotides_per_second = 20
     step                   = 'variant_calling'
     tools                  = null
diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf
index 06010cc29f..381af40fb1 100644
--- a/modules/nf-core/gatk4/applyvqsr/main.nf
+++ b/modules/nf-core/gatk4/applyvqsr/main.nf
@@ -47,4 +47,16 @@ process GATK4_APPLYVQSR {
         gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf
index 6b4c2ece71..adfd1063ed 100644
--- a/modules/nf-core/gatk4/variantrecalibrator/main.nf
+++ b/modules/nf-core/gatk4/variantrecalibrator/main.nf
@@ -53,4 +53,18 @@ process GATK4_VARIANTRECALIBRATOR {
         gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.recal
+    touch ${prefix}.idx
+    touch ${prefix}.tranches
+    touch ${prefix}plots.R
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/tests/csv/3.0/recalibrated_tumoronly_joint.csv b/tests/csv/3.0/recalibrated_tumoronly_joint.csv
index f3ded832ec..daf3fb3c9b 100644
--- a/tests/csv/3.0/recalibrated_tumoronly_joint.csv
+++ b/tests/csv/3.0/recalibrated_tumoronly_joint.csv
@@ -1,3 +1,2 @@
 patient,sex,status,sample,cram,crai
-test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai
-test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai
+test,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai
diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 45dc2b06f0..a85cc8de4c 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -1,5 +1,5 @@
 - name: Run joint germline variant calling with haplotypecaller
-  command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results
+  command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --known_snps_vqsr false --known_indels_vqsr false
   tags:
     - germline
     - joint_germline
@@ -30,3 +30,39 @@
     - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi
     - path: results/haplotypecaller
       should_exist: false
+- name: Run joint germline variant calling with haplotypecaller
+  command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run
+  tags:
+    - germline
+    - joint_germline
+    - variant_calling
+  files:
+    - path: results/csv/variantcalled.csv
+      md5sum: d2dffdbd2b4f1f26a06637592d24dab3
+    - path: results/multiqc
+    - path: results/preprocessing/recalibrated/test/test.recal.cram
+      should_exist: false
+    - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+      should_exist: false
+    - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt
+    # Not stable enough
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary
+    # Not stable enough
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count
+    # Not stable enough
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual
+    # Not stable enough
+    - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz
+    # binary changes md5sums on reruns
+    - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi
+    # binary changes md5sums on reruns
+    - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.vcf.gz
+    # binary changes md5sums on reruns
+    - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.vcf.gz.tbi
+    # binary changes md5sums on reruns
+    - path: results/variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz
+    - path: results/variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz.tbi
+    - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz
+    - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi
+    - path: results/haplotypecaller
+      should_exist: false

From d4de7e7afecbc3c59e89ddedc5a6752b627ea09c Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 15:43:46 +0000
Subject: [PATCH 06/24] rename test

---
 tests/test_joint_germline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index a85cc8de4c..c8467a6bb0 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -30,7 +30,7 @@
     - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi
     - path: results/haplotypecaller
       should_exist: false
-- name: Run joint germline variant calling with haplotypecaller
+- name: Run joint germline variant calling with haplotypecaller with Stub for VQSR
   command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run
   tags:
     - germline

From 21392d11f502193260a5eedbda0046d24656581f Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 15:44:14 +0000
Subject: [PATCH 07/24] add tags

---
 tests/test_joint_germline.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index c8467a6bb0..f2afd9fe30 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -36,6 +36,7 @@
     - germline
     - joint_germline
     - variant_calling
+    - vqsr
   files:
     - path: results/csv/variantcalled.csv
       md5sum: d2dffdbd2b4f1f26a06637592d24dab3

From ae75902cd67ba0f90c47c70ab670d4e275d4a508 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 15:45:00 +0000
Subject: [PATCH 08/24] revert local changes to csv

---
 tests/csv/3.0/recalibrated_tumoronly_joint.csv | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/csv/3.0/recalibrated_tumoronly_joint.csv b/tests/csv/3.0/recalibrated_tumoronly_joint.csv
index daf3fb3c9b..f3ded832ec 100644
--- a/tests/csv/3.0/recalibrated_tumoronly_joint.csv
+++ b/tests/csv/3.0/recalibrated_tumoronly_joint.csv
@@ -1,2 +1,3 @@
 patient,sex,status,sample,cram,crai
-test,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai
+test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai
+test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai

From 1e087765fa0f27647226d92c16667c318778cd0d Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 15:57:43 +0000
Subject: [PATCH 09/24] update modules

---
 modules.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules.json b/modules.json
index 03e831b920..94a7fa1894 100644
--- a/modules.json
+++ b/modules.json
@@ -173,7 +173,7 @@
                     },
                     "gatk4/applyvqsr": {
                         "branch": "master",
-                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0",
                         "installed_by": ["modules"]
                     },
                     "gatk4/baserecalibrator": {
@@ -283,7 +283,7 @@
                     },
                     "gatk4/variantrecalibrator": {
                         "branch": "master",
-                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0",
                         "installed_by": ["modules"]
                     },
                     "manta/germline": {

From 03a2f1fe9f7e3836b599eefb121db8c1bceac64c Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 16:29:49 +0000
Subject: [PATCH 10/24] fix duplicate entries in vcf

---
 conf/modules/joint_germline.config            |  9 +-----
 .../bam_joint_calling_germline_gatk/main.nf   | 29 +++++++++----------
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/conf/modules/joint_germline.config b/conf/modules/joint_germline.config
index ead7c9a86e..5905c482fd 100644
--- a/conf/modules/joint_germline.config
+++ b/conf/modules/joint_germline.config
@@ -74,15 +74,8 @@ process {
     }
 
     withName: 'GATK4_APPLYVQSR_INDEL' {
-        ext.prefix    = { "${meta.id}_INDEL" }
+        ext.prefix    = { "joint_germline_recalibrated" }
         ext.args      = '--truth-sensitivity-filter-level 99.9 -mode INDEL'
-        publishDir = [
-            enabled: false
-        ]
-    }
-
-    withName: 'MERGE_VQSR' {
-        ext.prefix       = "joint_germline_recalibrated"
         publishDir       = [
             mode: params.publish_dir_mode,
             path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/"},
diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 2edd71bad3..b908c99bd7 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -85,7 +85,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
         fai,
         dict.map{ meta, dict -> [ dict ] })
 
-    //Prepare INDELs and SNPs separately for ApplyVQSR
+    //Prepare SNPs and INDELs for ApplyVQSR
+    // Step 1. : ApplyVQSR to SNPs
+    // Step 2. : Use ApplyVQSR_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html
 
     // Join results of variant recalibration into a single channel tuple
     // Rework meta for variantscalled.csv and annotation tools
@@ -94,33 +96,28 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
         .join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true)
         .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
 
-    // Join results of variant recalibration into a single channel tuple
-    // Rework meta for variantscalled.csv and annotation tools
-    vqsr_input_indel = vqsr_input.join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true)
-        .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true)
-        .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true)
-        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
-
     GATK4_APPLYVQSR_SNP(
         vqsr_input_snp,
         fasta,
         fai,
         dict.map{ meta, dict -> [ dict ] })
 
+    // Join results of ApplyVQSR_SNP and use as input for Indels to avoid duplicate entries in the result
+    // Rework meta for variantscalled.csv and annotation tools
+    vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta - meta.subMap('id') + [ id:'joint_variant_calling' ], vcf, tbi ]}
+        .join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true)
+        .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true)
+        .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true)
+        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
+
     GATK4_APPLYVQSR_INDEL(
         vqsr_input_indel,
         fasta,
         fai,
         dict.map{ meta, dict -> [ dict ] })
 
-    vqsr_snp_vcf = GATK4_APPLYVQSR_SNP.out.vcf
-    vqsr_indel_vcf = GATK4_APPLYVQSR_INDEL.out.vcf
-
-    //Merge VQSR outputs into final VCF
-    MERGE_VQSR(vqsr_snp_vcf.mix(vqsr_indel_vcf).groupTuple(), dict)
-
-    genotype_vcf   = MERGE_GENOTYPEGVCFS.out.vcf.mix(MERGE_VQSR.out.vcf)
-    genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(MERGE_VQSR.out.tbi)
+    genotype_vcf   = MERGE_GENOTYPEGVCFS.out.vcf.mix(GATK4_APPLYVQSR_INDEL.out.vcf)
+    genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(GATK4_APPLYVQSR_INDEL.out.tbi)
 
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)
     versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions)

From 3dca26fd6bc5dbd682abf4f3dd6a7f69c4da320c Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 16:43:38 +0000
Subject: [PATCH 11/24] update changelog

---
 CHANGELOG.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b2b0b0bd7..4e77df1a17 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#1153](https://github.com/nf-core/sarek/pull/1153) - Add input validation for Sentieon & FGBio UMI incompatibility
 - [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint
 - [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster
+- [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs
 
 ### Changed
 
@@ -21,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#1157](https://github.com/nf-core/sarek/pull/1157) - Move all vep args from `ext.args` to `params.vep_custom_args` to allow easier modifications
 - [#1059](https://github.com/nf-core/sarek/pull/1059) - Add `nf-validation` for samplesheet validation
 - [#1160](https://github.com/nf-core/sarek/pull/1160) - Updating tiddit to v3.6.1
+- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor single sample filtering of Haplotypecaller generated VCFs ([#1053](https://github.com/nf-core/sarek/pull/1053))
 
 ### Fixed
 
@@ -30,7 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
 - [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
 - [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.)
-- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor Haplotyecaller subworkflows
+- [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)),
+  fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137))
 
 ## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare
 

From b4a42ae96f82522a0e51236bca5441843268f061 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sat, 5 Aug 2023 21:23:32 +0000
Subject: [PATCH 12/24] fix stub test

---
 tests/test_joint_germline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index f2afd9fe30..571a9063db 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -31,7 +31,7 @@
     - path: results/haplotypecaller
       should_exist: false
 - name: Run joint germline variant calling with haplotypecaller with Stub for VQSR
-  command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results --stub_run
+  command: nextflow run main.nf -profile test_cache,tools_germline,docker --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run
   tags:
     - germline
     - joint_germline

From 8cb9232bcc9a86252cfeabd73458e81de3d78171 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sun, 6 Aug 2023 10:21:47 +0000
Subject: [PATCH 13/24] test fix for samplesheet

---
 workflows/sarek.nf | 157 +++++++++++++++++++++++----------------------
 1 file changed, 81 insertions(+), 76 deletions(-)

diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index df4d6b758c..d038b30033 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -77,101 +77,106 @@ if (params.input) {
     ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart")
 }
 
-ch_from_samplesheet
-.map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller ->
-    [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ]
-}.tap{ ch_with_patient_sample }
-.groupTuple()
-.map { patient_sample, ch_items ->
-    [ patient_sample, ch_items.size() ]
-}.combine(ch_with_patient_sample, by: 0)
-.map { patient_sample, num_lanes, ch_items ->
-    (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items
-    if (meta.lane && fastq_2) {
-        meta           = meta + [id: "${meta.sample}-${meta.lane}".toString()]
-        def CN         = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-
-        def flowcell   = flowcellLaneFromFastq(fastq_1)
-        // Don't use a random element for ID, it breaks resuming
-        def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
-
-        meta           = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1]
-
-        if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ]
-        else {
-            error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+input_sample = ch_from_samplesheet
+        .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller ->
+            // generate patient_sample key to group lanes together
+            [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ]
         }
-
-    // start from BAM
-    } else if (meta.lane && bam) {
-        if (params.step != 'mapping' && !bai) {
-            error("BAM index (bai) should be provided.")
+        .tap{ ch_with_patient_sample } // save the channel
+        .groupTuple() //group by patient_sample to get all lanes
+        .map { patient_sample, ch_items ->
+            // get number of lanes per sample
+            [ patient_sample, ch_items.size() ]
         }
-        meta            = meta + [id: "${meta.sample}-${meta.lane}".toString()]
-        def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-        def read_group  = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
+        .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes
+        .map { patient_sample, num_lanes, ch_items ->
 
-        meta            = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1]
+            (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items
+            if (meta.lane && fastq_2) {
+                meta           = meta + [id: "${meta.sample}-${meta.lane}".toString()]
+                def CN         = params.seq_center ? "CN:${params.seq_center}\\t" : ''
 
-        if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ]
-        else {
-            error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
-        }
+                def flowcell   = flowcellLaneFromFastq(fastq_1)
+                // Don't use a random element for ID, it breaks resuming
+                def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
 
-    // recalibration
-    } else if (table && cram) {
-        meta = meta + [id: meta.sample, data_type: 'cram']
+                meta           = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1]
 
-        if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ]
-        else {
-            error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
-        }
+                if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ]
+                else {
+                    error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
 
-    // recalibration when skipping MarkDuplicates
-    } else if (table && bam) {
-        meta = meta + [id: meta.sample, data_type: 'bam']
+            // start from BAM
+            } else if (meta.lane && bam) {
+                if (params.step != 'mapping' && !bai) {
+                    error("BAM index (bai) should be provided.")
+                }
+                meta            = meta + [id: "${meta.sample}-${meta.lane}".toString()]
+                def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
+                def read_group  = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
 
-        if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ]
-        else {
-            error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
-        }
+                meta            = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1]
 
-    // prepare_recalibration or variant_calling
-    } else if (cram) {
-        meta = meta + [id: meta.sample, data_type: 'cram']
+                if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ]
+                else {
+                    error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
 
-        if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ]
-        else {
-            error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
-        }
+            // recalibration
+            } else if (table && cram) {
+                meta = meta + [id: meta.sample, data_type: 'cram']
+
+                if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ]
+                else {
+                    error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
 
-    // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates`
-    } else if (bam) {
-        meta = meta + [id: meta.sample, data_type: 'bam']
+            // recalibration when skipping MarkDuplicates
+            } else if (table && bam) {
+                meta = meta + [id: meta.sample, data_type: 'bam']
 
-        if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ]
-        else {
-            error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
-        }
+                if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ]
+                else {
+                    error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
 
-    // annotation
-    } else if (vcf) {
-        meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: '']
+            // prepare_recalibration or variant_calling
+            } else if (cram) {
+                meta = meta + [id: meta.sample, data_type: 'cram']
 
-        if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ]
-        else {
-            error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ]
+                else {
+                    error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
+
+            // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates`
+            } else if (bam) {
+                meta = meta + [id: meta.sample, data_type: 'bam']
+
+                if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ]
+                else {
+                    error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
+
+            // annotation
+            } else if (vcf) {
+                meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: '']
+
+                if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ]
+                else {
+                    error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations")
+                }
+            } else {
+                error("Missing or unknown field in csv file header. Please check your samplesheet")
+            }
         }
-    } else {
-        error("Missing or unknown field in csv file header. Please check your samplesheet")
-    }
-}.set { input_sample }
 
 if (params.step != 'annotate' && params.tools && !params.build_only_index) {
     // Two checks for ensuring that the pipeline stops with a meaningful error message if
     // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and
     // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples.
-    ch_from_samplesheet.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples
+    input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples
         if (!params.build_only_index) {
             def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro']
             def tools_tumor_asked = []
@@ -183,7 +188,7 @@ if (params.step != 'annotate' && params.tools && !params.build_only_index) {
             }
         }
     }
-    ch_from_samplesheet.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples
+    input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples
         def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro']
         def requested_tools_requiring_normal_samples = []
         tools_requiring_normal_samples.each{ tool_requiring_normal_samples ->

From 32fb0f4c542a67e2b05aad4be089e78de56c8beb Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sun, 6 Aug 2023 17:47:11 +0000
Subject: [PATCH 14/24] add view for debugging [skip actions]

---
 subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index b908c99bd7..489ea578dc 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -36,7 +36,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     // Map input for GenomicsDBImport
     // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel
     // Group by [0, 3] to avoid a list of metas and make sure that any intervals
-
+    input.view()
     gendb_input = input
         .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] }
         .groupTuple(by:3) //join on interval file

From bcbc77334a2c2f6ba4af16cadef2e1fc41af6ba8 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sun, 6 Aug 2023 18:57:13 +0000
Subject: [PATCH 15/24] add intervals name to avoid random joining

---
 .../local/bam_joint_calling_germline_gatk/main.nf |  1 -
 .../bam_variant_calling_haplotypecaller/main.nf   | 15 +++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 489ea578dc..0c29298fba 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -36,7 +36,6 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     // Map input for GenomicsDBImport
     // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel
     // Group by [0, 3] to avoid a list of metas and make sure that any intervals
-    input.view()
     gendb_input = input
         .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] }
         .groupTuple(by:3) //join on interval file
diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
index 5b5da39909..6e825a784b 100644
--- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
+++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
@@ -28,7 +28,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
     // Combine cram and intervals for spread and gather strategy
     cram_intervals = cram.combine(intervals)
         // Move num_intervals to meta map
-        .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] }
+        .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.simpleName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] }
 
     GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi)
 
@@ -39,21 +39,28 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
         .map{ meta, gvcf, tbi, cram, crai, intervals, dragstr_model -> [ meta, gvcf, tbi, intervals ] }
 
     // Figuring out if there is one or more vcf(s) from the same sample
-    haplotypecaller_vcf = GATK4_HAPLOTYPECALLER.out.vcf.branch{
+    haplotypecaller_vcf = GATK4_HAPLOTYPECALLER.out.vcf.map{
+            meta, vcf -> [ meta - meta.subMap('interval_name'), vcf]
+        }
+        .branch{
         // Use meta.num_intervals to asses number of intervals
             intervals:    it[0].num_intervals > 1
             no_intervals: it[0].num_intervals <= 1
         }
 
     // Figuring out if there is one or more tbi(s) from the same sample
-    haplotypecaller_tbi = GATK4_HAPLOTYPECALLER.out.tbi.branch{
+    haplotypecaller_tbi = GATK4_HAPLOTYPECALLER.out.tbi.map{
+            meta, tbi -> [ meta - meta.subMap('interval_name'), tbi]
+        }.branch{
         // Use meta.num_intervals to asses number of intervals
             intervals:    it[0].num_intervals > 1
             no_intervals: it[0].num_intervals <= 1
         }
 
     // Figuring out if there is one or more bam(s) from the same sample
-    haplotypecaller_bam = GATK4_HAPLOTYPECALLER.out.bam.branch{
+    haplotypecaller_bam = GATK4_HAPLOTYPECALLER.out.bam.map{
+            meta, bam -> [ meta - meta.subMap('interval_name'), bam]
+        }.branch{
         // Use meta.num_intervals to asses number of intervals
             intervals:    it[0].num_intervals > 1
             no_intervals: it[0].num_intervals <= 1

From 992cb7f8b60b19c2e2e95eae5032cd83d845f510 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Sun, 6 Aug 2023 19:08:20 +0000
Subject: [PATCH 16/24] add comments

---
 subworkflows/local/bam_variant_calling_haplotypecaller/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
index 6e825a784b..1dbef4c613 100644
--- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
+++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
@@ -28,6 +28,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
     // Combine cram and intervals for spread and gather strategy
     cram_intervals = cram.combine(intervals)
         // Move num_intervals to meta map
+        // Add interval_name to allow correct merging with interval files
         .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.simpleName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] }
 
     GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi)

From a73b0b3a86542f3f3b33890e4414707a63827af7 Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Tue, 15 Aug 2023 17:02:19 +0200
Subject: [PATCH 17/24] remove docker tag

---
 tests/test_joint_germline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 9a43cc26e3..6f503671b8 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -65,7 +65,7 @@
       should_exist: false
 
 - name: Run joint germline variant calling with haplotypecaller with Stub for VQSR
-  command: nextflow run main.nf -profile test_cache,tools_germline,docker --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run
+  command: nextflow run main.nf -profile test_cache,tools_germline --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --step variant_calling --joint_germline --outdir results -stub-run
   tags:
     - germline
     - joint_germline

From 1fceca9e47496d195a73a4d4817d38a9f5640e9d Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 10:57:18 +0200
Subject: [PATCH 18/24] add ugly channel magic beast

---
 .../bam_joint_calling_germline_gatk/main.nf   | 43 ++++++++++++++++++-
 tests/test_joint_germline.yml                 |  2 +-
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 0c29298fba..57abdcd9cc 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -115,8 +115,47 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
         fai,
         dict.map{ meta, dict -> [ dict ] })
 
-    genotype_vcf   = MERGE_GENOTYPEGVCFS.out.vcf.mix(GATK4_APPLYVQSR_INDEL.out.vcf)
-    genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(GATK4_APPLYVQSR_INDEL.out.tbi)
+
+    // The following is an ugly monster to achieve the following:
+    // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR
+    // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS
+
+    // Remap both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
+    merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]}
+    merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]}
+
+    vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]}
+    vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]}
+
+    // Join on metamap
+    // If both --> meta, vcf_merged, vcf_bqsr
+    // If not VQSR --> meta, vcf_merged, []
+    // if the second is empty, use the first
+    genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{
+        meta, joint_vcf, recal_vcf ->
+
+        new_id = "joint_variant_calling"
+        vcf_out = joint_vcf
+        if(recal_vcf){
+            new_id = "recalibrated_joint_variant_calling"
+            vcf_out = recal_vcf
+        }
+
+        [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out]
+    }
+
+    genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{
+        meta, joint_tbi, recal_tbi ->
+
+        new_id = "joint_variant_calling"
+        tbi_out = joint_tbi
+        if(recal_tbi){
+            new_id = "recalibrated_joint_variant_calling"
+            tbi_out = recal_tbi
+        }
+
+        [[id:new_id], tbi_out]
+    }
 
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)
     versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions)
diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 6f503671b8..1eeaa6a792 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -37,7 +37,7 @@
     - germline
     - joint_germline
     - variant_calling
-   files:
+  files:
     - path: results/csv/variantcalled.csv
       md5sum: d2dffdbd2b4f1f26a06637592d24dab3
     - path: results/multiqc

From 9b192e7be25d5c927cb469174154d628c2f6133b Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Wed, 16 Aug 2023 09:00:20 +0000
Subject: [PATCH 19/24] [automated] Fix linting with Prettier

---
 tests/test_joint_germline.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 1eeaa6a792..00826ddc5d 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -101,4 +101,3 @@
     - path: results/variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi
     - path: results/haplotypecaller
       should_exist: false
-

From 8f33e7f1626a0184aec25e15f1aa15b91bfa945e Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 11:03:42 +0200
Subject: [PATCH 20/24] add missing keys

---
 subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 57abdcd9cc..452c7add6c 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -154,7 +154,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
             tbi_out = recal_tbi
         }
 
-        [[id:new_id], tbi_out]
+        [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out]
     }
 
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)

From d70294d9dec15cd83d810f997e63c1105ac3b7ec Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 15:19:47 +0200
Subject: [PATCH 21/24] update md5sums

---
 tests/test_joint_germline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 00826ddc5d..084f6dff12 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -73,7 +73,7 @@
     - vqsr
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: d2dffdbd2b4f1f26a06637592d24dab3
+      md5sum: 1a7e405250ac5f253197ebf4672b1f98
     - path: results/multiqc
     - path: results/preprocessing/recalibrated/test/test.recal.cram
       should_exist: false

From 7b2f17f8332a174e82d5332bf42b55842d96c007 Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 16:06:47 +0200
Subject: [PATCH 22/24] simplify logic + fix output paths

---
 .../bam_joint_calling_germline_gatk/main.nf   | 33 ++++++-------------
 tests/test_joint_germline.yml                 |  8 ++---
 2 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 452c7add6c..dae4c621fd 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -93,7 +93,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     vqsr_input_snp = vqsr_input.join(VARIANTRECALIBRATOR_SNP.out.recal, failOnDuplicate: true)
         .join(VARIANTRECALIBRATOR_SNP.out.idx, failOnDuplicate: true)
         .join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true)
-        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
+        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
 
     GATK4_APPLYVQSR_SNP(
         vqsr_input_snp,
@@ -103,11 +103,11 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
 
     // Join results of ApplyVQSR_SNP and use as input for Indels to avoid duplicate entries in the result
     // Rework meta for variantscalled.csv and annotation tools
-    vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta - meta.subMap('id') + [ id:'joint_variant_calling' ], vcf, tbi ]}
+    vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]}
         .join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true)
         .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true)
         .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true)
-        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
+        .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
 
     GATK4_APPLYVQSR_INDEL(
         vqsr_input_indel,
@@ -120,12 +120,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR
     // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS
 
-    // Remap both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
-    merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]}
-    merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]}
-
-    vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'recalibrated_joint_variant_calling'] , vcf]}
-    vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'recalibrated_joint_variant_calling'] , tbi]}
+    // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
+    vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
+    vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
 
     // Join on metamap
     // If both --> meta, vcf_merged, vcf_bqsr
@@ -134,27 +131,17 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{
         meta, joint_vcf, recal_vcf ->
 
-        new_id = "joint_variant_calling"
-        vcf_out = joint_vcf
-        if(recal_vcf){
-            new_id = "recalibrated_joint_variant_calling"
-            vcf_out = recal_vcf
-        }
+        vcf_out = recal_vcf ?: joint_vcf
 
-        [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out]
+        [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out]
     }
 
     genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{
         meta, joint_tbi, recal_tbi ->
 
-        new_id = "joint_variant_calling"
-        tbi_out = joint_tbi
-        if(recal_tbi){
-            new_id = "recalibrated_joint_variant_calling"
-            tbi_out = recal_tbi
-        }
+        tbi_out = recal_tbi ?: joint_tbi
 
-        [[id:new_id, patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out]
+        [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out]
     }
 
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)
diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index 084f6dff12..ba19cf5aa2 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -79,13 +79,13 @@
       should_exist: false
     - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
       should_exist: false
-    - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt
+    - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt
     # Not stable enough
-    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.FILTER.summary
     # Not stable enough
-    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.TsTv.count
     # Not stable enough
-    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual
+    - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline_recalibrated.TsTv.qual
     # Not stable enough
     - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz
     # binary changes md5sums on reruns

From 4aab5585fc0027252de422722175904b42626054 Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 16:53:47 +0200
Subject: [PATCH 23/24] fix output paths

---
 subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index dae4c621fd..50c75b0c04 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -120,6 +120,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
     // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR
     // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS
 
+    merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
+    merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
+
     // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
     vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
     vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
@@ -144,6 +147,8 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
         [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out]
     }
 
+    genotype_vcf.view()
+
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)
     versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions)
     versions = versions.mix(VARIANTRECALIBRATOR_SNP.out.versions)

From 89163ced41f416e8819dbee38872127fc114c58f Mon Sep 17 00:00:00 2001
From: Rike <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 16 Aug 2023 17:19:22 +0200
Subject: [PATCH 24/24] remove view statement

---
 subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 --
 tests/test_joint_germline.yml                              | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
index 50c75b0c04..f0d9148c07 100644
--- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
+++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -147,8 +147,6 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
         [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out]
     }
 
-    genotype_vcf.view()
-
     versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions)
     versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions)
     versions = versions.mix(VARIANTRECALIBRATOR_SNP.out.versions)
diff --git a/tests/test_joint_germline.yml b/tests/test_joint_germline.yml
index ba19cf5aa2..6bd7b4532f 100644
--- a/tests/test_joint_germline.yml
+++ b/tests/test_joint_germline.yml
@@ -73,7 +73,7 @@
     - vqsr
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: 1a7e405250ac5f253197ebf4672b1f98
+      md5sum: 8513cd4aef3f54e2a72940461617c6c7
     - path: results/multiqc
     - path: results/preprocessing/recalibrated/test/test.recal.cram
       should_exist: false