diff --git a/config/cluster.json b/config/cluster.json
index 000d26e..efbf169 100644
--- a/config/cluster.json
+++ b/config/cluster.json
@@ -61,5 +61,19 @@
         "partition": "gpu",
         "time": "0-04:00:00",
         "gres": "gpu:a100:2,lscratch:500"
+    },
+    "4-gpu_normal-memory": {
+        "threads": "64",
+        "mem": "240g",
+        "partition": "gpu",
+        "time": "0-02:00:00",
+        "gres": "gpu:a100:4,lscratch:500"
+    },
+    "4-gpu_normal-memory_optimized": {
+        "threads": "64",
+        "mem": "240g",
+        "partition": "gpu",
+        "time": "0-02:00:00",
+        "gres": "gpu:a100:4,lscratch:500"
     }
 }
\ No newline at end of file
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 1a67eb1..88a95ff 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -96,6 +96,21 @@ rule all:
             join(workpath, "gatk_germline", "2gpu_low_memory", "{name}", "{name}.bam"),
             name=samples
         ),
+        # Run Parabricks germline pipeline with 4 GPUs and normal memory,
+        # Output files of `rule parabricks_gatk_germline_4gpu_normal_memory`
+        # in file `rules/germline.smk`
+        expand(
+            join(workpath, "gatk_germline", "4gpu_normal_memory", "{name}", "{name}.bam"),
+            name=samples
+        ),
+        # Run Parabricks germline pipeline with 4 GPUs and normal memory and
+        # a recommended set of options to gain the best performance/runtimes,
+        # Output files of `rule parabricks_gatk_germline_4gpu_normal_memory_optimized`
+        # in file `rules/germline.smk`
+        expand(
+            join(workpath, "gatk_germline", "4gpu_normal_memory_optimized", "{name}", "{name}.bam"),
+            name=samples
+        ),
 
 
 # Import rules
diff --git a/workflow/rules/germline.smk b/workflow/rules/germline.smk
index 203f129..02370b4 100644
--- a/workflow/rules/germline.smk
+++ b/workflow/rules/germline.smk
@@ -93,7 +93,8 @@ rule parabricks_gatk_germline_1gpu_normal_memory_optimized:
         r2 = join(workpath,"{name}.R2.fastq.gz"),
     output:
         bam = join(workpath, "gatk_germline", "1gpu_normal_memory_optimized", "{sample}", "{name}.bam"),
-        gvcf = join(workpath, "gatk_germline", "1gpu_normal_memory_optimized", "{sample}", "{name}.g.vcf.gz"),
+        # Gzipped VCF cannot be created with --run-partition option
+        gvcf = join(workpath, "gatk_germline", "1gpu_normal_memory_optimized", "{sample}", "{name}.g.vcf"),
         recal = join(workpath, "gatk_germline", "1gpu_normal_memory_optimized", "{sample}", "{name}.recal"),
     params:
         # Rule specific parameters
@@ -375,7 +376,8 @@ rule parabricks_gatk_germline_2gpu_normal_memory_optimized:
         r2 = join(workpath,"{name}.R2.fastq.gz"),
     output:
         bam = join(workpath, "gatk_germline", "2gpu_normal_memory_optimized", "{sample}", "{name}.bam"),
-        gvcf = join(workpath, "gatk_germline", "2gpu_normal_memory_optimized", "{sample}", "{name}.g.vcf.gz"),
+        # Gzipped VCF cannot be created with --run-partition option
+        gvcf = join(workpath, "gatk_germline", "2gpu_normal_memory_optimized", "{sample}", "{name}.g.vcf"),
         recal = join(workpath, "gatk_germline", "2gpu_normal_memory_optimized", "{sample}", "{name}.recal"),
     params:
         # Rule specific parameters
@@ -498,3 +500,151 @@ rule parabricks_gatk_germline_2gpu_low_memory:
             --low-memory \\
             --htvc-low-memory
     """
+
+# Rule utilizing 4 A100 GPUs,
+# NOTE: each A100 node has 4 GPUs
+# so we are allocating an entire
+# node for this rule
+rule parabricks_gatk_germline_4gpu_normal_memory:
+    """Benchmarking Parabricks GATK Germline pipeline with 4 GPUs and a normal allotment
+    of main memory. NOTE: The memory limit option needs to be toned down to allow for
+    sufficient system memory to be available for the GPU. Internal testing has shown that
+    the parabricks germline pipeline tends to use more than the allocated memory (even
+    with the --memory-limit option).
+    @Inputs:
+        GIAB sample paired-end FASTQ files, R1 and R2 (scatter-per-sample).
+    @Outputs:
+        BAM file,
+        GVCF file,
+        Recal table
+    """
+    input:
+        idxs = expand(join(workpath, "refs", genome + "{ext}"), ext=bwa_index_extensions),
+        lnk = join(workpath, "refs", genome),
+        r1 = join(workpath,"{name}.R1.fastq.gz"),
+        r2 = join(workpath,"{name}.R2.fastq.gz"),
+    output:
+        bam = join(workpath, "gatk_germline", "4gpu_normal_memory", "{sample}", "{name}.bam"),
+        gvcf = join(workpath, "gatk_germline", "4gpu_normal_memory", "{sample}", "{name}.g.vcf.gz"),
+        recal = join(workpath, "gatk_germline", "4gpu_normal_memory", "{sample}", "{name}.recal"),
+    params:
+        # Rule specific parameters
+        sample = "{name}",
+        # Job submission parameters
+        rname = "pb_germline_4gpu_normal_memory",
+        mem = allocated("mem", "4-gpu_normal-memory", cluster),
+        gres = allocated("gres", "4-gpu_normal-memory", cluster),
+        time = allocated("time", "4-gpu_normal-memory", cluster),
+        partition = allocated("partition", "4-gpu_normal-memory", cluster),
+        # Singularity options
+        bindpaths = ','.join(bindpaths),
+        tmpdir = tmpdir,
+        sif = config['images']['parabricks'],
+        # Parabricks options
+        RUNNING_MEMORY_GB = int(
+            allocated("mem", "4-gpu_normal-memory", cluster).lower().rstrip("g")
+        ) - 12 ,  # keep --memory-limit 12 GB below the allocated memory
+        KNOWN_INDELS_1 = config['references']['GATK_KNOWN_INDELS'],
+        KNOWN_INDELS_2 = config['references']['OTHER_KNOWN_INDELS'],
+    threads: int(allocated("threads", "4-gpu_normal-memory", cluster))
+    shell: """
+    # Run Parabricks germline pipeline with
+    # default acceleration options
+    singularity exec \\
+        -c \\
+        --nv  \\
+        -B {params.bindpaths},{params.tmpdir}:/tmp \\
+        {params.sif} \\
+        pbrun germline \\
+            --ref {input.lnk} \\
+            --in-fq {input.r1} {input.r2} "@RG\\tID:{params.sample}\\tSM:{params.sample}\\tPL:illumina\\tLB:{params.sample}\\tPU:{params.sample}\\tCN:ncbr\\tDS:wgs" \\
+            --knownSites {params.KNOWN_INDELS_1} \\
+            --knownSites {params.KNOWN_INDELS_2} \\
+            --out-bam {output.bam} \\
+            --out-variants {output.gvcf} \\
+            --out-recal-file {output.recal} \\
+            --gvcf \\
+            --bwa-options="-M" \\
+            --monitor-usage \\
+            --memory-limit {params.RUNNING_MEMORY_GB} \\
+            --tmp-dir /tmp
+    """
+
+
+rule parabricks_gatk_germline_4gpu_normal_memory_optimized:
+    """Benchmarking Parabricks GATK Germline pipeline with 4 GPUs and a normal allotment
+    of main memory using the recommended set of options to gain the best performance.
+    NOTE: The memory limit option needs to be toned down to allow for sufficient system
+    memory to be available for the GPU. Internal testing has shown that the parabricks
+    germline pipeline tends to use more than the allocated memory (even with the
+    --memory-limit option).
+    @Inputs:
+        GIAB sample paired-end FASTQ files, R1 and R2 (scatter-per-sample).
+    @Outputs:
+        BAM file,
+        GVCF file,
+        Recal table
+    """
+    input:
+        idxs = expand(join(workpath, "refs", genome + "{ext}"), ext=bwa_index_extensions),
+        lnk = join(workpath, "refs", genome),
+        r1 = join(workpath,"{name}.R1.fastq.gz"),
+        r2 = join(workpath,"{name}.R2.fastq.gz"),
+    output:
+        bam = join(workpath, "gatk_germline", "4gpu_normal_memory_optimized", "{sample}", "{name}.bam"),
+        # Gzipped VCF cannot be created with --run-partition option
+        gvcf = join(workpath, "gatk_germline", "4gpu_normal_memory_optimized", "{sample}", "{name}.g.vcf"),
+        recal = join(workpath, "gatk_germline", "4gpu_normal_memory_optimized", "{sample}", "{name}.recal"),
+    params:
+        # Rule specific parameters
+        sample = "{name}",
+        # Job submission parameters
+        rname = "pb_germline_4gpu_normal_memory_optimized",
+        mem = allocated("mem", "4-gpu_normal-memory_optimized", cluster),
+        gres = allocated("gres", "4-gpu_normal-memory_optimized", cluster),
+        time = allocated("time", "4-gpu_normal-memory_optimized", cluster),
+        partition = allocated("partition", "4-gpu_normal-memory_optimized", cluster),
+        # Singularity options
+        bindpaths = ','.join(bindpaths),
+        tmpdir = tmpdir,
+        sif = config['images']['parabricks'],
+        # Parabricks options
+        RUNNING_MEMORY_GB = int(
+            allocated("mem", "4-gpu_normal-memory_optimized", cluster).lower().rstrip("g")
+        ) - 12 ,  # keep --memory-limit 12 GB below the allocated memory
+        KNOWN_INDELS_1 = config['references']['GATK_KNOWN_INDELS'],
+        KNOWN_INDELS_2 = config['references']['OTHER_KNOWN_INDELS'],
+    threads: int(allocated("threads", "4-gpu_normal-memory_optimized", cluster))
+    shell: """
+    # Run Parabricks germline pipeline with
+    # default acceleration options and the
+    # recommended set of options for best
+    # performance
+    singularity exec \\
+        -c \\
+        --nv  \\
+        --env TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=268435456 \\
+        -B {params.bindpaths},{params.tmpdir}:/tmp \\
+        {params.sif} \\
+        pbrun germline \\
+            --ref {input.lnk} \\
+            --in-fq {input.r1} {input.r2} "@RG\\tID:{params.sample}\\tSM:{params.sample}\\tPL:illumina\\tLB:{params.sample}\\tPU:{params.sample}\\tCN:ncbr\\tDS:wgs" \\
+            --knownSites {params.KNOWN_INDELS_1} \\
+            --knownSites {params.KNOWN_INDELS_2} \\
+            --out-bam {output.bam} \\
+            --out-variants {output.gvcf} \\
+            --out-recal-file {output.recal} \\
+            --gvcf \\
+            --bwa-options="-M" \\
+            --monitor-usage \\
+            --memory-limit {params.RUNNING_MEMORY_GB} \\
+            --tmp-dir /tmp \\
+            --num-cpu-threads-per-stage {threads} \\
+            --bwa-cpu-thread-pool {threads} \\
+            --run-partition \\
+            --read-from-tmp-dir \\
+            --gpusort \\
+            --gpuwrite \\
+            --fq2bamfast \\
+            --keep-tmp
+    """