Skip to content

Commit

Permalink
Adding step to compress and index VCF
Browse files Browse the repository at this point in the history
  • Loading branch information
skchronicles committed Jul 8, 2024
1 parent 37094e2 commit 5912bcd
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 1 deletion.
7 changes: 7 additions & 0 deletions config/cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,12 @@
"partition": "gpu",
"time": "0-02:00:00",
"gres": "gpu:a100:4,lscratch:500"
},
"bgzip_index_vcf": {
"threads": "2",
"mem": "16g",
"partition": "norm",
"time": "0-02:00:00",
"gres": "lscratch:20"
}
}
3 changes: 2 additions & 1 deletion config/containers.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"images": {
"parabricks": "docker://nvcr.io/nvidia/clara/clara-parabricks:4.3.1-1",
"bwa": "docker://biocontainers/bwa:v0.7.17_cv1"
"bwa": "docker://biocontainers/bwa:v0.7.17_cv1",
"gatk": "docker://skchronicles/genome-seek:v0.1.0"
}
}
22 changes: 22 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ bwa_index_extensions = [
".amb", ".ann", ".bwt", ".pac", ".sa"
]

# Names of the Parabricks benchmark runs,
# one per GPU-count / memory / tuning
# combination; each name is used as a
# sub-directory of "gatk_germline" when
# building the pipeline's target files.
benchmark_configurations = [
    # Single GPU
    "1gpu_normal_memory", "1gpu_normal_memory_optimized",
    "1gpu_low_memory", "1gpu_high_memory",
    # Two GPUs
    "2gpu_normal_memory", "2gpu_normal_memory_optimized",
    "2gpu_low_memory",
    # Four GPUs
    "4gpu_normal_memory", "4gpu_normal_memory_optimized",
]

# Read in resource information,
# containing information about
# threads, mem, walltimes, etc.
Expand Down Expand Up @@ -111,6 +126,13 @@ rule all:
join(workpath, "gatk_germline", "4gpu_normal_memory_optimized", "{name}", "{name}.bam"),
name=samples
),
# bgzip compress and index VCF files,
# Output files of `rule bgzip_index_vcf`
expand(
join(workpath, "gatk_germline", "{benchmark_configuration}", "{name}", "{name}.vcf.gz"),
benchmark_configuration=benchmark_configurations,
name=samples
),


# Import rules
Expand Down
34 changes: 34 additions & 0 deletions workflow/rules/germline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -638,3 +638,37 @@ rule parabricks_gatk_germline_4gpu_normal_memory_optimized:
--fq2bamfast \\
--keep-tmp
"""


rule bgzip_index_vcf:
    """Data processing step to compress a VCF file with bgzip and index it with tabix.
    The compressed file is written via stdout redirection (`bgzip -c`), so the
    uncompressed input VCF is left in place.
    @Inputs:
        VCF file
    @Outputs:
        bgzip compressed VCF file,
        tabix index file
    """
    input:
        # NOTE(review): the directory uses the `sample` wildcard while the
        # file name uses `name`; the targets requested in `rule all` are of
        # the form "{name}/{name}.vcf.gz", so both wildcards resolve to the
        # same value there -- confirm whether a single wildcard is intended.
        vcf = join(workpath, "gatk_germline", "{benchmark_configuration}", "{sample}", "{name}.vcf"),
    output:
        vcf = join(workpath, "gatk_germline", "{benchmark_configuration}", "{sample}", "{name}.vcf.gz"),
        idx = join(workpath, "gatk_germline", "{benchmark_configuration}", "{sample}", "{name}.vcf.gz.tbi"),
    params:
        # Job submission parameters
        rname = "bgzip_index_vcf",
        mem = allocated("mem", "bgzip_index_vcf", cluster),
        gres = allocated("gres", "bgzip_index_vcf", cluster),
        time = allocated("time", "bgzip_index_vcf", cluster),
        partition = allocated("partition", "bgzip_index_vcf", cluster),
    threads: int(allocated("threads", "bgzip_index_vcf", cluster)),
    container: config['images']['gatk'],
    shell: """
    # Bgzip compress the VCF file, using
    # the threads allocated to this job
    bgzip \\
        --threads {threads} \\
        -c {input.vcf} \\
    > {output.vcf}
    # Create a tabix index, it is written
    # next to the compressed VCF file with
    # a .tbi extension
    tabix \\
        -p vcf \\
        {output.vcf}
    """

0 comments on commit 5912bcd

Please sign in to comment.