Skip to content

Commit

Permalink
Merge pull request #106 from sanjaynagi/fix-parabricks-support-16-09-24
Browse files Browse the repository at this point in the history
Fixing GPU support with parabricks
  • Loading branch information
sanjaynagi authored Sep 16, 2024
2 parents 9526191 + a4a34dd commit 11f63c6
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 29 deletions.
105 changes: 78 additions & 27 deletions workflow/rules/star-haplotypecaller.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@ rule star_index:
fasta=config["reference"]["genome"].rstrip(".gz"),
gff= config['reference']['gff'],
output:
directory("resources/reference/star_index"),
index = directory("resources/reference/star_index"),
threads: 8
log:
"logs/star_index/index.log",
params:
extra="--sjdbGTFtagExonParentTranscript Parent"
shell:
"""
mkdir -p {output.index}
STAR \
--runThreadN {threads} \
--runMode genomeGenerate \
--genomeFastaFiles {input.fasta} \
--sjdbGTFfile {input.gff} \
--sjdbGTFtagExonParentTranscript Parent \
--genomeDir {output} 2> {log} \
{params.extra} \
--genomeDir {output.index} \
--outFileNamePrefix {output.index}/ \
> {log} 2>&1
"""

rule fq2bam:
Expand All @@ -31,62 +36,110 @@ rule fq2bam:
star_index= "resources/reference/star_index",
output:
bam="results/alignments/{sample}.star.bam",
recal="results/alignments/recal_gpu_{sample}.txt"
bai="results/alignments/{sample}.star.bam.bai"
# recal="results/alignments/recal_gpu_{sample}.txt"
log:
"logs/parabricks/fq2bam-{sample}.log",
threads: 1
resources:
all_gpus = 1
params:
wkdir=wkdir
shell:
"""
docker run --rm --gpus all --volume {wkdir} --volume {wkdir}/results/alignments/star \
--workdir {wkdir} \
nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
docker run --user 1006:1006 --rm --gpus all --volume {wkdir}:/workdir --workdir /workdir nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
pbrun rna_fq2bam \
--in-fq {input.reads} \
--genome-lib-dir resources/reference/star_index\
--output-dir results/alignments/star \
--ref {input.ref_fasta} \
--out-bam {output.bam} \
--out-recal-file results/alignments/recal_gpu_{wildcards.sample}.txt \
--read-files-command zcat
--in-fq /workdir/{input.reads[0]} /workdir/{input.reads[1]} \
--genome-lib-dir /workdir/resources/reference/star_index\
--output-dir /workdir \
--ref /workdir/{input.ref_fasta} \
--out-bam /workdir/{output.bam} \
--read-files-command zcat 2> {log}
"""


rule haplotype_caller:
"""
Run a GPU-accelerated haplotypeCaller algorithm.
"""
input:
bam = "results/alignments/{sample}.star.bam",
ref_fasta=config["reference"]["genome"].rstrip(".gz"),
recal="results/alignments/recal_gpu_{sample}.txt",
# recal="results/alignments/recal_gpu_{sample}.txt",
output:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf"
log:
"logs/parabricks/haplotypecaller-{contig}-{sample}.log",
threads: 1
resources:
all_gpus = 1
params:
wkdir=wkdir,
ploidy=config['VariantAnalysis']['ploidy']
shell:
"""
docker run --rm --gpus all --volume {wkdir} --volume {wkdir}
-w {wkdir} \
nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
docker run --user 1006:1006 --rm --gpus all --volume {wkdir}:/workdir -w /workdir nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
pbrun haplotypecaller \
--ref {input.ref_fasta} \
--intervals {wildcards.contig} \
--in-bam {input.bam} \
--in-recal-file {input.recal} \
--out-variants {output.vcf} \
--ploidy {params.ploidy}
--rna \
--ref /workdir/{input.ref_fasta} \
--interval {wildcards.contig} \
--in-bam /workdir/{input.bam} \
--out-variants /workdir/{output.vcf} \
--ploidy {params.ploidy} 2> {log}
"""


rule create_dict:
input:
ref_fasta=config["reference"]["genome"].rstrip(".gz"),
output:
ref_dict=config["reference"]["genome"].rstrip(".gz").rstrip(".fa").rstrip(".fasta") + ".dict",
log:
"logs/gatk/create_dict.log",
params:
extra="", # optional: extra arguments for picard.
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024,
wrapper:
"v4.3.0/bio/picard/createsequencedictionary"


rule bgzip:
input:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf",
output:
tbi="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz",
params:
extra="", # optional
threads: 1
log:
"logs/gatk/bgzip/{contig}.{sample}.log",
wrapper:
"v4.3.0-25-g7d3aa9d/bio/bgzip"

rule tabix:
input:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz",
output:
tbi="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz.tbi",
log:
"logs/gatk/tabix/{contig}.{sample}.log",
params:
"-p vcf",
wrapper:
"v4.3.0/bio/tabix/index"

rule combine_calls:
input:
ref=config['reference']['genome'].rstrip(".gz"),
gvcfs=expand(
"results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf", sample=samples
"results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf.gz", sample=samples
),
tbi=expand("results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf.gz.tbi", sample=samples)
output:
gvcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/variants.g.vcf.gz",
log:
Expand All @@ -101,8 +154,6 @@ rule genotype_variants:
gvcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/variants.g.vcf.gz",
output:
vcf=temp("results/variantAnalysis/vcfs/haplotypecaller/variants.{contig}.vcf"),
params:
extra=config['reference']['genome'].rstrip(".gz"),
log:
"logs/gatk/genotypegvcfs.{contig}.log",
wrapper:
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/utilities.smk
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ rule IndexBams:
Index bams with samtools
"""
input:
bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
bam="results/alignments/{sample}.hisat2.bam",
output:
idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
idx="results/alignments/{sample}.hisat2.bam.bai",
log:
"logs/IndexBams/{sample}.log",
wrapper:
Expand Down

0 comments on commit 11f63c6

Please sign in to comment.