Skip to content

Commit

Permalink
Merge pull request nf-core#1193 from nf-core/sentieon_dnascope
Browse files Browse the repository at this point in the history
Dnascope module and subworkflow in Sarek
  • Loading branch information
asp8200 authored Oct 4, 2023
2 parents e69a301 + d6f610c commit df6df02
Show file tree
Hide file tree
Showing 25 changed files with 1,357 additions and 150 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DnaScope for germline variant-calling including joint-germline.
- [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev

### Changed
Expand Down
6 changes: 3 additions & 3 deletions conf/modules/prepare_genome.config
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ process {
}

withName: 'TABIX_DBSNP' {
ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) }
ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope') || params.tools.split(',').contains('mutect2'))) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
Expand All @@ -96,7 +96,7 @@ process {
}

withName: 'TABIX_KNOWN_INDELS' {
ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
Expand All @@ -106,7 +106,7 @@ process {
}

withName: 'TABIX_KNOWN_SNPS' {
ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') )) ) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
Expand Down
68 changes: 68 additions & 0 deletions conf/modules/sentieon_dnascope.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/

// SENTIEON DNASCOPE

process {

    // Sentieon DNAscope variant calling; may run once per interval shard.
    withName: 'SENTIEON_DNASCOPE' {
        // With more than one interval, the interval file's simple name is appended to the prefix.
        ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.simpleName}" }
        // Run only when 'sentieon_dnascope' is one of the comma-separated entries of --tools.
        ext.when = { params.tools && params.tools.split(',').contains('sentieon_dnascope') }
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/variant_calling/"},
            pattern: "*{vcf.gz,vcf.gz.tbi}",
            // saveAs returns null for multi-interval runs, so per-interval files are not published;
            // otherwise the file goes to sentieon_dnascope/<sample>/ below the publish path.
            saveAs: { meta.num_intervals > 1 ? null : "sentieon_dnascope/${meta.id}/${it}" }
        ]
    }

    // Merge of the per-interval VCFs.
    withName: 'MERGE_SENTIEON_DNASCOPE_VCFS' {
        // The prefix carries ".g" when joint-germline calling is requested, ".unfiltered" otherwise.
        ext.prefix = { params.joint_germline ? "${meta.id}.dnascope.g" : "${meta.id}.dnascope.unfiltered" }
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" },
            // Publish everything except versions.yml.
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    // Merge of the per-interval gVCFs.
    withName: 'MERGE_SENTIEON_DNASCOPE_GVCFS' {
        ext.prefix = { "${meta.id}.dnascope.g" }
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" },
            // Publish everything except versions.yml.
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    // Guard on an exact entry of the comma-separated --tools list (same test as ext.when above)
    // instead of a substring match on the raw string.
    // NOTE(review): the selector below matches every process whose name ends in FILTERVARIANTTRANCHES;
    // confirm that the DNAscope workflow really uses it and that it cannot override the settings
    // another variant caller defines for the same process when both tools are requested.
    if (params.tools && params.tools.split(',').contains('sentieon_dnascope')) {
        withName: '.*FILTERVARIANTTRANCHES' {
            ext.prefix = {"${meta.id}.dnascope"}
            ext.args = { "--info-key CNN_1D" }
            publishDir = [
                mode: params.publish_dir_mode,
                path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"},
                pattern: "*{vcf.gz,vcf.gz.tbi}"
            ]
        }
    }

    // Sentieon DNAModelApply output, named as the filtered DNAscope VCF.
    withName: 'SENTIEON_DNAMODELAPPLY' {
        ext.prefix = {"${meta.id}.dnascope.filtered"}
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"},
            pattern: "*{vcf.gz,vcf.gz.tbi}"
        ]
    }

}
45 changes: 45 additions & 0 deletions conf/modules/sentieon_dnascope_joint_germline.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/

// SENTIEON DNASCOPE JOINT_GERMLINE

process {

    // TODO: presumably this selector duplicates the SENTIEON_GVCFTYPER settings of another
    // joint-germline config; confirm and keep a single definition.
    withName: 'SENTIEON_GVCFTYPER' {
        // NOTE(review): confirm that GVCFtyper accepts this option; the flag name looks like a GATK one.
        ext.args = { "--allow-old-rms-mapping-quality-annotation-data" }
        ext.prefix = { meta.intervals_name }
        // Output of this step is not published.
        publishDir = [
            enabled: false
        ]
    }

    // Guard on an exact entry of the comma-separated --tools list instead of a substring match
    // on the raw string; the selectors below apply only when joint-germline calling is requested.
    if (params.tools && params.tools.split(',').contains('sentieon_dnascope') && params.joint_germline) {
        withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' {
            // Remove ".vcf" from the input's base name, then append ".sort".
            ext.prefix = { vcf.baseName - ".vcf" + ".sort" }
            // Sorted VCFs are not published.
            publishDir = [
                enabled: false
            ]
        }

        withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' {
            // Fixed prefix: the published files are joint_germline.vcf.gz and its index.
            ext.prefix = "joint_germline"
            publishDir = [
                mode: params.publish_dir_mode,
                path: { "${params.outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/" },
                // Publish everything except versions.yml.
                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                pattern: "*{vcf.gz,vcf.gz.tbi}"
            ]
        }
    }
}
File renamed without changes.
59 changes: 54 additions & 5 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [GATK Germline Single Sample Variant Calling](#gatk-germline-single-sample-variant-calling)
- [GATK Joint Germline Variant Calling](#gatk-joint-germline-variant-calling)
- [GATK Mutect2](#gatk-mutect2)
- [Sentieon DNAscope](#sentieon-dnascope)
- [Sentieon DNAscope joint germline variant calling](#sentieon-dnascope-joint-germline-variant-calling)
- [Sentieon Haplotyper](#sentieon-haplotyper)
- [Sentieon Joint Germline Variant Calling](#sentieon-joint-germline-variant-calling)
- [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling)
- [Strelka2](#strelka2)
- [Structural Variants](#structural-variants)
- [Manta](#manta)
Expand Down Expand Up @@ -442,6 +444,53 @@ Files created:

</details>

#### Sentieon DNAscope

[Sentieon DNAscope](https://support.sentieon.com/appnotes/dnascope_ml/#dnascope-germline-variant-calling-with-a-machine-learning-model) is a variant-caller which aims at outperforming GATK's HaplotypeCaller in terms of both speed and accuracy. DNAscope allows you to use a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering.

<details markdown="1">
<summary>Unfiltered VCF-files for normal samples</summary>

**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**

- `<sample>.dnascope.unfiltered.vcf.gz` and `<sample>.dnascope.unfiltered.vcf.gz.tbi`
- VCF with tabix index

</details>

The output from Sentieon's DNAscope can be controlled through the option `--sentieon_dnascope_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions).

Unless `dnascope_filter` is listed under `--skip_tools` in the nextflow command, Sentieon's [DNAModelApply](https://support.sentieon.com/manual/usages/general/#dnamodelapply-algorithm) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files.

<details markdown="1">
<summary>Filtered VCF-files for normal samples</summary>

**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**

- `<sample>.dnascope.filtered.vcf.gz` and `<sample>.dnascope.filtered.vcf.gz.tbi`
- VCF with tabix index

</details>

##### Sentieon DNAscope joint germline variant calling

In Sentieon's package DNAscope, joint germline variant calling is done by first running Sentieon's DNAscope in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAscope.

<details markdown="1">
<summary>Output files from joint germline variant calling</summary>

**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**

- `<sample>.dnascope.g.vcf.gz` and `<sample>.dnascope.g.vcf.gz.tbi`
- VCF with tabix index

**Output directory: `{outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/`**

- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi`
- VCF with tabix index

</details>

#### Sentieon Haplotyper

[Sentieon Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) is Sentieon's sped-up version of GATK's HaplotypeCaller (see above).
Expand All @@ -456,7 +505,7 @@ Files created:

</details>

The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek).
The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions).

Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow command, GATK's CNNScoreVariants and FilterVariantTranches (see above) are applied to the unfiltered VCF-files in order to obtain filtered VCF-files.

Expand All @@ -470,16 +519,16 @@ Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow comman

</details>

##### Sentieon Joint Germline Variant Calling
##### Sentieon Haplotyper joint germline variant calling

In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity.
In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity.

<details markdown="1">
<summary>Output files from joint germline variant calling</summary>

**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/<sample>/`**

- `<sample>.haplotypecaller.g.vcf.gz` and `<sample>.haplotypecaller.g.vcf.gz.tbi`
- `<sample>.haplotyper.g.vcf.gz` and `<sample>.haplotyper.g.vcf.gz.tbi`
- VCF with tabix index

**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`**
Expand Down
Loading

0 comments on commit df6df02

Please sign in to comment.