Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: create bam_to_fastq process #14

Merged
merged 9 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
- bwa/index
- bwa/mem
- also runs samtools sort & outputs index in bai format.
- custom/bam_to_fastq (#14)
- cutadapt
- khmer/uniquekmers
47 changes: 47 additions & 0 deletions modules/CCBR/custom/bam_to_fastq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Convert an aligned BAM file back into gzipped FASTQ.
//
// Single-end samples (meta.single_end is true) are converted with
// `samtools bam2fq`; paired-end samples are converted with Picard SamToFastq,
// which additionally writes reads lacking a mate to an "unpaired" FASTQ.
//
// Inputs:
//   meta - Groovy map with at least `id` (used for the tag) and `single_end`
//   bam  - alignment file to convert
//   bai  - index staged alongside the BAM
// Outputs:
//   reads    - <bam.baseName>.R1.fastq.gz (plus .R2.fastq.gz when paired-end)
//   unpaired - <bam.baseName>.unpaired.fastq.gz (paired-end branch only, optional)
//   versions - versions.yml recording the tool version that was used
process BAM_TO_FASTQ {
    tag { meta.id }
    label 'process_single'
    // The image is chosen per sample because each branch needs a different tool.
    container "${ meta.single_end ? 'nciccbr/ccbr_ubuntu_base_20.04:v5' : 'nciccbr/ccbr_picard_2.27.5:v1' }"

    input:
        tuple val(meta), path(bam), path(bai)

    output:
        tuple val(meta), path("*.R?.fastq*"), emit: reads
        tuple val(meta), path("*.unpaired.fastq*"), emit: unpaired, optional: true
        path("versions.yml"), emit: versions

    when:
        task.ext.when == null || task.ext.when

    script:
    if (meta.single_end) {
        """
        samtools bam2fq ${bam} | pigz -p ${task.cpus} > ${bam.baseName}.R1.fastq.gz

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        END_VERSIONS
        """
    } else {
        // Express the JVM heap in megabytes: toGiga() truncates, so any
        // allocation below 1 GB would have produced an unusable "-Xmx0G".
        // Fall back to 3 GB when the executor config sets no memory at all
        // (task.memory is null), instead of failing on a null dereference.
        def heap_mb = task.memory ? task.memory.toMega() : 3072
        """
        picard -Xmx${heap_mb}M SamToFastq \\
            --VALIDATION_STRINGENCY SILENT \\
            --INPUT ${bam} \\
            --FASTQ ${bam.baseName}.R1.fastq \\
            --SECOND_END_FASTQ ${bam.baseName}.R2.fastq \\
            --UNPAIRED_FASTQ ${bam.baseName}.unpaired.fastq
        pigz -p ${task.cpus} *.fastq

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            picard: \$(picard SamToFastq --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
        END_VERSIONS
        """
    }

    stub:
    // Mirror the file names of a real run: gzipped FASTQ, with an R2 file
    // for paired-end samples.
    def stub_r1 = "${bam.baseName}.R1.fastq.gz"
    def stub_fastqs = meta.single_end ? stub_r1 : "${stub_r1} ${bam.baseName}.R2.fastq.gz"
    """
    touch ${stub_fastqs} versions.yml
    """
}
62 changes: 62 additions & 0 deletions modules/CCBR/custom/bam_to_fastq/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Module metadata for custom/bam_to_fastq.
# File patterns below mirror the output globs declared in the module's main.nf.
name: bam_to_fastq
description: |
  The module converts a BAM file to FASTQ format.
  It uses samtools bam2fq if reads are single end,
  or picard SamToFastq if reads are paired.

keywords:
  - bam2fq
  - samtools
  - fastq
  - picard
tools:
  - samtools:
      description: Tools for dealing with SAM, BAM and CRAM files
      documentation: http://www.htslib.org/doc/1.1/samtools.html
      licence: ["MIT"]
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      # Points at SamToFastq, the Picard tool this module actually runs.
      documentation: https://broadinstitute.github.io/picard/command-line-overview.html#SamToFastq
      tool_dev_url: https://github.com/broadinstitute/picard
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: BAI (bam index) file
      pattern: "*.bai"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reads:
      type: file
      description: |
        1 fastq file if single end, or 2 fastq files if paired-end.
      pattern: "*.R?.fastq.gz"
  - unpaired:
      type: file
      description: |
        Optional. Unpaired reads written by picard SamToFastq;
        only produced for paired-end input.
      pattern: "*.unpaired.fastq.gz"
authors:
  - "@kelly-sovacool"
maintainers:
  - "@kelly-sovacool"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ bwa/mem:
- modules/CCBR/bwa/mem/**
- tests/modules/CCBR/bwa/mem/**

# Path globs for the custom/bam_to_fastq test tag: the module sources and its tests.
custom/bam_to_fastq:
- modules/CCBR/custom/bam_to_fastq/**
- tests/modules/CCBR/custom/bam_to_fastq/**

cutadapt:
- modules/CCBR/cutadapt/**
- tests/modules/CCBR/cutadapt/**
Expand Down
48 changes: 48 additions & 0 deletions tests/modules/CCBR/custom/bam_to_fastq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { BWA_INDEX } from '../../../../../modules/CCBR/bwa/index/main.nf'
include { BWA_MEM } from '../../../../../modules/CCBR/bwa/mem/main.nf'
include { BAM_TO_FASTQ } from '../../../../../modules/CCBR/custom/bam_to_fastq/main.nf'

//
// Single-end round trip: index the genome, align one FASTQ with BWA-MEM,
// then convert the resulting BAM back to FASTQ.
//
workflow test_bam2fastq_single {
    sample_meta = [ id:'test', single_end:true ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
    reads = [ sample_meta, fastq_files ]

    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    genome = [ [id: 'test'], genome_fasta ]

    BWA_INDEX ( genome )
    BWA_MEM ( reads, BWA_INDEX.out.index )
    BAM_TO_FASTQ( BWA_MEM.out.bam )
}

//
// Paired-end round trip: index the genome, align both mates with BWA-MEM,
// then convert the resulting BAM back to FASTQ.
//
workflow test_bam2fastq_paired {
    sample_meta = [ id:'test', single_end:false ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    reads = [ sample_meta, fastq_files ]

    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    genome = [ [id: 'test'], genome_fasta ]

    BWA_INDEX ( genome )
    BWA_MEM ( reads, BWA_INDEX.out.index )
    BAM_TO_FASTQ( BWA_MEM.out.bam )
}
6 changes: 6 additions & 0 deletions tests/modules/CCBR/custom/bam_to_fastq/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
process {

    // Publish each process's outputs under <outdir>/<prefix>, where <prefix> is
    // the lower-cased text before the first underscore of the process's simple
    // name (the last ':'-separated segment), e.g. BAM_TO_FASTQ -> "bam".
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_prefix = simple_name.tokenize('_')[0]
        "${params.outdir}/${tool_prefix.toLowerCase()}"
    }

}
19 changes: 19 additions & 0 deletions tests/modules/CCBR/custom/bam_to_fastq/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# pytest-workflow cases for custom/bam_to_fastq.
# With the test nextflow.config, BAM_TO_FASTQ publishes to output/bam and the
# upstream BWA processes publish to output/bwa.
- name: custom bam_to_fastq test_bam2fastq_single
  command: nextflow run ./tests/modules/CCBR/custom/bam_to_fastq -entry test_bam2fastq_single -c ./tests/config/nextflow.config
  tags:
    - custom/bam_to_fastq
    - custom
  files:
    - path: output/bam/test.R1.fastq.gz
    # versions.yml written by the module under test
    - path: output/bam/versions.yml
    - path: output/bwa/versions.yml

- name: custom bam_to_fastq test_bam2fastq_paired
  command: nextflow run ./tests/modules/CCBR/custom/bam_to_fastq -entry test_bam2fastq_paired -c ./tests/config/nextflow.config
  tags:
    - custom/bam_to_fastq
    - custom
  files:
    - path: output/bam/test.R1.fastq.gz
    - path: output/bam/test.R2.fastq.gz
    - path: output/bam/test.unpaired.fastq.gz
    # versions.yml written by the module under test
    - path: output/bam/versions.yml
    - path: output/bwa/versions.yml
Empty file.