Skip to content

Commit

Permalink
Merge pull request #32 from CCBR/champagne-qc-fixes
Browse files Browse the repository at this point in the history
Refactor with fixes from Champagne QC
  • Loading branch information
kelly-sovacool authored Nov 1, 2023
2 parents 19d2424 + 2ccd43e commit ad8b993
Show file tree
Hide file tree
Showing 14 changed files with 299 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Our documentation website is now live: <https://ccbr.github.io/nf-modules/> (#16
- bwa/mem
- also runs samtools sort & outputs index in bai format. (#12)
- custom/bam2fastq (#14,#22)
- custom/countfastq (#32)
- cutadapt (#11)
- khmer/uniquekmers (#7)
- picard/samtofastq (#21)
Expand Down
27 changes: 27 additions & 0 deletions modules/CCBR/custom/countfastq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

// Count the reads contained in the FASTQ file(s) staged for one sample.
// Emits a text file holding the count plus a versions.yml file.
process CUSTOM_COUNTFASTQ {
    tag "${meta.id}"
    label 'process_single'

    container 'nciccbr/ccbr_ubuntu_base_20.04:v6.1'

    input:
    tuple val(meta), path(fastq)

    output:
    tuple val(meta), path('*.txt'), emit: count
    path 'versions.yml'           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // The work is done by the Python template in this module's templates/ directory.
    template 'count-fastq.py'

    stub:
    // Stub runs write -1 as the count and create an empty versions.yml.
    """
    count=-1
    echo \$count > ${meta.id}.count.txt
    touch versions.yml
    """
}
41 changes: 41 additions & 0 deletions modules/CCBR/custom/countfastq/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Module metadata for custom/countfastq.
name: custom_countfastq
description: |
  Count reads in a fastq file
keywords:
  - fastq
  - biopython
  - python
tools:
  - Biopython:
      description: |
        Python tools for computational molecular biology
      homepage: https://biopython.org/
      tool_dev_url: https://github.com/biopython/biopython
      doi: 10.1093/bioinformatics/btp163
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastq:
      type: file
      # The script opens every input with gzip, so inputs must be gzip-compressed.
      description: One or more gzip-compressed fastq files
      pattern: "*.fastq.gz"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - count:
      type: file
      description: Plain text file containing the total number of reads across the fastq files
      pattern: "*.count.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@kelly-sovacool"
maintainers:
  - "@kelly-sovacool"
27 changes: 27 additions & 0 deletions modules/CCBR/custom/countfastq/templates/count-fastq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env python
import Bio.SeqIO
import gzip
import platform


def main():
    """Count the FASTQ records in every staged input file and write the total.

    The input file names come from a whitespace-separated string filled in by
    the workflow engine. Each file is opened as gzip-compressed text. The
    grand total is written, without a trailing newline, to the sample's
    count file.

    Returns:
        int: total number of records across all input files.
    """
    total_reads = 0
    fastq_paths = "${fastq}".split()
    for fastq_path in fastq_paths:
        with gzip.open(fastq_path, "rt") as handle:
            # Walk the parser lazily so records are never held in memory.
            for _record in Bio.SeqIO.parse(handle, "fastq"):
                total_reads += 1
    with open("${meta.id}.count.txt", "w") as count_file:
        count_file.write(str(total_reads))
    return total_reads


def write_versions():
    """Write versions.yml recording the Python and Biopython versions in use.

    The first line is the quoted process name followed by a colon; the
    Python and Biopython entries follow, one per line.
    """
    with open("versions.yml", "w") as versions_file:
        # Adjacent literals are concatenated into a single write.
        versions_file.write(
            '"${task.process}":\\n'
            f' Python: "{platform.python_version()}"\\n'
            f' Biopython: "{Bio.__version__}"\\n'
        )


# Script entry point: record tool versions first, then count the reads.
if __name__ == "__main__":
    write_versions()
    main()
5 changes: 3 additions & 2 deletions modules/CCBR/picard/samtofastq/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ process PICARD_SAMTOFASTQ {
tuple val(meta), path(bam)

output:
tuple val(meta), path("*_?.fastq.gz"), emit: reads
path "versions.yml", emit: versions
tuple val(meta), path("*.fastq.gz"), emit: reads
tuple val(meta), path("*_?.fastq.gz"), emit: paired, optional: true
tuple val(meta), path("*unpaired.fastq.gz"), emit: unpaired, optional: true
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
46 changes: 46 additions & 0 deletions modules/CCBR/samtools/flagstat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Run `samtools flagstat` on an alignment file and capture its report.
// Emits <prefix>.flagstat and a versions.yml recording the samtools version.
process SAMTOOLS_FLAGSTAT {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::samtools=1.17"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
        'biocontainers/samtools:1.17--h00cdaf9_0' }"

    input:
    tuple val(meta), path(bam), path(bai)

    output:
    tuple val(meta), path("*.flagstat"), emit: flagstat
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options may be supplied through task.ext.args (empty by default).
    def args   = task.ext.args ?: ''
    // The output name defaults to the alignment file's base name.
    def prefix = task.ext.prefix ?: "${bam.baseName}"
    """
    samtools \\
        flagstat \\
        --threads ${task.cpus} \\
        $args \\
        $bam \\
        > ${prefix}.flagstat

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    // Same default prefix as the real script, so stub runs produce the same file names.
    def prefix = task.ext.prefix ?: "${bam.baseName}"
    """
    touch ${prefix}.flagstat

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
51 changes: 51 additions & 0 deletions modules/CCBR/samtools/flagstat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Module metadata for samtools/flagstat.
name: samtools_flagstat
description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type
keywords:
  - stats
  - mapping
  - counts
  - bam
  - sam
  - cram
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: Index for BAM/CRAM/SAM file
      pattern: "*.{bai,crai,sai}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - flagstat:
      type: file
      description: File containing samtools flagstat output
      pattern: "*.{flagstat}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@drpatelh"
maintainers:
  - "@drpatelh"
14 changes: 9 additions & 5 deletions subworkflows/CCBR/filter_blacklist/main.nf
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@


include { BWA_MEM } from '../../../modules/CCBR/bwa/mem'
include { BWA_MEM } from '../../../modules/CCBR/bwa/mem'
include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned'
include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq'
include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq'
include { CUSTOM_COUNTFASTQ } from '../../../modules/CCBR/custom/countfastq'

workflow FILTER_BLACKLIST {
take:
Expand All @@ -15,14 +16,17 @@ workflow FILTER_BLACKLIST {
BWA_MEM ( ch_fastq_input, ch_blacklist_index )
SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam )
PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam )
CUSTOM_COUNTFASTQ( PICARD_SAMTOFASTQ.out.paired )

ch_versions = ch_versions.mix(
BWA_MEM.out.versions,
SAMTOOLS_FILTERALIGNED.out.versions,
PICARD_SAMTOFASTQ.out.versions
PICARD_SAMTOFASTQ.out.versions,
CUSTOM_COUNTFASTQ.out.versions
)

emit:
reads = PICARD_SAMTOFASTQ.out.reads // channel: [ val(meta), path(fastq) ]
versions = ch_versions // channel: [ path(versions.yml) ]
reads = PICARD_SAMTOFASTQ.out.paired // channel: [ val(meta), path(fastq) ]
n_surviving_reads = CUSTOM_COUNTFASTQ.out.count
versions = ch_versions // channel: [ path(versions.yml) ]
}
1 change: 1 addition & 0 deletions subworkflows/CCBR/filter_blacklist/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ components:
- bwa/mem
- samtools/filteraligned
- picard/samtofastq
- custom/countfastq
input:
- ch_fastq_input:
type: map
Expand Down
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ custom/bam2fastq:
- modules/CCBR/custom/bam2fastq/**
- tests/modules/CCBR/custom/bam2fastq/**

custom/countfastq:
- modules/CCBR/custom/countfastq/**
- tests/modules/CCBR/custom/countfastq/**

cutadapt:
- modules/CCBR/cutadapt/**
- tests/modules/CCBR/cutadapt/**
Expand Down
28 changes: 28 additions & 0 deletions tests/modules/CCBR/custom/countfastq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CUSTOM_COUNTFASTQ } from '../../../../../modules/CCBR/custom/countfastq/main.nf'

// Single-end case: one gzipped FASTQ file for sample 'test'.
workflow test_countfastq_single {
    meta  = [ id:'test', single_end:true ]
    reads = [ file(params.test_data['test_1_fastq_gz'], checkIfExists: true) ]

    CUSTOM_COUNTFASTQ( [ meta, reads ] )
}

// Paired-end case: both FASTQ files of sample 'test' are passed together.
workflow test_countfastq_paired {
    meta   = [ id:'test', single_end:false ]
    read_1 = file(params.test_data['test_1_fastq_gz'], checkIfExists: true)
    read_2 = file(params.test_data['test_2_fastq_gz'], checkIfExists: true)

    CUSTOM_COUNTFASTQ( [ meta, [ read_1, read_2 ] ] )
}

// Blank-input case: uses the 'test_blank_fastq_gz' test data entry.
workflow test_countfastq_blank {
    meta  = [ id:'test', single_end:true ]
    reads = [ file(params.test_data['test_blank_fastq_gz'], checkIfExists: true) ]

    CUSTOM_COUNTFASTQ( [ meta, reads ] )
}
5 changes: 5 additions & 0 deletions tests/modules/CCBR/custom/countfastq/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {
    // Publish outputs under <params.outdir>/<tool>. <tool> is derived from the
    // process name: keep the part after the last ':', take the text before its
    // first '_', and lower-case it.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

includeConfig '../../../../config/test_data_CCBR.config'
44 changes: 44 additions & 0 deletions tests/modules/CCBR/custom/countfastq/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Test cases for the custom/countfastq module.
- name: custom countfastq test_countfastq_single
  command: nextflow run ./tests/modules/CCBR/custom/countfastq -entry test_countfastq_single -c ./tests/config/nextflow.config
  tags:
    - custom
    - custom/countfastq
  files:
    - path: output/custom/test.count.txt
      md5sum: f899139df5e1059396431415e770c6dd
      contains:
        - "100"
    - path: output/custom/versions.yml

- name: custom countfastq test_countfastq_paired
  command: nextflow run ./tests/modules/CCBR/custom/countfastq -entry test_countfastq_paired -c ./tests/config/nextflow.config
  tags:
    - custom
    - custom/countfastq
  files:
    - path: output/custom/test.count.txt
      md5sum: 3644a684f98ea8fe223c713b77189a77
      contains:
        - "200"
    - path: output/custom/versions.yml

- name: custom countfastq test_countfastq_blank
  command: nextflow run ./tests/modules/CCBR/custom/countfastq -entry test_countfastq_blank -c ./tests/config/nextflow.config
  tags:
    - custom
    - custom/countfastq
  files:
    - path: output/custom/test.count.txt
      md5sum: cfcd208495d565ef66e7dff9f98764da
      contains:
        - "0"
    - path: output/custom/versions.yml

# Stub run: only checks that the expected files are created.
- name: custom countfastq test_countfastq_single stub
  command: nextflow run ./tests/modules/CCBR/custom/countfastq -entry test_countfastq_single -c ./tests/config/nextflow.config -stub
  tags:
    - custom
    - custom/countfastq
  files:
    - path: output/custom/test.count.txt
    - path: output/custom/versions.yml
18 changes: 12 additions & 6 deletions tests/subworkflows/CCBR/filter_blacklist/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
tags:
- subworkflows
- subworkflows/filter_blacklist
- bwa
- bwa/mem
- custom
- custom/countfastq
- picard
- picard/samtofastq
- samtools
- samtools/filteraligned
- bwa
- bwa/mem
files:
- path: output/picard/test_1.fastq.gz

Expand All @@ -17,12 +19,14 @@
tags:
- subworkflows
- subworkflows/filter_blacklist
- bwa
- bwa/mem
- custom
- custom/countfastq
- picard
- picard/samtofastq
- samtools
- samtools/filteraligned
- bwa
- bwa/mem
files:
- path: output/picard/test_1.fastq.gz
- path: output/picard/test_2.fastq.gz
Expand All @@ -33,11 +37,13 @@
tags:
- subworkflows
- subworkflows/filter_blacklist
- bwa
- bwa/mem
- custom
- custom/countfastq
- picard
- picard/samtofastq
- samtools
- samtools/filteraligned
- bwa
- bwa/mem
files:
- path: output/picard/test_1.fastq.gz

0 comments on commit ad8b993

Please sign in to comment.