Skip to content

Commit

Permalink
added IQtree and core to rename fasta headers for use with IQtree
Browse files Browse the repository at this point in the history
  • Loading branch information
zachary-foster committed Jul 20, 2023
1 parent 4b92bba commit a381e6b
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 3 deletions.
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,12 @@ process {
withName: BAKTA_BAKTA {
    // Extra command-line arguments handed to the BAKTA_BAKTA process via task.ext.args.
    // NOTE(review): `--force` presumably lets Bakta overwrite an existing output directory — confirm against the Bakta docs.
    ext.args = '--force'
}

withName: MAFFT {
    // Output prefix for MAFFT, given as a closure so it is evaluated per task:
    // the simple name (file name without extensions) of `fasta` plus "_aligned".
    // NOTE(review): `fasta` is assumed to be the name of the MAFFT process input — confirm against the module.
    ext.prefix = { "${fasta.getSimpleName()}_aligned" }
}

withName: IQTREE2 {
    // Extra arguments appended to the iqtree2 command line by the IQTREE2 process (read there as task.ext.args).
    // Per the IQ-TREE 2 command reference, `-B 1000` requests ultrafast bootstrap with 1000 replicates.
    ext.args = '-B 1000'
}
}
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"iqtree": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"mafft": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand Down
39 changes: 39 additions & 0 deletions modules/local/iqtree2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Infers a phylogenetic tree from an alignment with IQ-TREE 2.
//
// Inputs:
//   meta           - metadata value carried through unchanged to the output tuple
//   alignment      - alignment file(s) passed to `iqtree2 -s`
//   constant_sites - value passed to `-fconst`; the option is omitted when this is empty/falsy
//
// Outputs:
//   phylogeny - tuple of meta and the `*.treefile` written by iqtree2
//   versions  - versions.yml recording the IQ-TREE version
//
// NOTE(review): the calling workflow passes `MAFFT.out.fas.groupTuple()`, so `alignment` may be
// several files, which Nextflow interpolates space-separated after `-s` — confirm that this is
// what iqtree2 expects (a comma-separated list or a directory may be required).
process IQTREE2 {
    tag "$alignment"
    label 'process_medium'

    conda "bioconda::iqtree=2.1.4_beta"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/iqtree:2.1.4_beta--hdcc8f71_0' :
        'biocontainers/iqtree:2.1.4_beta--hdcc8f71_0' }"

    input:
    tuple val(meta), path(alignment)
    val constant_sites

    output:
    tuple val(meta), path("*.treefile"), emit: phylogeny
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args ?: ''
    def fconst_args = constant_sites ? "-fconst $constant_sites" : ''
    // task.memory is null when no memory directive is configured; only pass -mem when it is set.
    // Spaces are stripped so that e.g. "36 GB" becomes "36GB".
    def mem_args    = task.memory ? "-mem ${task.memory.toString().replaceAll(' ', '')}" : ''
    // The last iqtree2 argument must NOT end with a line continuation, otherwise the shell
    // appends the following `cat <<-END_VERSIONS` line to the iqtree2 command.
    // The version is queried from the same binary that is run (iqtree2).
    """
    iqtree2 \\
        $fconst_args \\
        $args \\
        -s $alignment \\
        -nt AUTO \\
        -ntmax $task.cpus \\
        $mem_args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        iqtree: \$(echo \$(iqtree2 -version 2>&1) | sed 's/^IQ-TREE multicore version //;s/ .*//')
    END_VERSIONS
    """
}
31 changes: 31 additions & 0 deletions modules/local/rename_core_gene_headers.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Rewrites the FASTA headers of every `*.fasta` file in a directory and writes the results
// to a new directory named `<prefix>_feat_seqs_renamed`.
//
// Inputs:
//   ref_meta  - metadata map; `ref_meta.id` is used for the task tag and as the default prefix
//   feat_seqs - directory containing the `*.fasta` files to process
//
// Outputs:
//   feat_seqs - tuple of ref_meta and the directory of rewritten FASTA files
process RENAMECOREGENEHEADERS {
    tag "$ref_meta.id"
    label 'process_single'

    conda "conda-forge::coreutils=9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'ubuntu:20.04' }"

    input:
    tuple val(ref_meta), path(feat_seqs)

    output:
    tuple val(ref_meta), path("${prefix}_feat_seqs_renamed"), emit: feat_seqs

    when:
    task.ext.when == null || task.ext.when

    script:
    // Declared without `def` on purpose so that `prefix` is also visible to the output block.
    prefix = task.ext.prefix ?: "${ref_meta.id}"
    // Shell paths are double-quoted so names containing spaces or glob characters are not
    // word-split. The sed expression replaces each line matching `>...genome:<X>gene...`
    // with `><X>`, where <X> is the text between `genome:` and the last `gene` on the line.
    """
    # Create folder for output
    mkdir "${prefix}_feat_seqs_renamed"

    # Rename headers to just sample ID
    for file in "${feat_seqs}"/*.fasta
    do
        sed 's/>.*genome:\\(.*\\)gene.*/>\\1/g' "\$file" > "${prefix}_feat_seqs_renamed/\$(basename "\$file")"
    done
    """
}
16 changes: 13 additions & 3 deletions subworkflows/local/core_genome_phylogeny.nf
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
include { PIRATE } from '../../modules/nf-core/pirate/main'
include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { MAFFT } from '../../modules/nf-core/mafft/main'
include { IQTREE2 } from '../../modules/local/iqtree2'
include { REFORMATPIRATERESULTS } from '../../modules/local/reformat_pirate_results'
include { ALIGNFEATURESEQUENCES } from '../../modules/local/align_feature_sequences'
include { SUBSETCOREGENES } from '../../modules/local/subset_core_genes'
include { RENAMECOREGENEHEADERS } from '../../modules/local/rename_core_gene_headers'

workflow CORE_GENOME_PHYLOGENY {

Expand All @@ -28,12 +30,20 @@ workflow CORE_GENOME_PHYLOGENY {
// Extract sequences of all genes (does not align, contrary to current name)
ALIGNFEATURESEQUENCES ( PIRATE.out.results )
ch_versions = ch_versions.mix(ALIGNFEATURESEQUENCES.out.versions.first())

// Rename FASTA file headers to start with just sample ID for use with IQTREE
RENAMECOREGENEHEADERS ( ALIGNFEATURESEQUENCES.out.feat_seqs )

// Filter for core single copy genes and link their extracted sequences to a new folder
SUBSETCOREGENES ( REFORMATPIRATERESULTS.out.gene_fam.join(ALIGNFEATURESEQUENCES.out.feat_seqs) )
// Filter for core single copy genes with no paralogs
SUBSETCOREGENES ( REFORMATPIRATERESULTS.out.gene_fam.join(RENAMECOREGENEHEADERS.out.feat_seqs) )

// Align each gene family with mafft
MAFFT( SUBSETCOREGENES.out.feat_seq.transpose(), [] )
MAFFT ( SUBSETCOREGENES.out.feat_seq.transpose(), [] )
ch_versions = ch_versions.mix(MAFFT.out.versions.first())

// Infer phylogenetic tree from aligned core genes
IQTREE2 ( MAFFT.out.fas.groupTuple(), [] )
ch_versions = ch_versions.mix(IQTREE2.out.versions.first())

emit:
pirate_aln = PIRATE.out.aln // channel: [ ref_meta, align_fasta ]
Expand Down

0 comments on commit a381e6b

Please sign in to comment.