diff --git a/conf/modules.config b/conf/modules.config
index febd9b16..e3c116a3 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -90,4 +90,12 @@ process {
     withName: BAKTA_BAKTA {
         ext.args = '--force'
     }
+
+    withName: MAFFT {
+        ext.prefix = { "${fasta.getSimpleName()}_aligned" }
+    }
+
+    withName: IQTREE2 {
+        ext.args = '-B 1000'
+    }
 }
diff --git a/modules.json b/modules.json
index 2ff6365b..2b1572d2 100644
--- a/modules.json
+++ b/modules.json
@@ -50,6 +50,11 @@
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
                         "installed_by": ["modules"]
                     },
+                    "iqtree": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": ["modules"]
+                    },
                     "mafft": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
diff --git a/modules/local/iqtree2.nf b/modules/local/iqtree2.nf
new file mode 100644
index 00000000..b3e9ce5e
--- /dev/null
+++ b/modules/local/iqtree2.nf
@@ -0,0 +1,44 @@
+// Run iqtree2 on the staged alignment input and emit the resulting *.treefile plus versions.yml.
+// `constant_sites` is optional: when truthy it is passed to `-fconst`, otherwise the flag is omitted.
+process IQTREE2 {
+    tag "$alignment"
+    label 'process_medium'
+
+    conda "bioconda::iqtree=2.1.4_beta"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/iqtree:2.1.4_beta--hdcc8f71_0' :
+        'biocontainers/iqtree:2.1.4_beta--hdcc8f71_0' }"
+
+    input:
+    tuple val(meta), path(alignment)
+    val constant_sites
+
+    output:
+    tuple val(meta), path("*.treefile"), emit: phylogeny
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // NOTE(review): the caller in core_genome_phylogeny.nf passes MAFFT.out.fas.groupTuple(), so
+    // $alignment may expand to several space-separated files after -s; confirm IQ-TREE accepts this
+    def args = task.ext.args ?: ''
+    def fconst_args = constant_sites ? "-fconst $constant_sites" : ''
+    // Strip the space from the task memory string before handing it to -mem
+    def memory = task.memory.toString().replaceAll(' ', '')
+    """
+    iqtree2 \\
+        $fconst_args \\
+        $args \\
+        -s $alignment \\
+        -nt AUTO \\
+        -ntmax $task.cpus \\
+        -mem $memory
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        iqtree: \$(echo \$(iqtree2 -version 2>&1) | sed 's/^IQ-TREE multicore version //;s/ .*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/rename_core_gene_headers.nf b/modules/local/rename_core_gene_headers.nf
new file mode 100644
index 00000000..a2611dc5
--- /dev/null
+++ b/modules/local/rename_core_gene_headers.nf
@@ -0,0 +1,33 @@
+// Rewrite the FASTA headers of every *.fasta file in the input folder with sed and
+// write the results to a new folder named <prefix>_feat_seqs_renamed (prefix defaults to ref_meta.id).
+process RENAMECOREGENEHEADERS {
+    tag "$ref_meta.id"
+    label 'process_single'
+
+    conda "conda-forge::coreutils=9.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'ubuntu:20.04' }"
+
+    input:
+    tuple val(ref_meta), path(feat_seqs)
+
+    output:
+    tuple val(ref_meta), path("${prefix}_feat_seqs_renamed"), emit: feat_seqs
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${ref_meta.id}"
+    """
+    # Create folder for output
+    mkdir ${prefix}_feat_seqs_renamed
+
+    # Rename headers to just sample ID (the text captured between 'genome:' and 'gene')
+    for file in ${feat_seqs}/*.fasta
+    do
+        sed 's/>.*genome:\\(.*\\)gene.*/>\\1/g' \$file > ${prefix}_feat_seqs_renamed/\$(basename \$file)
+    done
+    """
+}
diff --git a/subworkflows/local/core_genome_phylogeny.nf b/subworkflows/local/core_genome_phylogeny.nf
index 9a08d467..b390bae1 100644
--- a/subworkflows/local/core_genome_phylogeny.nf
+++ b/subworkflows/local/core_genome_phylogeny.nf
@@ -1,9 +1,11 @@
 include { PIRATE } from '../../modules/nf-core/pirate/main'
 include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
 include { MAFFT } from '../../modules/nf-core/mafft/main'
+include { IQTREE2 } from '../../modules/local/iqtree2'
 include { REFORMATPIRATERESULTS } from '../../modules/local/reformat_pirate_results'
 include { ALIGNFEATURESEQUENCES } from '../../modules/local/align_feature_sequences'
 include { SUBSETCOREGENES } from '../../modules/local/subset_core_genes'
+include { RENAMECOREGENEHEADERS } from '../../modules/local/rename_core_gene_headers'
 
 workflow CORE_GENOME_PHYLOGENY {
 
@@ -28,12 +30,20 @@ workflow CORE_GENOME_PHYLOGENY {
     // Extract sequences of all genes (does not align, contrary to current name)
     ALIGNFEATURESEQUENCES ( PIRATE.out.results )
     ch_versions = ch_versions.mix(ALIGNFEATURESEQUENCES.out.versions.first())
+
+    // Rename FASTA file headers to start with just sample ID for use with IQTREE
+    RENAMECOREGENEHEADERS ( ALIGNFEATURESEQUENCES.out.feat_seqs )
 
-    // Filter for core single copy genes and link their extracted sequences to a new folder
-    SUBSETCOREGENES ( REFORMATPIRATERESULTS.out.gene_fam.join(ALIGNFEATURESEQUENCES.out.feat_seqs) )
+    // Filter for core single copy genes with no paralogs
+    SUBSETCOREGENES ( REFORMATPIRATERESULTS.out.gene_fam.join(RENAMECOREGENEHEADERS.out.feat_seqs) )
 
     // Align each gene family with mafft
-    MAFFT( SUBSETCOREGENES.out.feat_seq.transpose(), [] )
+    MAFFT ( SUBSETCOREGENES.out.feat_seq.transpose(), [] )
+    ch_versions = ch_versions.mix(MAFFT.out.versions.first())
+
+    // Infer phylogenetic tree from aligned core genes
+    IQTREE2 ( MAFFT.out.fas.groupTuple(), [] )
+    ch_versions = ch_versions.mix(IQTREE2.out.versions.first())
 
     emit:
     pirate_aln = PIRATE.out.aln // channel: [ ref_meta, align_fasta ]