From 7f4ddabfa245486f11744272e604e9ccc598b5f1 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 28 Jun 2024 10:08:26 -0400 Subject: [PATCH 01/27] Changed the name of assembled genomes from 'contigs' to 'fastq_1' to work with IRIDA-Next UI --- assets/samplesheet.csv | 2 +- assets/schema_input.json | 6 +++--- modules/local/staramr/main.nf | 8 ++++---- nextflow_schema.json | 30 ++++++------------------------ tests/assets/test_samplesheet.csv | 2 +- 5 files changed, 15 insertions(+), 33 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index e8b8694..4ea51fd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ -sample,contigs,species +sample,fastq_1,species GCA_000008105,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/salmonella/GCA_000008105.1_ASM810v1_genomic.fna.gz,Salmonella enterica GCA_000947975,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/ecoli/GCA_000947975.1_ASM94797v1_genomic.fna.gz,Escherichia coli GCF_000196035,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/listeria/GCF_000196035.1_ASM19603v1_genomic.fna.gz,Listeria monocytogenes diff --git a/assets/schema_input.json b/assets/schema_input.json index fd255cc..0eb2b6d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,10 +13,10 @@ "meta": ["id"], "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "contigs": { + "fastq_1": { "type": "string", "pattern": "^\\S+\\.f(ast|n)?a\\.gz$", - "errorMessage": "FASTA file containing assembled contigs, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'" + "errorMessage": "FASTA file containing assembled contigs/genomes, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'" }, "species": { "type": "string", @@ -24,6 +24,6 @@ "errorMessage": "The name of the species." } }, - "required": ["sample", "contigs"] + "required": ["sample", "fastq_1"] } } diff --git a/modules/local/staramr/main.nf b/modules/local/staramr/main.nf index 289524a..4537102 100644 --- a/modules/local/staramr/main.nf +++ b/modules/local/staramr/main.nf @@ -8,7 +8,7 @@ process STARAMR_SEARCH { 'biocontainers/staramr:0.10.0--pyhdfd78af_0' }" input: - tuple val(meta), path(contigs) + tuple val(meta), path(fastq_1) output: tuple val(meta), path("*_results/results.xlsx") , emit: results_xlsx @@ -27,12 +27,12 @@ process STARAMR_SEARCH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def is_gzipped = contigs.getName().endsWith(".gz") ? true : false - def genome_uncompressed_name = contigs.getName().replace(".gz", "") + def is_gzipped = fastq_1.getName().endsWith(".gz") ? true : false + def genome_uncompressed_name = fastq_1.getName().replace(".gz", "") def genome_filename = "${meta.id}.fasta" """ if [ "$is_gzipped" = "true" ]; then - gzip -c -d $contigs > $genome_uncompressed_name + gzip -c -d $fastq_1 > $genome_uncompressed_name fi #Change name of input genome to allow irida-next output of metadata diff --git a/nextflow_schema.json b/nextflow_schema.json index 67074e9..eb6815c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,27 +38,6 @@ } } }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - } - } - }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -232,9 +211,6 @@ { "$ref": "#/definitions/input_output_options" }, - { - "$ref": "#/definitions/reference_genome_options" - }, { "$ref": "#/definitions/institutional_config_options" }, @@ -254,6 +230,12 @@ }, "mlst_scheme": { "type": "string" + }, + "genome": { + "type": "string" + }, + "igenomes_ignore": { + "type": "boolean" } } } diff --git a/tests/assets/test_samplesheet.csv b/tests/assets/test_samplesheet.csv index e8b8694..4ea51fd 100644 --- a/tests/assets/test_samplesheet.csv +++ b/tests/assets/test_samplesheet.csv @@ -1,4 +1,4 @@ -sample,contigs,species +sample,fastq_1,species GCA_000008105,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/salmonella/GCA_000008105.1_ASM810v1_genomic.fna.gz,Salmonella enterica GCA_000947975,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/ecoli/GCA_000947975.1_ASM94797v1_genomic.fna.gz,Escherichia coli GCF_000196035,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/listeria/GCF_000196035.1_ASM19603v1_genomic.fna.gz,Listeria monocytogenes From 80ffe12ab801c623c23729fee73d2f29361ad6de Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 28 Jun 2024 15:17:51 -0400 Subject: [PATCH 02/27] Revert "Changed the name of assembled genomes from 'contigs' to 'fastq_1' to work with IRIDA-Next UI" This reverts commit 7f4ddabfa245486f11744272e604e9ccc598b5f1. --- assets/samplesheet.csv | 2 +- assets/schema_input.json | 6 +++--- modules/local/staramr/main.nf | 8 ++++---- nextflow_schema.json | 30 ++++++++++++++++++++++++------ tests/assets/test_samplesheet.csv | 2 +- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 4ea51fd..e8b8694 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ -sample,fastq_1,species +sample,contigs,species GCA_000008105,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/salmonella/GCA_000008105.1_ASM810v1_genomic.fna.gz,Salmonella enterica GCA_000947975,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/ecoli/GCA_000947975.1_ASM94797v1_genomic.fna.gz,Escherichia coli GCF_000196035,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/listeria/GCF_000196035.1_ASM19603v1_genomic.fna.gz,Listeria monocytogenes diff --git a/assets/schema_input.json b/assets/schema_input.json index 0eb2b6d..fd255cc 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,10 +13,10 @@ "meta": ["id"], "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "fastq_1": { + "contigs": { "type": "string", "pattern": "^\\S+\\.f(ast|n)?a\\.gz$", - "errorMessage": "FASTA file containing assembled contigs/genomes, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'" + "errorMessage": "FASTA file containing assembled contigs, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'" }, "species": { "type": "string", @@ -24,6 +24,6 @@ "errorMessage": "The name of the species." } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "contigs"] } } diff --git a/modules/local/staramr/main.nf b/modules/local/staramr/main.nf index 4537102..289524a 100644 --- a/modules/local/staramr/main.nf +++ b/modules/local/staramr/main.nf @@ -8,7 +8,7 @@ process STARAMR_SEARCH { 'biocontainers/staramr:0.10.0--pyhdfd78af_0' }" input: - tuple val(meta), path(fastq_1) + tuple val(meta), path(contigs) output: tuple val(meta), path("*_results/results.xlsx") , emit: results_xlsx @@ -27,12 +27,12 @@ process STARAMR_SEARCH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def is_gzipped = fastq_1.getName().endsWith(".gz") ? true : false - def genome_uncompressed_name = fastq_1.getName().replace(".gz", "") + def is_gzipped = contigs.getName().endsWith(".gz") ? true : false + def genome_uncompressed_name = contigs.getName().replace(".gz", "") def genome_filename = "${meta.id}.fasta" """ if [ "$is_gzipped" = "true" ]; then - gzip -c -d $fastq_1 > $genome_uncompressed_name + gzip -c -d $contigs > $genome_uncompressed_name fi #Change name of input genome to allow irida-next output of metadata diff --git a/nextflow_schema.json b/nextflow_schema.json index eb6815c..67074e9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,6 +38,27 @@ } } }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -211,6 +232,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/reference_genome_options" + }, { "$ref": "#/definitions/institutional_config_options" }, @@ -230,12 +254,6 @@ }, "mlst_scheme": { "type": "string" - }, - "genome": { - "type": "string" - }, - "igenomes_ignore": { - "type": "boolean" } } } diff --git a/tests/assets/test_samplesheet.csv b/tests/assets/test_samplesheet.csv index 4ea51fd..e8b8694 100644 --- a/tests/assets/test_samplesheet.csv +++ b/tests/assets/test_samplesheet.csv @@ -1,4 +1,4 @@ -sample,fastq_1,species +sample,contigs,species GCA_000008105,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/salmonella/GCA_000008105.1_ASM810v1_genomic.fna.gz,Salmonella enterica GCA_000947975,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/ecoli/GCA_000947975.1_ASM94797v1_genomic.fna.gz,Escherichia coli GCF_000196035,https://github.com/phac-nml/staramrnf/raw/dev/tests/genomes/listeria/GCF_000196035.1_ASM19603v1_genomic.fna.gz,Listeria monocytogenes From 4c8f4f4886cc6548429b00e83ea9e4d655e1935b Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 2 Jul 2024 10:03:58 -0400 Subject: [PATCH 03/27] Contig input set to file-path to trigger autopopulating sample contig file on IRIDA-Next --- assets/schema_input.json | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/schema_input.json b/assets/schema_input.json index fd255cc..3814472 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -15,6 +15,7 @@ }, "contigs": { "type": "string", + "format": "file-path", "pattern": "^\\S+\\.f(ast|n)?a\\.gz$", "errorMessage": "FASTA file containing assembled contigs, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'" }, From abcc9d2e89c9c71c4420ff296982141e7f30ece5 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 4 Jul 2024 11:46:40 -0400 Subject: [PATCH 04/27] pointfinder_database added to IRIDA-Next to use on all samples --- conf/modules.config | 4 ++-- nextflow_schema.json | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0270459..9d878a3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -58,8 +58,8 @@ process { ext.args = { [ // Pointfinder database: - params.pointfinder_database && valid_point_db(params.pointfinder_database) ? - point_db_arg(params.pointfinder_database) : + params.pointfinder_database && valid_point_db(convert(params.pointfinder_database)) ? + point_db_arg(convert(params.pointfinder_database)) : meta.species && valid_point_db(convert(meta.species)) ? point_db_arg(convert(meta.species)) : "", diff --git a/nextflow_schema.json b/nextflow_schema.json index 67074e9..72991d0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,6 +38,18 @@ } } }, + "pointfinder_database": { + "title": "PointFinder Organism", + "type": "object", + "description": "The organism to use for pointfinder.", + "fa_icon": "fas fa-terminal", + "properties": { + "pointfinder_database": { + "enum" : ["","Enterococcus faecium", "Enterococcus faecalis", "Helicobacter pylori", "Salmonella", "Campylobacter", "Escherichia coli"], + "description": "The organism to use for pointfinder. Validated: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" + } + } + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -243,6 +255,9 @@ }, { "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/pointfinder_database" } ], "properties": { From a816aa82f6b7e31538e8d33771679cbcb5a0ba4f Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 4 Jul 2024 14:10:16 -0400 Subject: [PATCH 05/27] Fix linting issues --- nextflow_schema.json | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 72991d0..0373966 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,7 +38,7 @@ } } }, - "pointfinder_database": { + "database": { "title": "PointFinder Organism", "type": "object", "description": "The organism to use for pointfinder.", @@ -257,18 +257,7 @@ "$ref": "#/definitions/generic_options" }, { - "$ref": "#/definitions/pointfinder_database" + "$ref": "#/definitions/database" } - ], - "properties": { - "pointfinder_database": { - "type": "string" - }, - "plasmidfinder_database": { - "type": "string" - }, - "mlst_scheme": { - "type": "string" - } - } + ] } From 5c41d29b4b17cdee0562aeed04376b18965c53cd Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 5 Jul 2024 11:31:38 -0400 Subject: [PATCH 06/27] Adding a parameter to pipeline that seems to be breaking nextflow.config file --- conf/modules.config | 5 +++++ nextflow.config | 1 + nextflow_schema.json | 43 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9d878a3..894084d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -50,6 +50,7 @@ process { def point_db_arg = {String database -> " --pointfinder-organism ${database} " } def plasmid_db_arg = {String database -> " --plasmidfinder-database-type ${database} " } def mlst_arg = {String scheme -> " --mlst-scheme ${scheme} " } + def minimum_contig_length_arg = {String min_length -> " --minimum-contig-length ${min_length} "} // Check to see if the database name is valid: def valid_point_db = {String database -> pointfinder_databases.contains(database)} @@ -70,6 +71,10 @@ process { // MLST scheme: params.mlst_scheme ? mlst_arg(params.mlst_scheme) : "" + + // Additional parameters + params.minimum_contig_length + ? minimum_contig_length_arg(params.minimum_contig_length) : "" ].join(" ") } } diff --git a/nextflow.config b/nextflow.config index 895a6d0..8960101 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,6 +54,7 @@ params { pointfinder_database = null plasmidfinder_database = null mlst_scheme = null + minimum_contig_length = null } diff --git a/nextflow_schema.json b/nextflow_schema.json index 0373966..731658a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -39,14 +39,42 @@ } }, "database": { - "title": "PointFinder Organism", + "title": "Databases", "type": "object", - "description": "The organism to use for pointfinder.", + "description": "Select databases to be run on all samples.", "fa_icon": "fas fa-terminal", "properties": { "pointfinder_database": { - "enum" : ["","Enterococcus faecium", "Enterococcus faecalis", "Helicobacter pylori", "Salmonella", "Campylobacter", "Escherichia coli"], + "enum": [ + "", + "Enterococcus faecium", + "Enterococcus faecalis", + "Helicobacter pylori", + "Salmonella", + "Campylobacter", + "Escherichia coli" + ], "description": "The organism to use for pointfinder. Validated: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" + }, + "plasmidfinder_database": { + "enum": ["", "gram_positive", "enterobacteriales"], + "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [None]." + }, + "mlst_scheme": { + "type": "string", + "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [None]" + } + } + }, + "additional_settings": { + "title": "Additional Settings", + "type": "object", + "description": "For advanced changes to staramr", + "default": "", + "properties": { + "minimum_contig_length": { + "type": "string", + "description": "The minimum contig length for the quality metrics. Defaults to 300 bp." } } }, @@ -244,6 +272,12 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/database" + }, + { + "$ref": "#/definitions/additional_settings" + }, { "$ref": "#/definitions/reference_genome_options" }, @@ -255,9 +289,6 @@ }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/database" } ] } From 8781a5d08c218ea8fb5ed32e0e9c68660c7a6d3f Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 5 Jul 2024 14:34:55 -0400 Subject: [PATCH 07/27] Fixed the syntax issues in previous commit --- conf/modules.config | 4 ++-- nextflow_schema.json | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 894084d..a6af0db 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -70,11 +70,11 @@ process { // MLST scheme: params.mlst_scheme - ? mlst_arg(params.mlst_scheme) : "" + ? mlst_arg(params.mlst_scheme) : "", // Additional parameters params.minimum_contig_length - ? minimum_contig_length_arg(params.minimum_contig_length) : "" + ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "" ].join(" ") } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 731658a..b3a2fe0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -70,10 +70,9 @@ "title": "Additional Settings", "type": "object", "description": "For advanced changes to staramr", - "default": "", "properties": { "minimum_contig_length": { - "type": "string", + "type": "integer", "description": "The minimum contig length for the quality metrics. Defaults to 300 bp." } } From 56b5cfc8f24dc21b0619bb96f8df6a948b61ce68 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 9 Jul 2024 15:18:33 -0400 Subject: [PATCH 08/27] Added additional CLI arguments --- conf/modules.config | 49 ++++++++++++++++++++++++++++++- nextflow.config | 18 +++++++++++- nextflow_schema.json | 69 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 129 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a6af0db..01d0e7c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -51,6 +51,17 @@ process { def plasmid_db_arg = {String database -> " --plasmidfinder-database-type ${database} " } def mlst_arg = {String scheme -> " --mlst-scheme ${scheme} " } def minimum_contig_length_arg = {String min_length -> " --minimum-contig-length ${min_length} "} + def genome_size_lower_bound_arg = {String min_genome -> " --genome-size-lower-bound ${min_genome} "} + def genome_size_upper_bound_arg = {String max_genome -> " --genome-size-upper-bound ${max_genome} "} + def minimum_N50_value_arg = {String min_n50 -> " --minimum-N50-value ${min_n50} "} + def unacceptable_number_contigs_arg = {String min_length -> " --unacceptable-number-contigs ${min_length} "} + def pid_threshold_arg = {String min_pid -> " --pid-threshold ${min_pid} "} + def percent_length_overlap_plasmidfinder_arg = {String min_overlap -> " --percent-length-overlap-plasmidfinder ${min_overlap} "} + def percent_length_overlap_resfinder_arg = {String min_overlap -> " --percent-length-overlap-resfinder ${min_overlap} "} + def percent_length_overlap_pointfinder_arg = {String min_overlap -> " --percent-length-overlap-pointfinder ${min_overlap} "} + def no_exclude_genes_arg = {String exclude_gene -> " --no-exclude-genes ${exclude_gene} "} + def exclude_negatives_arg = {String exclude_neg -> " --exclude-negatives ${exclude_neg} "} + def exclude_resistance_phenotypes_arg = {String exclude_pheno -> " --exclude-resistance-phenotypes ${exclude_pheno} "} // Check to see if the database name is valid: def valid_point_db = {String database -> pointfinder_databases.contains(database)} @@ -74,7 +85,43 @@ process { // Additional parameters params.minimum_contig_length - ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "" + ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "", + + params.genome_size_lower_bound + ? genome_size_lower_bound_arg(params.genome_size_lower_bound.toString()) : "", + + params.genome_size_upper_bound + ? genome_size_upper_bound_arg(params.genome_size_upper_bound.toString()) : "", + + params.minimum_N50_value + ? minimum_N50_value_arg(params.minimum_N50_value.toString()) : "", + + params.minimum_contig_length + ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "", + + params.unacceptable_number_contigs + ? unacceptable_number_contigs_arg(params.unacceptable_number_contigs.toString()) : "", + + params.pid_threshold + ? pid_threshold_arg(params.pid_threshold.toString()) : "", + + params.percent_length_overlap_plasmidfinder + ? percent_length_overlap_plasmidfinder_arg(params.percent_length_overlap_plasmidfinder.toString()) : "", + + params.percent_length_overlap_resfinder + ? percent_length_overlap_resfinder_arg(params.percent_length_overlap_resfinder.toString()) : "", + + params.percent_length_overlap_pointfinder + ? percent_length_overlap_pointfinder_arg(params.percent_length_overlap_pointfinder.toString()) : "", + + params.no_exclude_genes + ? no_exclude_genes_arg(params.no_exclude_genes.toString()) : "", + + params.exclude_negatives + ? exclude_negatives_arg(params.exclude_negatives.toString()) : "", + + params.exclude_resistance_phenotypes + ? exclude_resistance_phenotypes_arg(params.exclude_resistance_phenotypes.toString()) : "" ].join(" ") } } diff --git a/nextflow.config b/nextflow.config index 8960101..f0e5861 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,12 +50,28 @@ params { validationShowHiddenParams = false validate_params = true - //StarAMR options + // StarAMR options + + // Databases pointfinder_database = null plasmidfinder_database = null mlst_scheme = null minimum_contig_length = null + // Additional CLI arguments + genome_size_lower_bound = null + genome_size_upper_bound = null + minimum_N50_value = null + minimum_contig_length = null + unacceptable_number_contigs = null + pid_threshold = null + percent_length_overlap_plasmidfinder = null + percent_length_overlap_resfinder = null + percent_length_overlap_pointfinder = null + no_exclude_genes = null + exclude_negatives = null + exclude_resistance_phenotypes = null + } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index b3a2fe0..46b4916 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -57,7 +60,11 @@ "description": "The organism to use for pointfinder. Validated: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" }, "plasmidfinder_database": { - "enum": ["", "gram_positive", "enterobacteriales"], + "enum": [ + "", + "gram_positive", + "enterobacteriales" + ], "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [None]." }, "mlst_scheme": { @@ -71,11 +78,56 @@ "type": "object", "description": "For advanced changes to staramr", "properties": { + "genome_size_lower_bound": { + "type": "integer", + "description": "The lower bound for our genome size for the quality metrics [Default 4000000]" + }, + "genome_size_upper_bound": { + "type": "integer", + "description": "The upper bound for our genome size for the quality metrics [Default 6000000]." + }, + "minimum_N50_value": { + "type": "integer", + "description": "The minimum N50 value for the quality metrics [Defaults 10000]" + }, "minimum_contig_length": { "type": "integer", - "description": "The minimum contig length for the quality metrics. Defaults to 300 bp." + "description": "The minimum contig length for the quality metrics [Default 300 bp]" + }, + "unacceptable_number_contigs": { + "type": "integer", + "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]" + }, + "pid_threshold": { + "type": "integer", + "description": "BLAST percent identity threshold [Default 98]" + }, + "percent_length_overlap_plasmidfinder": { + "type": "integer", + "description": "The percent length overlap for resfinder results [Default 60.0]" + }, + "percent_length_overlap_resfinder": { + "type": "integer", + "description": "The percent length overlap for resfinder results [Default 60.0]" + }, + "percent_length_overlap_pointfinder": { + "type": "integer", + "description": "The percent length overlap for pointfinder results [Default 95.0]" + }, + "no_exclude_genes": { + "type": "boolean", + "description": "Disable the default exclusion of some genes from ResFinder/PointFinder/PlasmidFinder [Default False]" + }, + "exclude_negatives": { + "type": "boolean", + "description": "Exclude negative results (those susceptible to antimicrobials) [DefaultFalse]" + }, + "exclude_resistance_phenotypes": { + "type": "boolean", + "description": "Exclude predicted antimicrobial resistances [Default False]." } - } + }, + "fa_icon": "fas fa-terminal" }, "reference_genome_options": { "title": "Reference genome options", @@ -206,7 +258,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { From 2abef26be35f10701c604c773f75c7a0991ff7b4 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 9 Jul 2024 15:24:16 -0400 Subject: [PATCH 09/27] prettier fix --- nextflow_schema.json | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 46b4916..3a812bb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -60,11 +57,7 @@ "description": "The organism to use for pointfinder. Validated: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" }, "plasmidfinder_database": { - "enum": [ - "", - "gram_positive", - "enterobacteriales" - ], + "enum": ["", "gram_positive", "enterobacteriales"], "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [None]." }, "mlst_scheme": { @@ -258,14 +251,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 8b8ec648923276548ae0296b9397367d9264ce21 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 10 Jul 2024 11:00:49 -0400 Subject: [PATCH 10/27] Fixed boolean CLI arguments --- conf/modules.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 01d0e7c..c0c093a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,9 +59,9 @@ process { def percent_length_overlap_plasmidfinder_arg = {String min_overlap -> " --percent-length-overlap-plasmidfinder ${min_overlap} "} def percent_length_overlap_resfinder_arg = {String min_overlap -> " --percent-length-overlap-resfinder ${min_overlap} "} def percent_length_overlap_pointfinder_arg = {String min_overlap -> " --percent-length-overlap-pointfinder ${min_overlap} "} - def no_exclude_genes_arg = {String exclude_gene -> " --no-exclude-genes ${exclude_gene} "} - def exclude_negatives_arg = {String exclude_neg -> " --exclude-negatives ${exclude_neg} "} - def exclude_resistance_phenotypes_arg = {String exclude_pheno -> " --exclude-resistance-phenotypes ${exclude_pheno} "} + def no_exclude_genes_arg = " --no-exclude-genes" + def exclude_negatives_arg = " --exclude-negatives" + def exclude_resistance_phenotypes_arg = " --exclude-resistance-phenotypes" // Check to see if the database name is valid: def valid_point_db = {String database -> pointfinder_databases.contains(database)} @@ -115,13 +115,13 @@ process { ? percent_length_overlap_pointfinder_arg(params.percent_length_overlap_pointfinder.toString()) : "", params.no_exclude_genes - ? no_exclude_genes_arg(params.no_exclude_genes.toString()) : "", + ? no_exclude_genes_arg : "", params.exclude_negatives - ? exclude_negatives_arg(params.exclude_negatives.toString()) : "", + ? exclude_negatives_arg : "", params.exclude_resistance_phenotypes - ? exclude_resistance_phenotypes_arg(params.exclude_resistance_phenotypes.toString()) : "" + ? exclude_resistance_phenotypes_arg : "" ].join(" ") } } From 75958395217725907f0687466572c519943108c3 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 16 Jul 2024 15:46:03 -0400 Subject: [PATCH 11/27] Added default parameters of staramr as pipeline parameters defaults --- conf/modules.config | 2 +- nextflow.config | 26 +++++++++++++------------- nextflow_schema.json | 36 +++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c0c093a..0ffefd7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -80,7 +80,7 @@ process { ? plasmid_db_arg(params.plasmidfinder_database) : "", // MLST scheme: - params.mlst_scheme + params.mlst_scheme && (params.mlst_scheme != "None") ? mlst_arg(params.mlst_scheme) : "", // Additional parameters diff --git a/nextflow.config b/nextflow.config index f0e5861..2640bf7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,22 +55,22 @@ params { // Databases pointfinder_database = null plasmidfinder_database = null - mlst_scheme = null + mlst_scheme = "None" minimum_contig_length = null // Additional CLI arguments - genome_size_lower_bound = null - genome_size_upper_bound = null - minimum_N50_value = null - minimum_contig_length = null - unacceptable_number_contigs = null - pid_threshold = null - percent_length_overlap_plasmidfinder = null - percent_length_overlap_resfinder = null - percent_length_overlap_pointfinder = null - no_exclude_genes = null - exclude_negatives = null - exclude_resistance_phenotypes = null + genome_size_lower_bound = 4000000 + genome_size_upper_bound = 6000000 + minimum_N50_value = 10000 + minimum_contig_length = 300 + unacceptable_number_contigs = 1000 + pid_threshold = 98 + percent_length_overlap_plasmidfinder = 60 + percent_length_overlap_resfinder = 60 + percent_length_overlap_pointfinder = 95 + no_exclude_genes = false + exclude_negatives = false + exclude_resistance_phenotypes = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3a812bb..fe690a0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -46,7 +46,7 @@ "properties": { "pointfinder_database": { "enum": [ - "", + "Automatic Selection", "Enterococcus faecium", "Enterococcus faecalis", "Helicobacter pylori", @@ -54,15 +54,16 @@ "Campylobacter", "Escherichia coli" ], - "description": "The organism to use for pointfinder. Validated: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" + "description": "Select a single Pointfinder database to use on all samples (overriding metadata option). Validated Organisms: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" }, "plasmidfinder_database": { - "enum": ["", "gram_positive", "enterobacteriales"], + "enum": ["All", "gram_positive", "enterobacteriales"], "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [None]." }, "mlst_scheme": { "type": "string", - "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [None]" + "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [None]", + "default": "None" } } }, @@ -73,39 +74,48 @@ "properties": { "genome_size_lower_bound": { "type": "integer", - "description": "The lower bound for our genome size for the quality metrics [Default 4000000]" + "description": "The lower bound for our genome size for the quality metrics [Default 4000000]", + "default": 4000000 }, "genome_size_upper_bound": { "type": "integer", - "description": "The upper bound for our genome size for the quality metrics [Default 6000000]." + "description": "The upper bound for our genome size for the quality metrics [Default 6000000].", + "default": 6000000 }, "minimum_N50_value": { "type": "integer", - "description": "The minimum N50 value for the quality metrics [Defaults 10000]" + "description": "The minimum N50 value for the quality metrics [Defaults 10000]", + "default": 10000 }, "minimum_contig_length": { "type": "integer", - "description": "The minimum contig length for the quality metrics [Default 300 bp]" + "description": "The minimum contig length for the quality metrics [Default 300 bp]", + "default": 300 }, "unacceptable_number_contigs": { "type": "integer", - "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]" + "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]", + "default": 1000 }, "pid_threshold": { "type": "integer", - "description": "BLAST percent identity threshold [Default 98]" + "description": "BLAST percent identity threshold [Default 98]", + "default": 98 }, "percent_length_overlap_plasmidfinder": { "type": "integer", - "description": "The percent length overlap for resfinder results [Default 60.0]" + "description": "The percent length overlap for resfinder results [Default 60.0]", + "default": 60 }, "percent_length_overlap_resfinder": { "type": "integer", - "description": "The percent length overlap for resfinder results [Default 60.0]" + "description": "The percent length overlap for resfinder results [Default 60.0]", + "default": 60 }, "percent_length_overlap_pointfinder": { "type": "integer", - "description": "The percent length overlap for pointfinder results [Default 95.0]" + "description": "The percent length overlap for pointfinder results [Default 95.0]", + "default": 95 }, "no_exclude_genes": { "type": "boolean", From 5cf60c359273eb27ea3efabf610623d332e143d4 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 17 Jul 2024 12:17:46 -0400 Subject: [PATCH 12/27] Added nf-test to check that all commandline parameters are run --- tests/main.nf.test | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 06cc4c1..21c1d2c 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -51,6 +51,25 @@ nextflow_pipeline { assert ecoli_metadata."Scheme" == "ecoli_achtman_4" assert ecoli_metadata."Sequence Type" == "678" + // Check the commandline parameters + // Salmonella + assert path("$baseDir/tests/results/staramr/GCA_000008105_results/GCA_000008105_settings.txt").exists() + def salmonella_settings = new File("$baseDir/tests/results/staramr/GCA_000008105_results/GCA_000008105_settings.txt") + def salmonella_cmd = salmonella_settings.readLines().get(0) + assert salmonella_cmd == "command_line = /usr/local/bin/staramr search --pointfinder-organism salmonella --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCA_000008105_results GCA_000008105.fasta" + + // Ecoli + assert path("$baseDir/tests/results/staramr/GCA_000947975_results/GCA_000947975_settings.txt").exists() + def ecoli_settings = new File("$baseDir/tests/results/staramr/GCA_000947975_results/GCA_000947975_settings.txt") + def ecoli_cmd = ecoli_settings.readLines().get(0) + assert ecoli_cmd == "command_line = /usr/local/bin/staramr search --pointfinder-organism escherichia_coli --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCA_000947975_results GCA_000947975.fasta" + + // Listeria + assert path("$baseDir/tests/results/staramr/GCF_000196035_results/GCF_000196035_settings.txt").exists() + def listeria_settings = new File("$baseDir/tests/results/staramr/GCF_000196035_results/GCF_000196035_settings.txt") + def listeria_cmd = listeria_settings.readLines().get(0) + assert listeria_cmd == "command_line = /usr/local/bin/staramr search --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCF_000196035_results GCF_000196035.fasta" + // Check CSVTK_concat output (merged_*) files // merged_detailed_summary.tsv From e11ea6d428d3e5f5ad9fc75c234630152126d5e6 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 17 Jul 2024 12:21:26 -0400 Subject: [PATCH 13/27] Changed description of --pointfinder_database parameter --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index fe690a0..c858ac0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -58,7 +58,7 @@ }, "plasmidfinder_database": { "enum": ["All", "gram_positive", "enterobacteriales"], - "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [None]." + "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [All]." }, "mlst_scheme": { "type": "string", From 753835d8700d6a1d57bcffa4e7b189807fcc6d66 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 10:49:20 -0400 Subject: [PATCH 14/27] Added min/max thresholds to parameters --- nextflow_schema.json | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c858ac0..a492f57 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -75,47 +75,66 @@ "genome_size_lower_bound": { "type": "integer", "description": "The lower bound for our genome size for the quality metrics [Default 4000000]", - "default": 4000000 + "default": 4000000, + "minimum": 1, + "maximum" : 14000000 }, "genome_size_upper_bound": { "type": "integer", "description": "The upper bound for our genome size for the quality metrics [Default 6000000].", - "default": 6000000 + "default": 6000000, + "minimum": 1, + "maximum" : 14000000 }, "minimum_N50_value": { "type": "integer", "description": "The minimum N50 value for the quality metrics [Defaults 10000]", - "default": 10000 + "default": 10000, + "minimum": 1, + "maximum" : 14000000 + }, "minimum_contig_length": { "type": "integer", "description": "The minimum contig length for the quality metrics [Default 300 bp]", - "default": 300 + "default": 300, + "minimum": 1, + "maximum" : 14000000 }, "unacceptable_number_contigs": { "type": "integer", "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]", - "default": 1000 + "default": 1000, + "minimum": 1, + "maximum" : 500000 }, "pid_threshold": { "type": "integer", "description": "BLAST percent identity threshold [Default 98]", - "default": 98 + "default": 98, + "minimum": 1, + "maximum" : 100 }, "percent_length_overlap_plasmidfinder": { "type": "integer", "description": "The percent length overlap for resfinder results [Default 60.0]", - "default": 60 + "default": 60, + "minimum": 1, + "maximum" : 100 }, "percent_length_overlap_resfinder": { "type": "integer", "description": "The percent length overlap for resfinder results [Default 60.0]", - "default": 60 + "default": 60, + "minimum": 1, + "maximum" : 100 }, "percent_length_overlap_pointfinder": { "type": "integer", "description": "The percent length overlap for pointfinder results [Default 95.0]", - "default": 95 + "default": 95, + "minimum": 1, + "maximum" : 100 }, "no_exclude_genes": { "type": "boolean", From 2ac55097b4fe42b5595fb82f9a9e4cf9f4f29231 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 10:51:02 -0400 Subject: [PATCH 15/27] Removed duplicate minimum_contig_length --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 2640bf7..4e2f3d2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -56,7 +56,6 @@ params { pointfinder_database = null plasmidfinder_database = null mlst_scheme = "None" - minimum_contig_length = null // Additional CLI arguments genome_size_lower_bound = 4000000 From bdecbc75c350863a0250281daf20ac89ddfff3eb Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 13:19:57 -0400 Subject: [PATCH 16/27] Limit memory usage for nf-test --- tests/main.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 21c1d2c..b74c2d5 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -9,6 +9,7 @@ nextflow_pipeline { params { input = "$baseDir/tests/assets/test_samplesheet.csv" outdir = "$baseDir/tests/results" + max_memory = "4.GB" } } From 6b41683874fd7f45f70da8c4a9b2b5519c257e8f Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 13:25:06 -0400 Subject: [PATCH 17/27] Change StAMR Database options --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 4e2f3d2..64d0998 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,8 +53,8 @@ params { // StarAMR options // Databases - pointfinder_database = null - plasmidfinder_database = null + pointfinder_database = "Automatic Selection" + plasmidfinder_database = "All" mlst_scheme = "None" // Additional CLI arguments From 5b34fb4ff5add72285c885882d0c4817c13758cb Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 13:37:02 -0400 Subject: [PATCH 18/27] Change MLST scheme default from None to Automatic --- conf/modules.config | 3 ++- nextflow.config | 2 +- nextflow_schema.json | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0ffefd7..53aafba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -44,7 +44,8 @@ process { 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori'] // Convert the species name to a Pointfinder database-style name: - def convert = {String species_name -> species_name.trim().toLowerCase().replaceAll(" ", "_")} + def species_code = "[sS]almonella|[cC]amplyobacter|[eE]nterococcus.faecalis|[eE]nterococcus.faecium|[eE]scherichia.coli|[hH]elicobacter.pylori" + def convert = {String species_name -> species_name.trim().toLowerCase().replaceAll(" ", "_").find(species_code)} // Create the command line arguments: def point_db_arg = {String database -> " --pointfinder-organism ${database} " } diff --git a/nextflow.config b/nextflow.config index 64d0998..c6702c0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { // Databases pointfinder_database = "Automatic Selection" plasmidfinder_database = "All" - mlst_scheme = "None" + mlst_scheme = "Automatic" // Additional CLI arguments genome_size_lower_bound = 4000000 diff --git a/nextflow_schema.json b/nextflow_schema.json index a492f57..1b33df1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -63,7 +63,7 @@ "mlst_scheme": { "type": "string", "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [None]", - "default": "None" + "default": "Automatic" } } }, From 49ca184eb7858ce93179881dd2b0e279ea2824f8 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 13:40:45 -0400 Subject: [PATCH 19/27] Prettier fix --- nextflow_schema.json | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1b33df1..5cb65fa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -77,64 +77,63 @@ "description": "The lower bound for our genome size for the quality metrics [Default 4000000]", "default": 4000000, "minimum": 1, - "maximum" : 14000000 + "maximum": 14000000 }, "genome_size_upper_bound": { "type": "integer", "description": "The upper bound for our genome size for the quality metrics [Default 6000000].", "default": 6000000, "minimum": 1, - "maximum" : 14000000 + "maximum": 14000000 }, "minimum_N50_value": { "type": "integer", "description": "The minimum N50 value for the quality metrics [Defaults 10000]", "default": 10000, "minimum": 1, - "maximum" : 14000000 - + "maximum": 14000000 }, "minimum_contig_length": { "type": "integer", "description": "The minimum contig length for the quality metrics [Default 300 bp]", "default": 300, "minimum": 1, - "maximum" : 14000000 + "maximum": 14000000 }, "unacceptable_number_contigs": { "type": "integer", "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]", "default": 1000, "minimum": 1, - "maximum" : 500000 + "maximum": 500000 }, "pid_threshold": { "type": "integer", "description": "BLAST percent identity threshold [Default 98]", "default": 98, "minimum": 1, - "maximum" : 100 + "maximum": 100 }, "percent_length_overlap_plasmidfinder": { "type": "integer", "description": "The percent length overlap for resfinder results [Default 60.0]", "default": 60, "minimum": 1, - "maximum" : 100 + "maximum": 100 }, "percent_length_overlap_resfinder": { "type": "integer", "description": "The percent length overlap for resfinder results [Default 60.0]", "default": 60, "minimum": 1, - "maximum" : 100 + "maximum": 100 }, "percent_length_overlap_pointfinder": { "type": "integer", "description": "The percent length overlap for pointfinder results [Default 95.0]", "default": 95, "minimum": 1, - "maximum" : 100 + "maximum": 100 }, "no_exclude_genes": { "type": "boolean", From 95f36658cf87e419e472bbe362c367e6637fef2a Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 13:49:17 -0400 Subject: [PATCH 20/27] Missed a change of None to Automatic --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 53aafba..daba6cf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,7 +81,7 @@ process { ? plasmid_db_arg(params.plasmidfinder_database) : "", // MLST scheme: - params.mlst_scheme && (params.mlst_scheme != "None") + params.mlst_scheme && (params.mlst_scheme != "Automatic") ? mlst_arg(params.mlst_scheme) : "", // Additional parameters From e4114fd1d3aff8d0d012af5af37214ddabcc8d96 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 14:13:15 -0400 Subject: [PATCH 21/27] Fix linting issue and limits on max genome size --- nextflow_schema.json | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5cb65fa..ccb8f1c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -45,6 +45,7 @@ "fa_icon": "fas fa-terminal", "properties": { "pointfinder_database": { + "type": "string", "enum": [ "Automatic Selection", "Enterococcus faecium", @@ -57,6 +58,7 @@ "description": "Select a single Pointfinder database to use on all samples (overriding metadata option). Validated Organisms: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli" }, "plasmidfinder_database": { + "type": "string", "enum": ["All", "gram_positive", "enterobacteriales"], "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [All]." }, @@ -76,29 +78,25 @@ "type": "integer", "description": "The lower bound for our genome size for the quality metrics [Default 4000000]", "default": 4000000, - "minimum": 1, - "maximum": 14000000 + "minimum": 1 }, "genome_size_upper_bound": { "type": "integer", "description": "The upper bound for our genome size for the quality metrics [Default 6000000].", "default": 6000000, - "minimum": 1, - "maximum": 14000000 + "minimum": 1 }, "minimum_N50_value": { "type": "integer", "description": "The minimum N50 value for the quality metrics [Defaults 10000]", "default": 10000, - "minimum": 1, - "maximum": 14000000 + "minimum": 1 }, "minimum_contig_length": { "type": "integer", "description": "The minimum contig length for the quality metrics [Default 300 bp]", "default": 300, - "minimum": 1, - "maximum": 14000000 + "minimum": 1 }, "unacceptable_number_contigs": { "type": "integer", From f48e4a884c1fdc4e2323166f180a61e25a2d61f0 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 14:28:44 -0400 Subject: [PATCH 22/27] Fixed typos --- nextflow.config | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index c6702c0..124f0f3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,7 +67,7 @@ params { percent_length_overlap_plasmidfinder = 60 percent_length_overlap_resfinder = 60 percent_length_overlap_pointfinder = 95 - no_exclude_genes = false + no_exclude_genes = false exclude_negatives = false exclude_resistance_phenotypes = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ccb8f1c..e5bd18c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -114,7 +114,7 @@ }, "percent_length_overlap_plasmidfinder": { "type": "integer", - "description": "The percent length overlap for resfinder results [Default 60.0]", + "description": "The percent length overlap for plasmidfinder results [Default 60.0]", "default": 60, "minimum": 1, "maximum": 100 From 2292e353dc1c7827d8903c6cc0d0e6ded4ef9fdd Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Fri, 19 Jul 2024 15:33:13 -0400 Subject: [PATCH 23/27] Modified parameter descriptions --- nextflow_schema.json | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index e5bd18c..0b572c9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -64,7 +64,7 @@ }, "mlst_scheme": { "type": "string", - "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [None]", + "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [Automatic]", "default": "Automatic" } } @@ -102,8 +102,7 @@ "type": "integer", "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]", "default": 1000, - "minimum": 1, - "maximum": 500000 + "minimum": 1 }, "pid_threshold": { "type": "integer", @@ -114,21 +113,21 @@ }, "percent_length_overlap_plasmidfinder": { "type": "integer", - "description": "The percent length overlap for plasmidfinder results [Default 60.0]", + "description": "The percent length overlap for plasmidfinder results [Default 60]", "default": 60, "minimum": 1, "maximum": 100 }, "percent_length_overlap_resfinder": { "type": "integer", - "description": "The percent length overlap for resfinder results [Default 60.0]", + "description": "The percent length overlap for resfinder results [Default 60]", "default": 60, "minimum": 1, "maximum": 100 }, "percent_length_overlap_pointfinder": { "type": "integer", - "description": "The percent length overlap for pointfinder results [Default 95.0]", + "description": "The percent length overlap for pointfinder results [Default 95]", "default": 95, "minimum": 1, "maximum": 100 @@ -139,7 +138,7 @@ }, "exclude_negatives": { "type": "boolean", - "description": "Exclude negative results (those susceptible to antimicrobials) [DefaultFalse]" + "description": "Exclude negative results (those susceptible to antimicrobials) [Default False]" }, "exclude_resistance_phenotypes": { "type": "boolean", From b4d1e5d78431acd0a8be0ea05f064fd2aa49f3fe Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Mon, 22 Jul 2024 10:00:00 -0400 Subject: [PATCH 24/27] Deleted: Template iGenomes parameter from nf-core --- conf/igenomes.config | 440 ------------------------------------------- 1 file changed, 440 deletions(-) delete mode 100644 conf/igenomes.config diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 3f11437..0000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,440 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. - Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} From 2879bf7bb1fe6c3e1d0efc7229fca076671bd86a Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Mon, 22 Jul 2024 10:02:08 -0400 Subject: [PATCH 25/27] Modifcations (as seen in iridanextexample PR#14) to allow the template reference_genome_option to be removed --- lib/WorkflowMain.groovy | 12 +----------- lib/WorkflowStaramr.groovy | 15 +-------------- nextflow.config | 8 -------- nextflow_schema.json | 24 ------------------------ 4 files changed, 2 insertions(+), 57 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f8c4f4c..63356f6 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -49,15 +49,5 @@ class WorkflowMain { Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } + } diff --git a/lib/WorkflowStaramr.groovy b/lib/WorkflowStaramr.groovy index 1bda1d5..2f9c5de 100755 --- a/lib/WorkflowStaramr.groovy +++ b/lib/WorkflowStaramr.groovy @@ -12,7 +12,6 @@ class WorkflowStaramr { // public static void initialise(params, log) { - genomeExistsError(params, log) } public static String toolCitationText(params) { @@ -65,17 +64,5 @@ class WorkflowStaramr { return description_html } - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } + } diff --git a/nextflow.config b/nextflow.config index 124f0f3..e1b12c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,8 +14,6 @@ params { input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false // Boilerplate options outdir = null @@ -199,12 +197,6 @@ plugins { } -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b572c9..1443dd2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -147,27 +147,6 @@ }, "fa_icon": "fas fa-terminal" }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - } - } - }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -347,9 +326,6 @@ { "$ref": "#/definitions/additional_settings" }, - { - "$ref": "#/definitions/reference_genome_options" - }, { "$ref": "#/definitions/institutional_config_options" }, From 247defcb4936ba9438a5c80a15c9bcbf2f7ecc0c Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Mon, 22 Jul 2024 10:05:39 -0400 Subject: [PATCH 26/27] Remove genome params --- nextflow.config | 2 -- 1 file changed, 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index e1b12c4..a14b1f2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,8 +12,6 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - // References - genome = null // Boilerplate options outdir = null From 23278eb455ffaeba19b0ab4d248e8c3ed59ec0e9 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Mon, 22 Jul 2024 12:55:39 -0400 Subject: [PATCH 27/27] Added all the MLST schemeas available on https://github.com/tseemann/mlst/tree/master/db/pubmlst --- nextflow_schema.json | 148 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1443dd2..c2944fa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -64,6 +64,154 @@ }, "mlst_scheme": { "type": "string", + "enum": [ + "Automatic", + "aactinomycetemcomitans", + "abaumannii", + "abaumannii_2", + "achromobacter", + "aeromonas", + "aphagocytophilum", + "arcobacter", + "bbacilliformis", + "bcc", + "bcereus", + "bfragilis", + "bhenselae", + "blicheniformis_14", + "bordetella_3", + "borrelia", + "bpseudomallei", + "brachyspira", + "brachyspira_2", + "brachyspira_3", + "brachyspira_4", + "brachyspira_5", + "brucella", + "bsubtilis", + "bwashoensis", + "campylobacter", + "campylobacter_nonjejuni", + "campylobacter_nonjejuni_2", + "campylobacter_nonjejuni_3", + "campylobacter_nonjejuni_4", + "campylobacter_nonjejuni_5", + "campylobacter_nonjejuni_6", + "campylobacter_nonjejuni_7", + "campylobacter_nonjejuni_8", + "campylobacter_nonjejuni_9", + "cbotulinum", + "cdifficile", + "cfreundii", + "chlamydiales", + "cmaltaromaticum", + "cperfringens", + "cronobacter", + "csepticum", + "dbases.sh", + "diphtheria_3", + "dnodosus", + "ecloacae", + "ecoli", + "ecoli_achtman_4", + "edwardsiella", + "efaecalis", + "efaecium", + "fpsychrophilum", + "gallibacterium", + "geotrichum", + "hcinaedi", + "helicobacter", + "hinfluenzae", + "hparasuis", + "hsuis", + "kaerogenes", + "kingella", + "klebsiella", + "koxytoca", + "leptospira", + "leptospira_2", + "leptospira_3", + "liberibacter", + "listeria_2", + "llactis_phage", + "lsalivarius", + "mabscessus", + "magalactiae", + "manserisalpingitidis", + "mbovis_2", + "mcanis", + "mcaseolyticus", + "mcatarrhalis_achtman_6", + "mflocculare", + "mgallisepticum", + "mgallisepticum_2", + "mhaemolytica", + "mhominis_3", + "mhyopneumoniae", + "mhyorhinis", + "miowae", + "mplutonius", + "mpneumoniae", + "msciuri", + "msynoviae", + "mycobacteria_2", + "neisseria", + "oralstrep", + "orhinotracheale", + "otsutsugamushi", + "pacnes_3", + "paeruginosa", + "pdamselae", + "pfluorescens", + "pgingivalis", + "plarvae", + "pmultocida", + "pmultocida_2", + "ppentosaceus", + "pputida", + "psalmonis", + "ranatipestifer", + "rhodococcus", + "sagalactiae", + "saureus", + "sbsec", + "scanis", + "schromogenes", + "sdysgalactiae", + "senterica_achtman_2", + "sepidermidis", + "sgallolyticus", + "shaemolyticus", + "shewanella", + "shominis", + "sinorhizobium", + "smaltophilia", + "spneumoniae", + "spseudintermedius", + "spyogenes", + "ssuis", + "staphlugdunensis", + "sthermophilus", + "streptomyces", + "streptothermophilus", + "suberis", + "szooepidemicus", + "taylorella", + "tenacibaculum", + "tpallidum", + "ureaplasma", + "vcholerae", + "vcholerae_2", + "vibrio", + "vparahaemolyticus", + "vtapetis", + "vvulnificus", + "wolbachia", + "xfastidiosa", + "ypseudotuberculosis_achtman_3", + "yruckeri" + ], "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [Automatic]", "default": "Automatic" }