phac-nml · sgsutcliffe · Jul 25, 2024 · Jun 28, 2024 · Jun 28, 2024 · Jul 2, 2024
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -15,6 +15,7 @@
             },
             "contigs": {
                 "type": "string",
+                "format": "file-path",
                 "pattern": "^\\S+\\.f(ast|n)?a\\.gz$",
                 "errorMessage": "FASTA file containing assembled contigs, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'"
             },

diff --git a/conf/igenomes.config b/conf/igenomes.config
diff --git a/conf/modules.config b/conf/modules.config
@@ -44,12 +44,25 @@ process {
                                     'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']
 
         // Convert the species name to a Pointfinder database-style name:
-        def convert = {String species_name -> species_name.trim().toLowerCase().replaceAll(" ", "_")}
+        def species_code = "[sS]almonella|[cC]ampylobacter|[eE]nterococcus.faecalis|[eE]nterococcus.faecium|[eE]scherichia.coli|[hH]elicobacter.pylori" // regex alternation of the supported Pointfinder organisms; '.' stands in for the genus/species separator
+        def convert = {String species_name -> species_name.trim().toLowerCase().replaceAll(" ", "_").find(species_code)} // first regex match in the normalised name, or null when none of the organisms is found
 
         // Create the command line arguments:
         def point_db_arg = {String database -> " --pointfinder-organism ${database} " }
         def plasmid_db_arg = {String database -> " --plasmidfinder-database-type ${database} " }
         def mlst_arg = {String scheme -> " --mlst-scheme ${scheme} " }
+        def minimum_contig_length_arg = {String min_length -> " --minimum-contig-length ${min_length} "}
+        def genome_size_lower_bound_arg = {String min_genome -> " --genome-size-lower-bound ${min_genome} "}
+        def genome_size_upper_bound_arg = {String max_genome -> " --genome-size-upper-bound ${max_genome} "}
+        def minimum_N50_value_arg = {String min_n50 -> " --minimum-N50-value ${min_n50} "}
+        def unacceptable_number_contigs_arg = {String num_contigs -> " --unacceptable-number-contigs ${num_contigs} "} // parameter is a contig count, not a length
+        def pid_threshold_arg = {String min_pid -> " --pid-threshold ${min_pid} "}
+        def percent_length_overlap_plasmidfinder_arg = {String min_overlap -> " --percent-length-overlap-plasmidfinder ${min_overlap} "}
+        def percent_length_overlap_resfinder_arg = {String min_overlap -> " --percent-length-overlap-resfinder ${min_overlap} "}
+        def percent_length_overlap_pointfinder_arg = {String min_overlap -> " --percent-length-overlap-pointfinder ${min_overlap} "}
+        def no_exclude_genes_arg = " --no-exclude-genes"
+        def exclude_negatives_arg = " --exclude-negatives"
+        def exclude_resistance_phenotypes_arg = " --exclude-resistance-phenotypes"
 
         // Check to see if the database name is valid:
         def valid_point_db = {String database -> pointfinder_databases.contains(database)}
@@ -58,8 +71,8 @@ process {
         ext.args = {
             [
                 // Pointfinder database:
-                params.pointfinder_database && valid_point_db(params.pointfinder_database) ?
-                    point_db_arg(params.pointfinder_database) :
+                params.pointfinder_database && valid_point_db(convert(params.pointfinder_database)) ?
+                    point_db_arg(convert(params.pointfinder_database)) :
                         meta.species && valid_point_db(convert(meta.species)) ?
                             point_db_arg(convert(meta.species)) : "",
 
@@ -68,8 +81,48 @@ process {
                     ? plasmid_db_arg(params.plasmidfinder_database) : "",
 
                 // MLST scheme:
-                params.mlst_scheme
-                    ? mlst_arg(params.mlst_scheme) : ""
+                params.mlst_scheme && (params.mlst_scheme != "Automatic")
+                    ? mlst_arg(params.mlst_scheme) : "",
+
+                // Additional parameters
+                params.minimum_contig_length
+                    ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "",
+
+                params.genome_size_lower_bound
+                    ? genome_size_lower_bound_arg(params.genome_size_lower_bound.toString()) : "",
+
+                params.genome_size_upper_bound
+                    ? genome_size_upper_bound_arg(params.genome_size_upper_bound.toString()) : "",
+
+                params.minimum_N50_value
+                    ? minimum_N50_value_arg(params.minimum_N50_value.toString()) : "",
+
+                params.minimum_contig_length
+                    ? minimum_contig_length_arg(params.minimum_contig_length.toString()) : "", // NOTE(review): repeats the identical entry earlier in this list, so --minimum-contig-length is emitted twice; tests/main.nf.test pins the doubled flag, so remove this entry and update those assertions together
+
+                params.unacceptable_number_contigs
+                    ? unacceptable_number_contigs_arg(params.unacceptable_number_contigs.toString()) : "",
+
+                params.pid_threshold
+                    ? pid_threshold_arg(params.pid_threshold.toString()) : "",
+
+                params.percent_length_overlap_plasmidfinder
+                    ? percent_length_overlap_plasmidfinder_arg(params.percent_length_overlap_plasmidfinder.toString()) : "",
+
+                params.percent_length_overlap_resfinder
+                    ? percent_length_overlap_resfinder_arg(params.percent_length_overlap_resfinder.toString()) : "",
+
+                params.percent_length_overlap_pointfinder
+                    ? percent_length_overlap_pointfinder_arg(params.percent_length_overlap_pointfinder.toString()) : "",
+
+                params.no_exclude_genes
+                    ? no_exclude_genes_arg : "",
+
+                params.exclude_negatives
+                    ? exclude_negatives_arg : "",
+
+                params.exclude_resistance_phenotypes
+                    ? exclude_resistance_phenotypes_arg : ""
             ].join(" ")
         }
     }

diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
@@ -49,15 +49,5 @@ class WorkflowMain {
             Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
         }
     }
-    //
-    // Get attribute from genome config file e.g. fasta
-    //
-    public static Object getGenomeAttribute(params, attribute) {
-        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
-            if (params.genomes[ params.genome ].containsKey(attribute)) {
-                return params.genomes[ params.genome ][ attribute ]
-            }
-        }
-        return null
-    }
+
 }
diff --git a/lib/WorkflowStaramr.groovy b/lib/WorkflowStaramr.groovy
@@ -12,7 +12,6 @@ class WorkflowStaramr {
     //
     public static void initialise(params, log) {
 
-        genomeExistsError(params, log)
     }
 
     public static String toolCitationText(params) {
@@ -65,17 +64,5 @@ class WorkflowStaramr {
         return description_html
     }
 
-    //
-    // Exit pipeline if incorrect --genome key provided
-    //
-    private static void genomeExistsError(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                "  Currently, the available genome keys are:\n" +
-                "  ${params.genomes.keySet().join(", ")}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-            Nextflow.error(error_string)
-        }
-    }
+
 }
diff --git a/nextflow.config b/nextflow.config
@@ -12,10 +12,6 @@ params {
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
-    // References
-    genome                     = null
-    igenomes_base              = 's3://ngi-igenomes/igenomes'
-    igenomes_ignore            = false
 
     // Boilerplate options
     outdir                     = null
@@ -50,10 +46,26 @@ params {
     validationShowHiddenParams       = false
     validate_params                  = true
 
-    //StarAMR options
-    pointfinder_database             = null
-    plasmidfinder_database           = null
-    mlst_scheme                      = null
+    // StarAMR options
+
+    // Databases
+    pointfinder_database             = "Automatic Selection"
+    plasmidfinder_database           = "All"
+    mlst_scheme                      = "Automatic"
+
+    // Additional CLI arguments
+    genome_size_lower_bound              = 4000000
+    genome_size_upper_bound              = 6000000
+    minimum_N50_value                    = 10000
+    minimum_contig_length                = 300
+    unacceptable_number_contigs          = 1000
+    pid_threshold                        = 98
+    percent_length_overlap_plasmidfinder = 60
+    percent_length_overlap_resfinder     = 60
+    percent_length_overlap_pointfinder   = 95
+    no_exclude_genes                     = false
+    exclude_negatives                    = false
+    exclude_resistance_phenotypes        = false
 
 }
 
@@ -183,12 +195,6 @@ plugins {
 }
 
 
-// Load igenomes.config if required
-if (!params.igenomes_ignore) {
-    includeConfig 'conf/igenomes.config'
-} else {
-    params.genomes = [:]
-}
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -38,27 +38,115 @@
                 }
             }
         },
-        "reference_genome_options": {
-            "title": "Reference genome options",
+        "database": {
+            "title": "Databases",
             "type": "object",
-            "fa_icon": "fas fa-dna",
-            "description": "Reference genome related files and options required for the workflow.",
+            "description": "Select databases to be run on all samples.",
+            "fa_icon": "fas fa-terminal",
             "properties": {
-                "genome": {
+                "pointfinder_database": {
                     "type": "string",
-                    "description": "Name of iGenomes reference.",
-                    "fa_icon": "fas fa-book",
-                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+                    "enum": [
+                        "Automatic Selection",
+                        "Enterococcus faecium",
+                        "Enterococcus faecalis",
+                        "Helicobacter pylori",
+                        "Salmonella",
+                        "Campylobacter",
+                        "Escherichia coli"
+                    ],
+                    "description": "Select a single Pointfinder database to use on all samples (overriding metadata option). Validated Organisms: Enterococcus faecium, Enterococcus faecalis, Helicobacter pylori, Salmonella, Campylobacter, Escherichia coli"
                 },
-                "igenomes_ignore": {
-                    "type": "boolean",
-                    "description": "Do not load the iGenomes reference config.",
-                    "fa_icon": "fas fa-ban",
-                    "hidden": true,
-                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+                "plasmidfinder_database": {
+                    "type": "string",
+                    "enum": ["All", "gram_positive", "enterobacteriales"],
+                    "description": "The database type to use for plasmidfinder {gram_positive, enterobacteriales}. Defaults to using all available database types to search for plasmids. [All]."
+                },
+                "mlst_scheme": {
+                    "type": "string",
+                    "description": "Specify scheme name, visit https://github.com/tseemann/mlst/tree/master/db/pubmlst for supported scheme genera available. [Automatic]",
+                    "default": "Automatic"
                 }
             }
         },
+        "additional_settings": {
+            "title": "Additional Settings",
+            "type": "object",
+            "description": "For advanced changes to staramr",
+            "properties": {
+                "genome_size_lower_bound": {
+                    "type": "integer",
+                    "description": "The lower bound for our genome size for the quality metrics [Default 4000000]",
+                    "default": 4000000,
+                    "minimum": 1
+                },
+                "genome_size_upper_bound": {
+                    "type": "integer",
+                    "description": "The upper bound for our genome size for the quality metrics [Default 6000000].",
+                    "default": 6000000,
+                    "minimum": 1
+                },
+                "minimum_N50_value": {
+                    "type": "integer",
+                    "description": "The minimum N50 value for the quality metrics [Default 10000]",
+                    "default": 10000,
+                    "minimum": 1
+                },
+                "minimum_contig_length": {
+                    "type": "integer",
+                    "description": "The minimum contig length for the quality metrics [Default 300 bp]",
+                    "default": 300,
+                    "minimum": 1
+                },
+                "unacceptable_number_contigs": {
+                    "type": "integer",
+                    "description": "The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics [Default 1000]",
+                    "default": 1000,
+                    "minimum": 1
+                },
+                "pid_threshold": {
+                    "type": "integer",
+                    "description": "BLAST percent identity threshold [Default 98]",
+                    "default": 98,
+                    "minimum": 1,
+                    "maximum": 100
+                },
+                "percent_length_overlap_plasmidfinder": {
+                    "type": "integer",
+                    "description": "The percent length overlap for plasmidfinder results [Default 60]",
+                    "default": 60,
+                    "minimum": 1,
+                    "maximum": 100
+                },
+                "percent_length_overlap_resfinder": {
+                    "type": "integer",
+                    "description": "The percent length overlap for resfinder results [Default 60]",
+                    "default": 60,
+                    "minimum": 1,
+                    "maximum": 100
+                },
+                "percent_length_overlap_pointfinder": {
+                    "type": "integer",
+                    "description": "The percent length overlap for pointfinder results [Default 95]",
+                    "default": 95,
+                    "minimum": 1,
+                    "maximum": 100
+                },
+                "no_exclude_genes": {
+                    "type": "boolean",
+                    "description": "Disable the default exclusion of some genes from ResFinder/PointFinder/PlasmidFinder [Default False]"
+                },
+                "exclude_negatives": {
+                    "type": "boolean",
+                    "description": "Exclude negative results (those susceptible to antimicrobials) [Default False]"
+                },
+                "exclude_resistance_phenotypes": {
+                    "type": "boolean",
+                    "description": "Exclude predicted antimicrobial resistances [Default False]."
+                }
+            },
+            "fa_icon": "fas fa-terminal"
+        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -233,7 +321,10 @@
             "$ref": "#/definitions/input_output_options"
         },
         {
-            "$ref": "#/definitions/reference_genome_options"
+            "$ref": "#/definitions/database"
+        },
+        {
+            "$ref": "#/definitions/additional_settings"
         },
         {
             "$ref": "#/definitions/institutional_config_options"
@@ -244,16 +335,5 @@
         {
             "$ref": "#/definitions/generic_options"
         }
-    ],
-    "properties": {
-        "pointfinder_database": {
-            "type": "string"
-        },
-        "plasmidfinder_database": {
-            "type": "string"
-        },
-        "mlst_scheme": {
-            "type": "string"
-        }
-    }
+    ]
 }
diff --git a/tests/main.nf.test b/tests/main.nf.test
@@ -9,6 +9,7 @@ nextflow_pipeline {
             params {
                 input = "$baseDir/tests/assets/test_samplesheet.csv"
                 outdir = "$baseDir/tests/results"
+                max_memory = "4.GB"
             }
         }
 
@@ -51,6 +52,25 @@ nextflow_pipeline {
             assert ecoli_metadata."Scheme" == "ecoli_achtman_4"
             assert ecoli_metadata."Sequence Type" == "678"
 
+            // Check the commandline parameters
+            // Salmonella
+            assert path("$baseDir/tests/results/staramr/GCA_000008105_results/GCA_000008105_settings.txt").exists()
+            def salmonella_settings = new File("$baseDir/tests/results/staramr/GCA_000008105_results/GCA_000008105_settings.txt")
+            def salmonella_cmd = salmonella_settings.readLines().get(0)
+            assert salmonella_cmd == "command_line                  = /usr/local/bin/staramr search --pointfinder-organism salmonella --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCA_000008105_results GCA_000008105.fasta"
+
+            // Ecoli
+            assert path("$baseDir/tests/results/staramr/GCA_000947975_results/GCA_000947975_settings.txt").exists()
+            def ecoli_settings = new File("$baseDir/tests/results/staramr/GCA_000947975_results/GCA_000947975_settings.txt")
+            def ecoli_cmd = ecoli_settings.readLines().get(0)
+            assert ecoli_cmd == "command_line                  = /usr/local/bin/staramr search --pointfinder-organism escherichia_coli --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCA_000947975_results GCA_000947975.fasta"
+
+            // Listeria
+            assert path("$baseDir/tests/results/staramr/GCF_000196035_results/GCF_000196035_settings.txt").exists()
+            def listeria_settings = new File("$baseDir/tests/results/staramr/GCF_000196035_results/GCF_000196035_settings.txt")
+            def listeria_cmd = listeria_settings.readLines().get(0)
+            assert listeria_cmd == "command_line                  = /usr/local/bin/staramr search --minimum-contig-length 300 --genome-size-lower-bound 4000000 --genome-size-upper-bound 6000000 --minimum-N50-value 10000 --minimum-contig-length 300 --unacceptable-number-contigs 1000 --pid-threshold 98 --percent-length-overlap-plasmidfinder 60 --percent-length-overlap-resfinder 60 --percent-length-overlap-pointfinder 95 --nprocs 1 -o GCF_000196035_results GCF_000196035.fasta"
+
             // Check CSVTK_concat output (merged_*) files
 
             // merged_detailed_summary.tsv