diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index b24dd094..33a2c027 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 137da49f..335a06b1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index f96fb17c..2ac5826b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,18 +14,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/smrnaseq/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/smrnaseq/results-${{ github.sha }}" } - profiles: test_full,public_aws_ecr + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index a1f96d73..84d22f1c 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/smrnaseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/smrnaseq/results-test-${{ github.sha }}" } - profiles: test,public_aws_ecr + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4bd5555..45ad5219 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" profile: - "test" diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CHANGELOG.md b/CHANGELOG.md index 8024dbeb..fc874c04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [v2.2.1](https://github.com/nf-core/smrnaseq/releases/tag/2.2.1) - 2023-05-08 +## [v2.2.2](https://github.com/nf-core/smrnaseq/releases/tag/2.2.2) - 2023-09-04 + +- [[#253]](https://github.com/nf-core/smrnaseq/pull/253) - Remove globs from process alias when using ECR containers +- [[#237]](https://github.com/nf-core/smrnaseq/issues/237) - Fix illumina protocol clip parameters to default +- Remove public_aws_ecr profile +- [[#269]](https://github.com/nf-core/smrnaseq/pull/269) - Updated miRBase URLs with new location (old ones were broken) + +### Software dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.13 | 1.15 | +| `fastp` | 0.23.2 | 0.23.4 | + +## [v2.2.1](https://github.com/nf-core/smrnaseq/releases/tag/2.2.1) - 2023-05-12 ### Enhancements & fixes diff --git a/CITATIONS.md b/CITATIONS.md index 11f752b9..67b98c96 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,8 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + * [trimgalore](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) * [samtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) @@ -56,5 +58,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 6fb16c5e..44066051 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/smrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3456879-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3456879) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -57,34 +57,34 @@ You can find numerous talks on the nf-core events page from various topics inclu > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/smrnaseq \ -profile \ - --input samplesheet.csv \ - --genome 'GRCh37' \ - --mirtrace_species 'hsa' \ - --protocol 'illumina' \ - --outdir + --input samplesheet.csv \ + --genome 'GRCh37' \ + --mirtrace_species 'hsa' \ + --protocol 'illumina' \ + --outdir ``` > **Warning:** @@ -92,11 +92,11 @@ nextflow run nf-core/smrnaseq \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/smrnaseq/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/smrnaseq/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/smrnaseq/output). diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 1f05424e..87cc6194 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -5,13 +5,17 @@ section_href: "https://github.com/nf-core/smrnaseq" plot_type: "html" data: |

Methods

-

Data was processed using nf-core/smrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

Data was processed using nf-core/smrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+

${tool_citations}

References

    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index df5ec37a..23da1135 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-smrnaseq-methods-description": order: -1000 diff --git a/assets/nf-core-smrnaseq_logo_light.png b/assets/nf-core-smrnaseq_logo_light.png index 2ecc50a7..262fa1b0 100644 Binary files a/assets/nf-core-smrnaseq_logo_light.png and b/assets/nf-core-smrnaseq_logo_light.png differ diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..214c7fa9 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/smrnaseq v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index 8644dd3c..556d8509 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -207,7 +207,7 @@ process { } process { - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_SAMTOOLS:SAMTOOLS_.*' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/samtools" }, @@ -215,7 +215,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/samtools/samtools_stats" }, diff --git a/conf/public_aws_ecr.config b/conf/public_aws_ecr.config deleted file mode 100644 index 8152e14a..00000000 --- a/conf/public_aws_ecr.config +++ /dev/null @@ -1,30 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - AWS ECR Config -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config to set public AWS ECR images wherever possible - This improves speed when running on AWS infrastructure. - Use this as an example template when using your own private registry. ----------------------------------------------------------------------------------------- -*/ - -docker.registry = 'public.ecr.aws' -podman.registry = 'public.ecr.aws' - -process { - withName: '.*:SAMPLESHEET_CHECK' { - container = "quay.io/biocontainers/python:3.8.3" - } - withName: '.*:BOWTIE_MAP.*' { - container = 'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' - } - withName: '.*:EDGER_QC' { - container = 'quay.io/biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' - } - withName: '.*:MIRTOP_QUANT' { - container = 'quay.io/biocontainers/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' - } - withName: '.*:TABLE_MERGE' { - container = 'quay.io/biocontainers/r-data.table:1.12.2' - } -} diff --git a/docs/usage.md b/docs/usage.md index 1893e2e6..9bfe5494 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,9 +26,9 @@ It should point to the 3-letter species name used by [miRBase](https://www.mirba Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. -- `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/ftp/CURRENT/genomes/${params.mirtrace_species}.gff3` -- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/ftp/CURRENT/mature.fa.gz` -- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/ftp/CURRENT/hairpin.fa.gz` +- `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3` +- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa` +- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa` If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below . @@ -104,7 +104,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/smrnaseq --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/smrnaseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -123,7 +123,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. > ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -> The above pipeline run specified with a params file in yaml format: + +The above pipeline run specified with a params file in yaml format: ```bash nextflow run nf-core/smrnaseq -profile docker -params-file params.yaml @@ -135,7 +136,6 @@ with `params.yaml` containing: input: './samplesheet.csv' outdir: './results/' genome: 'GRCh37' -input: 'data' <...> ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804d..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 9a0db96c..ed5a126c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -19,30 +19,6 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen @@ -65,14 +41,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/lib/WorkflowSmrnaseq.groovy b/lib/WorkflowSmrnaseq.groovy index 7013e903..5b94bad7 100755 --- a/lib/WorkflowSmrnaseq.groovy +++ b/lib/WorkflowSmrnaseq.groovy @@ -11,6 +11,7 @@ class WorkflowSmrnaseq { // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) } @@ -41,15 +42,57 @@ class WorkflowSmrnaseq { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() @@ -82,8 +125,8 @@ class WorkflowSmrnaseq { switch(params.protocol){ case 'illumina': - params.putIfAbsent("clip_r1", 1); - params.putIfAbsent("three_prime_clip_r1",2); + params.putIfAbsent("clip_r1", 0); + params.putIfAbsent("three_prime_clip_r1",0); params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); break case 'nextflex': diff --git a/main.nf b/main.nf index 68e88980..e0072925 100644 --- a/main.nf +++ b/main.nf @@ -27,6 +27,22 @@ params.bowtie_index = WorkflowMain.getGenomeAttribute(params, 'bowtie') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index 0b6daac5..758f1b84 100644 --- a/modules.json +++ b/modules.json @@ -12,48 +12,62 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_stats_samtools", "modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows", "bam_sort_stats_samtools"] } } } diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index b132e1da..91a6cd53 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -8,7 +8,7 @@ process INDEX_GENOME { 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: - path fasta + tuple val(meta2), path(fasta) output: path 'genome*ebwt' , emit: index diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 9390730a..2be45bb8 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -7,7 +7,7 @@ process INDEX_MIRNA { 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: - path fasta + tuple val(meta2), path(fasta) output: path 'fasta_bidx*' , emit: index diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf index 3e247ce4..489879a5 100644 --- a/modules/local/format_fasta_mirna.nf +++ b/modules/local/format_fasta_mirna.nf @@ -10,11 +10,11 @@ process FORMAT_FASTA_MIRNA { 'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" input: - path fasta + tuple val(meta2), path(fasta) output: - path '*_idx.fa' , emit: formatted_fasta - path "versions.yml", emit: versions + tuple val(meta2), path('*_idx.fa') , emit: formatted_fasta + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 74044e4a..e4e2aaaf 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -10,7 +10,7 @@ process MIRDEEP2_RUN { 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" input: - path fasta + tuple val(meta2), path(fasta) tuple path(reads), path(arf) path hairpin path mature diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index 3ebb65ac..cc8d29a7 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -12,9 +12,9 @@ process MIRTOP_QUANT { path gtf output: - path "mirtop/mirtop.gff" + path "mirtop/mirtop.gff" , emit: mirtop_gff path "mirtop/mirtop.tsv" , emit: mirtop_table - path "mirtop/mirtop_rawData.tsv" + path "mirtop/mirtop_rawData.tsv", emit: mirtop_rawdata path "mirtop/stats/*" , emit: logs path "versions.yml" , emit: versions diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index ab7a91f0..71f5c85a 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -7,11 +7,11 @@ process PARSE_FASTA_MIRNA { 'biocontainers/seqkit:2.3.1--h9ee0642_0' }" input: - path fasta + tuple val(meta2), path(fasta) output: - path '*_igenome.fa', emit: parsed_fasta - path "versions.yml", emit: versions + tuple val(meta2), path('*_igenome.fa'), emit: parsed_fasta + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index c1b0967f..351d2322 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,5 +1,4 @@ process SAMPLESHEET_CHECK { - label 'process_low' tag "$samplesheet" label 'process_single' diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..c9d014b1 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 2ca2d3ee..831b7f12 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,10 +2,10 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.2" + conda "bioconda::fastp=0.23.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : - 'biocontainers/fastp:0.23.2--h79da9fb_0' }" + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 07d5e433..249f9064 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -29,7 +29,11 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..65d7dd0d 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index eb7e72fc..b75707ec 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -32,4 +32,15 @@ process SAMTOOLS_FLAGSTAT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index a257d700..83c7c34b 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -33,4 +33,16 @@ process SAMTOOLS_IDXSTATS { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 1e5181d4..2b7753fd 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -21,13 +21,11 @@ process SAMTOOLS_SORT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def sort_memory = (task.memory.mega/task.cpus).intValue() if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools sort \\ $args \\ -@ $task.cpus \\ - -m ${sort_memory}M \\ -o ${prefix}.bam \\ -T $prefix \\ $bam diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index eb7f098b..4a2607de 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -9,7 +9,7 @@ process SAMTOOLS_STATS { input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 1d68a5d8..90e6345f 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -30,9 +30,14 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,4 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/nextflow.config b/nextflow.config index 2f9dd945..c9f81e43 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,8 +20,8 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false mirna_gtf = null - mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" - hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" + mature = "https://mirbase.org/download/CURRENT/mature.fa" + hairpin = "https://mirbase.org/download/CURRENT/hairpin.fa" mirgenedb = false mirgenedb_mature = null mirgenedb_hairpin = null @@ -61,7 +61,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -70,10 +69,6 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options custom_config_version = 'master' @@ -89,6 +84,14 @@ params { max_cpus = 16 max_time = '240.h' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -199,14 +202,24 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } - public_aws_ecr { - includeConfig 'conf/public_aws_ecr.config' - } test { includeConfig 'conf/test.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation'// Validation of pipeline parameters and creation of an input channel from a sample sheet +} + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -237,19 +250,19 @@ podman.registry = 'quay.io' def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -258,8 +271,8 @@ manifest { homePage = 'https://github.com/nf-core/smrnaseq' description = """Small RNA-Seq Best Practice Analysis Pipeline.""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '2.2.1' + nextflowVersion = '!>=23.04.0' + version = '2.2.2' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 213199c1..d0162e47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,9 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/smrnaseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -80,6 +80,8 @@ }, "fasta": { "type": "string", + "format": "file-path", + "exists": true, "fa_icon": "fas fa-font", "description": "Path to reference genome FASTA genome file.", "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." @@ -87,7 +89,7 @@ "mirna_gtf": { "type": "string", "description": "GFF/GTF file with coordinates positions of precursor and miRNAs.", - "help_text": "miRBase `.gff3` file, typically downloaded from [`https://mirbase.org/ftp/CURRENT/genomes/`](https://mirbase.org/ftp/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", + "help_text": "miRBase `.gff3` file, typically downloaded from [`https://mirbase.org/download/CURRENT/genomes/`](https://mirbase.org/download/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", "fa_icon": "fas fa-address-book" }, "mirgenedb_gff": { @@ -100,7 +102,7 @@ "description": "Path to FASTA file with mature miRNAs.", "fa_icon": "fas fa-wheelchair", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/ftp/CURRENT/mature.fa.gz" + "default": "https://mirbase.org/download/CURRENT/mature.fa" }, "mirgenedb_mature": { "type": "string", @@ -112,7 +114,7 @@ "description": "Path to FASTA file with miRNAs precursors.", "fa_icon": "fab fa-cuttlefish", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" + "default": "https://mirbase.org/download/CURRENT/hairpin.fa" }, "mirgenedb_hairpin": { "type": "string", @@ -348,7 +350,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -365,12 +367,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -394,6 +398,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -408,6 +413,7 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, "hidden": true }, "hook_url": { @@ -419,6 +425,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -434,13 +441,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -448,12 +448,29 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "default": false, + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "default": false, + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index 033f59ad..967b2757 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -3,12 +3,12 @@ // include { INDEX_GENOME } from '../../modules/local/bowtie_genome' -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools' include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' workflow GENOME_QUANT { take: - fasta + fasta index reads // channel: [ val(meta), [ reads ] ] @@ -16,7 +16,7 @@ workflow GENOME_QUANT { ch_versions = Channel.empty() if (!index){ - INDEX_GENOME ( fasta ) + INDEX_GENOME ( [ [:], fasta ] ) bowtie_index = INDEX_GENOME.out.index fasta_formatted = INDEX_GENOME.out.fasta ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) @@ -29,14 +29,14 @@ workflow GENOME_QUANT { BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) } emit: fasta = fasta_formatted index = bowtie_index - stats = BAM_SORT_SAMTOOLS.out.stats + stats = BAM_SORT_STATS_SAMTOOLS.out.stats versions = ch_versions } diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index dd8a8412..dfa16ab4 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -15,8 +15,8 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_MATURE BOWTIE_MAP_SEQ as BOWTIE_MAP_HAIRPIN BOWTIE_MAP_SEQ as BOWTIE_MAP_SEQCLUSTER } from '../../modules/local/bowtie_map_mirna' -include { BAM_SORT_SAMTOOLS as BAM_STATS_MATURE - BAM_SORT_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_samtools' +include { BAM_SORT_STATS_SAMTOOLS as BAM_STATS_MATURE + BAM_SORT_STATS_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_stats_samtools' include { SEQCLUSTER_SEQUENCES } from '../../modules/local/seqcluster_collapse.nf' include { MIRTOP_QUANT } from '../../modules/local/mirtop_quant.nf' @@ -25,8 +25,8 @@ include { EDGER_QC } from '../../modules/local/edger_qc.nf' workflow MIRNA_QUANT { take: - mature // channel: fasta file - hairpin // channel: fasta file + mature // channel: [ val(meta), fasta file] + hairpin // channel: [ val(meta), fasta file] gtf // channle: GTF file reads // channel: [ val(meta), [ reads ] ] @@ -94,7 +94,7 @@ workflow MIRNA_QUANT { ch_mirtop_logs = Channel.empty() if (params.mirtrace_species){ - MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta, gtf ) + MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf ) ch_mirtop_logs = MIRTOP_QUANT.out.logs ch_versions = ch_versions.mix(MIRTOP_QUANT.out.versions) diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf similarity index 77% rename from subworkflows/nf-core/bam_sort_samtools.nf rename to subworkflows/nf-core/bam_sort_stats_samtools/main.nf index ded07b32..fc1c652b 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -2,16 +2,17 @@ // Sort, index BAM file and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' -workflow BAM_SORT_SAMTOOLS { +workflow BAM_SORT_STATS_SAMTOOLS { take: - ch_bam // channel: [ val(meta), [ bam ] ] - fasta + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: + ch_versions = Channel.empty() SAMTOOLS_SORT ( ch_bam ) @@ -33,7 +34,7 @@ workflow BAM_SORT_SAMTOOLS { } .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( ch_bam_bai, fasta ) + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..69c16be4 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf deleted file mode 100644 index 38d2569b..00000000 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ /dev/null @@ -1,32 +0,0 @@ -// -// Run SAMtools stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] - fasta - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( ch_bam_bai, fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..44d4c010 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..87863b11 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index ad5ecfef..049851d0 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -1,12 +1,18 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Validate input parameters WorkflowSmrnaseq.initialise(params, log) // Check input path parameters to see if they exist @@ -25,7 +31,7 @@ if (!params.mirtrace_species) { } // Genome options -def mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/ftp/CURRENT/genomes/${params.mirtrace_species}.gff3" : false +def mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3" : false def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species /* @@ -96,7 +102,7 @@ workflow SMRNASEQ { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input) ) .reads .dump(tag: 'group') @@ -109,6 +115,9 @@ workflow SMRNASEQ { } .set { ch_fastq } ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // MODULE: Concatenate FastQ files from same sample if required @@ -170,8 +179,8 @@ workflow SMRNASEQ { } MIRNA_QUANT ( - reference_mature, - reference_hairpin, + [ [:], reference_mature], + [ [:], reference_hairpin], mirna_gtf, mirna_reads ) @@ -211,7 +220,7 @@ workflow SMRNASEQ { if (!params.skip_multiqc) { workflow_summary = WorkflowSmrnaseq.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowSmrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + methods_description = WorkflowSmrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty()