diff --git a/.editorconfig b/.editorconfig index b6b31907..dd9ffa53 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4d09370b..bd134c88 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e278390b..3cdbf2b7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 2ac5826b..1795a0fb 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 + uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -28,7 +30,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 84d22f1c..3ed287c5 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,7 +12,7 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 + uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 96080391..5deaba55 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45ad5219..64fcab5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,15 +29,20 @@ jobs: profile: - "test" - "test_no_genome" + - "test_umi" + - "test_index" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..f823210d --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,72 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ae6a0d7b..56151e57 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/smrnaseq/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..748b4311 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,72 +11,33 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 81% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml index 6ad33927..c3674af2 100644 --- a/.github/workflows/release-announcments.yml +++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' > $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.0.2 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.gitignore b/.gitignore index 5124c9ac..4109b5c9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +execution_trace* diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..363d5b1d 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,6 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..8a74623b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,5 @@ repository_type: pipeline +lint: + nextflow_config: + - config_defaults: + - params.fastp_known_mirna_adapters diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..af57081f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 97613a1c..48e2675b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,50 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.3.0 - 2024-02-23 - Gray Zinc Dalmatian + +- [[#307]](https://github.com/nf-core/smrnaseq/pull/307) - Clean up config file and improve output folder structure +- [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`) +- [[#164]](https://github.com/nf-core/smrnaseq/pull/164) - UMI Handling Feature implemented in the pipeline +- [[#302]](https://github.com/nf-core/smrnaseq/pull/302) - Merged in nf-core template v2.11.1 +- [[#294]](https://github.com/nf-core/smrnaseq/pull/294) - Fixed contamination screening issues +- [[#309]](https://github.com/nf-core/smrnaseq/pull/309) - Merged in nf-core template v2.12.0 +- [[#310]](https://github.com/nf-core/smrnaseq/pull/310) - Removed unnecessarily separate mirtrace subworkflow, now using module instead +- [[#311]](https://github.com/nf-core/smrnaseq/pull/311) - Fix use of FASTP, set `three_prime_adapter` per default +- [[#314]](https://github.com/nf-core/smrnaseq/pull/314) - Add parameters to control publishing of intermediate results +- [[#317]](https://github.com/nf-core/smrnaseq/pull/317) - Fixed issue with bowtie indices directly supplied +- [[#318]](https://github.com/nf-core/smrnaseq/pull/318) - Merged in nf-core template v2.13.0 and pinned nf-validator + +### Parameters + +| Old parameter | New parameter | +| ------------- | ---------------------------- | +| | `--with_umi` | +| | `--umitools_extract_method` | +| | `--umitools_method` | +| | `--skip_umi_extract` | +| | `--umitools_bc_pattern` | +| | `--umi_discard_read` | +| | `--save_umi_intermeds` | +| | `--save_aligned` | +| | `--save_aligned_mirna_quant` | +| | `--save_merged` | + +### Software dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `multiqc` | 1.15 | 1.20 | +| `edgeR` | 3.36.0 | 4.0.2 | +| `limma` | 3.50.0 | 3.58.1 | +| `bioconvert` | 0.4.3 | 1.1.1 | +| `mirdeep` | 2.0.1 | 2.0.1.3 | +| `seqkit` | 2.3.1 | 2.6.1 | +| `fastqc` | 0.11.4 | 0.12.1 | +| `samtools` | 1.17 | 1.19.2 | +| `umitools` | | 1.1.5 | +| `umicollapse` | | 1.0.0 | + ## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03 - Update template to 2.10 @@ -10,6 +54,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#288]](https://github.com/nf-core/smrnaseq/issues/288) - Bugfix for issue with handling malformed GFF3 from mirbase - Updated dependencies, including FASTQC, MultiQC 1.17, fastP and samtools to latest versions +### Parameters + +| Old parameter | New parameter | +| ------------- | ------------------------ | +| | `--mirgenedb` | +| | `--mirgenedb_species` | +| | `--mirgenedb_gff` | +| | `--mirgenedb_mature` | +| | `--mirgenedb_hairpin` | +| | `--contamination_filter` | +| | `--rrna` | +| | `--trna` | +| | `--cdna` | +| | `--ncrna` | +| | `--pirna` | +| | `--other_contamination` | + ## [v2.2.3](https://github.com/nf-core/smrnaseq/releases/tag/2.2.3) - 2023-09-06 - [[#271]](https://github.com/nf-core/smrnaseq/issues/271) - Bugfix for parsing hairpin and mature fasta files @@ -60,23 +121,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#188](https://github.com/nf-core/smrnaseq/pull/188)] - Dropped TrimGalore in favor of fastp QC and adapter trimming, improved handling of adapters and trimming parameters - [[#194](https://github.com/nf-core/smrnaseq/issues/194)] - Added default adapters file for FastP improved miRNA adapter trimming -### Parameters - -| Old parameter | New parameter | -| ------------- | ------------------------ | -| | `--mirgenedb` | -| | `--mirgenedb_species` | -| | `--mirgenedb_gff` | -| | `--mirgenedb_mature` | -| | `--mirgenedb_hairpin` | -| | `--contamination_filter` | -| | `--rrna` | -| | `--trna` | -| | `--cdna` | -| | `--ncrna` | -| | `--pirna` | -| | `--other_contamination` | - ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua ### Major enhancements @@ -94,20 +138,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script +- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline and extend the deduplication step. - [#55](https://github.com/lpantano/seqcluster/pull/55) - update seqcluster to fix UMI-detecting bug -### Parameters - -| Old parameter | New parameter | -| -------------------- | ---------------- | -| `--conda` | `--enable_conda` | -| `--clusterOptions` | | -| `--publish_dir_mode` | | - -> **NB:** Parameter has been **updated** if both old and new parameter information is present. -> **NB:** Parameter has been **added** if just the new parameter information is present. -> **NB:** Parameter has been **removed** if parameter information isn't present. - ### Software dependencies Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. @@ -137,9 +170,17 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | `bowtie2` | - | 2.4.5 | | `blat` | - | 36 | -> **NB:** Dependency has been **updated** if both old and new version information is present. -> **NB:** Dependency has been **added** if just the new version information is present. -> **NB:** Dependency has been **removed** if version information isn't present. +### Parameters + +| Old parameter | New parameter | +| -------------------- | ---------------- | +| `--conda` | `--enable_conda` | +| `--clusterOptions` | | +| `--publish_dir_mode` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if parameter information isn't present. ## [v1.1.0](https://github.com/nf-core/smrnaseq/releases/tag/1.1.0) - 2021-06-15 diff --git a/README.md b/README.md index 7c010c00..e49ec02b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,14 @@ -# ![nf-core/smrnaseq](docs/images/nf-core-smrnaseq_logo_light.png#gh-light-mode-only) ![nf-core/smrnaseq](docs/images/nf-core-smrnaseq_logo_dark.png#gh-dark-mode-only) +

+ + + nf-core/smrnaseq + +

[![GitHub Actions CI Status](https://github.com/nf-core/smrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/smrnaseq/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/smrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/smrnaseq/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/smrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3456879-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3456879) + [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) @@ -27,37 +33,50 @@ You can find numerous talks on the nf-core events page from various topics inclu ## Pipeline summary -1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) - 1. Insert Size calculation - 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) -4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Alignment against miRBase hairpin - 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -6. Post-alignment processing of miRBase hairpin - 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 2. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - - TMM normalization and a table of top expression hairpin - - MDS plot clustering samples - - Heatmap of sample similarities - 3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop)) -7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) -8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) +1. Quality check and triming + 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) + 2. UMI extraction and miRNA adapter trimming ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) (Optional) + 3. 3' adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) + 4. Read quality and length filter ([`fastp`](https://github.com/OpenGene/fastp)) + 5. Trim read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. UMI deduplication (Optional) + 1. Deduplication on fastq-level ([`UMICollapse`](https://github.com/Daniel-Liu-c0deb0t/UMICollapse)) + 2. Barcode and miRNA adapter extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) + 3. Read length filter ([`fastp`](https://github.com/OpenGene/fastp)) +3. miRNA QC ([`miRTrace`](https://github.com/friedlanderlab/mirtrace)) +4. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) (Optional) + 1. rRNA filtration + 2. tRNA filtration + 3. cDNA filtration + 4. ncRNA filtration + 5. piRNA filtration + 6. Others filtration +5. UMI barcode deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +6. miRNA quantification + - EdgeR + 1. Reads alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against Mature miRNA ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Unmapped reads (from reads vs mature miRNA) alignment against miRBase hairpin + 4. Post-alignment processing of alignment against Hairpin ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 5. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) + - TMM normalization and a table of top expression hairpin + - MDS plot clustering samples + - Heatmap of sample similarities + - Mirtop quantification + 1. Read collapsing ([`seqcluster`](https://github.com/lpantano/seqcluster)) + 2. miRNA and isomiR annotation ([`mirtop`](https://github.com/miRTop/mirtop)) +7. Genome Quantification (Optional) + 1. Reads alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) (Optional) 1. Mapping against reference genome with the mapper module 2. Known and novel miRNA discovery with the mirdeep2 module -9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +9. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -89,11 +108,9 @@ nextflow run nf-core/smrnaseq \ --outdir ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters). diff --git a/assets/email_template.html b/assets/email_template.html index e75e86ac..d169743a 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/smrnaseq v${version}

+

nf-core/smrnaseq ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 654e78db..40a0e326 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/smrnaseq v${version} + nf-core/smrnaseq ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/known_adapters.fa b/assets/known_adapters.fa index 6b54be80..00999c05 100644 --- a/assets/known_adapters.fa +++ b/assets/known_adapters.fa @@ -4,3 +4,5 @@ AACTGTAGGCACCATCAAT TGGAATTCTCGGGTGCCAAGG > Nextflex miRNA adapter TGGAATTCTCGGGTGCCAAGG +> CATS Small RNA-seq Kit adapter +GATCGGAAGAGCACACGTCTG diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 435afefa..cb6b62c8 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ report_comment: > - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. + report_section_order: "nf-core-smrnaseq-methods-description": order: -1000 @@ -11,3 +12,22 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results on the input files." + path_filters: + - "**/*.raw_fastqc.zip" + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." + path_filters: + - "**/*.trim_fastqc.zip" + - fastqc: + name: "FastQC (deduplicated)" + info: "This section of the report shows FastQC results after UMI-based deduplication." + path_filters: + - "**/*.deduplicated_fastqc.zip" diff --git a/assets/nf-core-smrnaseq_logo_light.png b/assets/nf-core-smrnaseq_logo_light.png index 262fa1b0..9f8e05ef 100644 Binary files a/assets/nf-core-smrnaseq_logo_light.png and b/assets/nf-core-smrnaseq_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 5041ee60..892b1996 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,25 +10,22 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, "required": ["sample", "fastq_1"] diff --git a/assets/slackreport.json b/assets/slackreport.json index 214c7fa9..1884b21a 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/smrnaseq v${version} - ${runName}", + "author_name": "nf-core/smrnaseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 94077cd4..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python - -# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - -import os -import sys -import errno -import argparse - - -def parse_args(args=None): - Description = "Reformat nf-core/smrnaseq samplesheet file and check its contents." - Epilog = "Example usage: python check_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") - parser.add_argument("FILE_OUT", help="Output file.") - return parser.parse_args(args) - - -def make_dir(path): - if len(path) > 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise exception - - -def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) - if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) - print(error_str) - sys.exit(1) - - -def check_samplesheet(file_in, file_out): - """ - This function checks that the samplesheet follows the following structure: - sample,fastq_1 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz - For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - """ - - sample_mapping_dict = {} - with open(file_in, "r") as fin: - ## Check header - MIN_COLS = 2 - HEADER = ["sample", "fastq_1"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if any([item not in header for item in HEADER]): - missing = [item for item in HEADER if item not in header] - print_error("ERROR: Please check samplesheet header. Missing columns: '{}'".format(",".join(missing))) - sys.exit(1) - - ## Check sample entries - for line_number, line in enumerate(fin): - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - row = {k: v for k, v in zip(header, lspl)} - - # Check valid number of columns per row - if len(lspl) != len(header): - print_error( - "Invalid number of columns found - header has {} columns, content has {}. Number of rows {})".format( - len(header), len(lspl), line_number - ), - f"Line #{line_number+2}", - line, - ) - - ## Check sample name entries - sample = row.get("sample", "").replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", f"Line #{line_number+2}", line) - - ## Check FastQ file extension - fastq = row.get("fastq_1", None) - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", f"Line #{line_number+2}", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) - - ## Create sample mapping dictionary - sample_info = {"single_end": "1", "fastq_1": fastq} - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sample_mapping_dict[sample].append(sample_info) - - ## Write validated samplesheet with appropriate columns - output_cols = ["id", "intrasample_id", "single_end", "fastq_1"] - if len(sample_mapping_dict) > 0: - out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - fout.write(",".join(output_cols) + "\n") - for sample in sorted(sample_mapping_dict.keys()): - for intrasample_id, val in enumerate(sample_mapping_dict[sample]): - sample_info = {**{"id": sample, "intrasample_id": str(intrasample_id)}, **val} - outrow = [sample_info.get(colname, None) for colname in output_cols] - fout.write(",".join(outrow) + "\n") - else: - print_error("No entries to process!", "Samplesheet: {}".format(file_in)) - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/modules.config b/conf/modules.config index b06d0055..f7a50622 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,252 +10,356 @@ ---------------------------------------------------------------------------------------- */ -// -// General configuration options -// + process { + + // + // General configuration options + // publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { + // + // Input preparation + // + withName: 'NFCORE_SMRNASEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + withName: 'CAT_FASTQ' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/cat_fastq" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: '*.fastq', + enabled: params.save_merged ] } -} -// -// Genome preparation options -// - -process { - withName: 'CAT_FASTQ' { + // + // FASTQ_FASTQC_UMITOOLS_FASTP + // + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { + ext.args = [ "", + params.trim_fastq ? "" : "--disable_adapter_trimming", + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", + params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", + params.three_prime_adapter == null ? '' : "--adapter_sequence ${params.three_prime_adapter}" + ].join(" ").trim() publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: '*.fastq', - // enabled: params.save_merged_fastq //TODO ? implement save_merged_fastq param + [ + path: { "${params.outdir}/fastp/on_raw" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/fastp/on_raw/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/fastp/on_raw" }, + mode: params.publish_dir_mode, + pattern: "*.fail.fastq.gz", + enabled: params.save_trimmed_fail + ] ] } - withName: 'INDEX_GENOME' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { + //the prefix is required for multiqc to pickup the files separately from the other fastqc instances + ext.prefix = { "${meta.id}.raw" } + ext.args = '--quiet' publishDir = [ - path: { "${params.outdir}/index" }, + path: { "${params.outdir}/fastqc/raw" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:FORMAT_.*' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { + ext.prefix = { "${meta.id}.trim" } + ext.args = '--quiet' publishDir = [ - path: { "${params.outdir}/genome" }, + path: { "${params.outdir}/fastqc/trimmed" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'EDGER_QC' { + + // + // FASTP LENGTH FILTER + // + withName: 'FASTP_LENGTH_FILTER' { + ext.args = [ "", + "--disable_adapter_trimming", + "--disable_quality_filtering", + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "" + ].join(" ").trim() publishDir = [ - path: { "${params.outdir}/edger" }, - mode: params.publish_dir_mode, - enabled: true + [ + path: { "${params.outdir}/fastp/on_umi_extracted" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/fastp/on_umi_extracted/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/fastp/on_umi_extracted" }, + mode: params.publish_dir_mode, + pattern: "*.fail.fastq.gz", + enabled: params.save_trimmed_fail + ] ] } -} -// -// Read QC and trimming options -// -process { - withName: 'MIRTRACE_RUN' { + // + // Make bowtie index + // + withName: 'INDEX_GENOME' { publishDir = [ - path: { "${params.outdir}/mirtrace/${meta.id}" }, + path: { "${params.outdir}/bowtie_index/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } -} -if (!(params.skip_fastqc)) { - process { - withName: '.*:FASTQC_FASTP:FASTQC_.*' { - ext.args = '--quiet' - } + // + // UMI deduplication + // + + withName: '.*:UMICOLLAPSE_FASTQ' { + ext.args = { meta.single_end ? "--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_deduplicated" }, + mode: params.publish_dir_mode, + pattern: '*.{bam,fastq.gz}', + enabled: ( + params.save_umi_intermeds + ) + ] } -} -if (!params.skip_fastp) { - process { - withName: 'FASTP' { - ext.args = [ "", - params.trim_fastq ? "" : "--disable_adapter_trimming", - params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. - params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. - params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", - params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", - params.three_prime_adapter == null ? '' : "--adapter_sequence ${params.three_prime_adapter}" - ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.{json,html}" - ], - [ - path: { "${params.outdir}/fastp/log" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.fail.fastq.gz", - enabled: params.save_trimmed_fail - ] + withName: 'UMITOOLS_EXTRACT' { + ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/umi_dedup/fastq_extracted_umi" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umi_dedup/fastq_extracted_umi" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds ] - } - + ] } - if (!params.skip_fastqc) { - process { - withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/fastqc/trim" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + // + // MIRTRACE QC + // + withName: 'MIRTRACE_RUN' { + publishDir = [ + //"mirtrace" already part of the published folder + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } -} -// -// Quantification -// + // + // CONTAMINANT_FILTER + // + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:.*' { + publishDir = [ + path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } -process { - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:PARSE_.*' { - publishDir = [ - path: { "${params.outdir}/genome" }, + // + // MIRNA_QUANT + // + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:FORMAT_.*' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/reference" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_.*:SAMTOOLS_SORT' { + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:PARSE_.*' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/reference" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:INDEX_MATURE' { + publishDir = [ + path: { "${params.outdir}/bowtie_index/mirna_mature" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:INDEX_HAIRPIN' { + publishDir = [ + path: { "${params.outdir}/bowtie_index/mirna_hairpin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_MATURE' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/bam/mature" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_aligned_mirna_quant + ] + } + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:BAM_STATS_MATURE:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools" }, + path: { "${params.outdir}/mirna_quant/bam/mature" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_.*:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_HAIRPIN' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/bam/hairpin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_aligned_mirna_quant + ] + } + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:BAM_STATS_HAIRPIN:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools/samtools_stats" }, + path: { "${params.outdir}/mirna_quant/bam/hairpin" }, mode: params.publish_dir_mode, - pattern: "*.{stats,flagstat,idxstats}" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_.*' { + withName: 'EDGER_QC' { publishDir = [ - path: { "${params.outdir}/unmapped/fastq" }, + path: { "${params.outdir}/mirna_quant/edger_qc" }, mode: params.publish_dir_mode, - pattern: "unmapped/*.gz" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: 'SEQCLUSTER_SEQUENCES' { publishDir = [ - path: { "${params.outdir}/seqcluster" }, - enabled: false + path: { "${params.outdir}/mirna_quant/seqcluster" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'MIRTOP_QUANT' { + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_SEQCLUSTER' { publishDir = [ - path: { "${params.outdir}" }, + path: { "${params.outdir}/mirna_quant/bam/seqcluster" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_aligned_mirna_quant ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:TABLE_MERGE' { + withName: 'MIRTOP_QUANT' { publishDir = [ - path: { "${params.outdir}/mirtop" }, + //mirtop already part of the output folder + path: { "${params.outdir}/mirna_quant/" }, mode: params.publish_dir_mode, - pattern: "*.tsv" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'EDGER_QC' { + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:TABLE_MERGE' { publishDir = [ - path: { "${params.outdir}/edger" }, + path: { "${params.outdir}/mirna_quant/mirtop" }, mode: params.publish_dir_mode, - enabled: true + pattern: "*.tsv" ] } -} -process { - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + // + // GENOME_QUANT + // + withName: 'NFCORE_SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools/samtools_stats" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, pattern: "*.{stats,flagstat,idxstats}" ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_.*' { + withName: 'NFCORE_SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_GENOME' { publishDir = [ - path: { "${params.outdir}/unmapped/fastq" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, - pattern: "unmapped/*.gz" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_aligned ] } -} -if (!params.skip_mirdeep) { - process { - withName: 'MIRDEEP2_MAPPER' { - publishDir = [ - path: { "${params.outdir}/mirdeep" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } -} -if (!params.skip_multiqc) { - process { - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // + // MIRDEEP + // + withName: 'MIRDEEP2_MAPPER' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/mapper" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MIRDEEP2_RUN' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/run" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + // + // reports + // + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/conf/test.config b/conf/test.config index 1a81afee..a56b2e96 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,11 +23,12 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - mature = 'https://mirbase.org/download/CURRENT/mature.fa' - hairpin = 'https://mirbase.org/download/CURRENT/hairpin.fa' - mirna_gtf = 'https://mirbase.org/download/hsa.gff3' - mirtrace_species = 'hsa' - protocol = 'illumina' - skip_mirdeep = true + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + save_merged = false + save_aligned_mirna_quant = false + + cleanup = true //Otherwise tests dont run through properly. } diff --git a/conf/test_index.config b/conf/test_index.config new file mode 100644 index 00000000..bb9f4707 --- /dev/null +++ b/conf/test_index.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test_index, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test index profile' + config_profile_description = 'Minimal test dataset to check pipeline function with bowtie index' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' + bowtie_index = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/bowtie_index.tar.gz' + + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + save_merged = false + save_aligned_mirna_quant = false + + cleanup = true //Otherwise tests dont run through properly. +} diff --git a/conf/test_no_genome.config b/conf/test_no_genome.config index 485870de..aae8ce91 100644 --- a/conf/test_no_genome.config +++ b/conf/test_no_genome.config @@ -21,9 +21,9 @@ params { // Input data input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' - mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/mature.fa' - hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hairpin.fa' - mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hsa.gff3' + mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' + hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' + mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' mirtrace_species = 'hsa' skip_mirdeep = true protocol = 'illumina' diff --git a/conf/test_umi.config b/conf/test_umi.config new file mode 100644 index 00000000..c7d0db15 --- /dev/null +++ b/conf/test_umi.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_umi.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' + + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + + //UMI Specific testcase + with_umi = true + umitools_extract_method = 'regex' + umitools_bc_pattern = '.+(?PAACTGTAGGCACCATCAAT){s<=2}(?P.{12})(?P.*)' + save_umi_intermeds = true +} diff --git a/docs/images/nf-core-smrnaseq_logo_dark.png b/docs/images/nf-core-smrnaseq_logo_dark.png index bcab7dfe..20b1aaff 100644 Binary files a/docs/images/nf-core-smrnaseq_logo_dark.png and b/docs/images/nf-core-smrnaseq_logo_dark.png differ diff --git a/docs/images/nf-core-smrnaseq_logo_light.png b/docs/images/nf-core-smrnaseq_logo_light.png index 262fa1b0..62d5d1d7 100644 Binary files a/docs/images/nf-core-smrnaseq_logo_light.png and b/docs/images/nf-core-smrnaseq_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 706930a1..d2358724 100644 --- a/docs/output.md +++ b/docs/output.md @@ -13,6 +13,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [FastQC](#fastqc) - read quality control +- [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction +- [UMI-collapse deduplicate](#umicollapse-deduplicate) - read deduplication - [FastP](#fastp) - adapter trimming - [Bowtie2](#bowtie2) - contamination filtering - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins) @@ -40,6 +42,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) +## UMI-tools extract + +
+Output files + +- `umitools/` + - `*.fastq.gz`: If `--save_umi_intermeds` is specified, FastQ files **after** UMI extraction will be placed in this directory. + - `*.log`: Log file generated by the UMI-tools `extract` command. + +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) extracts UMIs from reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-collapse deduplicate](#umicollapse-deduplicate) section. + +To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. + ## FastP [FastP](https://github.com/OpenGene/fastp) is used for removal of adapter contamination and trimming of low quality regions. @@ -55,6 +72,18 @@ Contains FastQ files with quality and adapter trimmed reads for each sample, alo FastP can automatically detect adapter sequences when not specified directly by the user - the pipeline also comes with a feature and a supplied miRNA adapters file to ensure adapters auto-detected are more accurate. If there are needs to add more known miRNA adapters to this list, please open a pull request. +## UMI-collapse deduplicate + +
+Output files + +- `umi_dedup/` + - `*.log`: Results statistics files detailing the UMI deduplication results. + - `*.fastq.gz`: If `--save_umi_intermeds` is specified, the deduplicated fastq.gz files **after** UMI deduplication will be placed in this directory. +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. Umicollapse works directly on the fastq files instead of mapping the UMI data first, then deduplicating and generating fastq files again. + ## Bowtie2 [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is used to align the reads to user-defined databases of contaminants. diff --git a/docs/usage.md b/docs/usage.md index 2884df74..b5eb01f0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,11 +11,13 @@ This option indicates the experimental protocol used for the sample preparation. Currently supporting: - 'illumina': adapter (`TGGAATTCTCGGGTGCCAAGG`) -- 'nextflex': adapter (`TGGAATTCTCGGGTGCCAAGG), clip_r1 (`4`), three_prime_clip_r1 (`4`) +- 'nextflex': adapter (`TGGAATTCTCGGGTGCCAAGG`), clip_r1 (`4`), three_prime_clip_r1 (`4`) - 'qiaseq': adapter (`AACTGTAGGCACCATCAAT`) -- 'cats': adapter (`GATCGGAAGAGCACACGTCTG), clip_r1(`3) +- 'cats': adapter (`GATCGGAAGAGCACACGTCTG`), clip_r1(`3) - 'custom' (where the user can indicate the `three_prime_adapter`, `clip_r1` and `three_prime_clip_r1` manually) +The parameter `--three_prime_adapter` is set to the Illumina TruSeq single index adapter sequence `AGATCGGAAGAGCACACGTCTGAACTCCAGTCA`. This is also to ensure, that the auto-detect functionality of `FASTP` is disabled. Please make sure to adapt this adapter sequence accordingly for your run. + :warning: At least the `custom` protocol has to be specified, otherwise the pipeline won't run. In case you specify the `custom` protocol, ensure that the parameters above are set accordingly or the defaults will be applied. If you want to auto-detect the adapters using `fastp`, please set `--three_prime_adapter` to `""`. ### `mirtrace_species` or `mirgenedb_species` @@ -27,10 +29,10 @@ It should point to the 3-letter species name used by [miRBase](https://www.mirba Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. - `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3` -- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa` -- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa` +- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/mature.fa` +- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/hairpin.fa` -If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below . +If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below. - `mirgenedb_gff`: The data can not be downloaded automatically (URLs are created with short term tokens in it), thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `--mirgenedb_species`. - `mirgenedb_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. @@ -54,6 +56,18 @@ Contamination filtering of the sequencing reads is optional and can be invoked u - `pirna`: Used to supply a FASTA file containing piRNA contamination sequence. e.g. The FASTA file is first compared to the available miRNA sequences and overlaps are removed. - `other_contamination`: Used to supply an additional filtering set. The FASTA file is first compared to the available miRNA sequences and overlaps are removed. +### UMI handling + +The pipeline handles UMIs with two tools. Umicollapse to deduplicate on entire read sequence after 3'adapter removal. Followed by Umitools-extract to extract the miRNA adapter and UMI. This can be achieved by using the parameters for UMI handling as follows (in this case for QIAseq miRNA Library Kit): + +```bash +--with_umi --umitools_extract_method regex --umitools_bc_pattern = '.+(?PAACTGTAGGCACCATCAAT){s<=2}(?P.{12})(?P.*)' +``` + +:::note +You will have to specify custom umitools_bc_pattern patterns if your UMI read structure is different. Please check the required capability in your UMI handling manual. It should be set in a way, that only the insert sequence of the RNA molecule is left after extraction. Please refer to the manual of the used kit for the expected read structure. +::: + ## Samplesheet input You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns ("sample" and "fastq_1"), and a header row as shown in the examples below. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653d..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index ed5a126c..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,72 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/smrnaseq pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://zenodo.org/badge/latestdoi/140590861\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - //Detect Protocol setting, set this early before help so help shows proper adapters etc pp - WorkflowSmrnaseq.formatProtocol(params,log) - - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/WorkflowSmrnaseq.groovy b/lib/WorkflowSmrnaseq.groovy deleted file mode 100755 index 2575fd9b..00000000 --- a/lib/WorkflowSmrnaseq.groovy +++ /dev/null @@ -1,160 +0,0 @@ -// -// This file holds several functions specific to the workflow/smrnaseq.nf in the nf-core/smrnaseq pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowSmrnaseq { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO nf-core: Optionally add in-text citation tools to this list. - - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } - - /* - * Format the protocol - * Given the protocol parameter (params.protocol), - * this function formats the protocol such that it is fit for the respective - * subworkflow - */ - public static String formatProtocol(params,log) { - - switch(params.protocol){ - case 'illumina': - params.putIfAbsent("clip_r1", 0); - params.putIfAbsent("three_prime_clip_r1",0); - params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); - break - case 'nextflex': - params.putIfAbsent("clip_r1", 4); - params.putIfAbsent("three_prime_clip_r1", 4); - params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); - break - case 'qiaseq': - params.putIfAbsent("clip_r1",0); - params.putIfAbsent("three_prime_clip_r1",0); - params.putIfAbsent("three_prime_adapter","AACTGTAGGCACCATCAAT"); - break - case 'cats': - params.putIfAbsent("clip_r1",3); - params.putIfAbsent("three_prime_clip_r1", 0); - params.putIfAbsent("three_prime_adapter", "AAAAAAAA"); - break - case 'custom': - params.putIfAbsent("clip_r1", params.clip_r1) - params.putIfAbsent("three_prime_clip_r1", params.three_prime_clip_r1) - default: - log.warn "Please make sure to specify all required clipping and trimming parameters, otherwise only adapter detection will be performed." - } - - log.warn "Running with Protocol ${params.protocol}" - log.warn "Therefore using Adapter: ${params.three_prime_adapter}" - log.warn "Clipping ${params.clip_r1} bases from R1" - log.warn "And clipping ${params.three_prime_clip_r1} bases from 3' end" - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 1e4232eb..cd13268a 100644 --- a/main.nf +++ b/main.nf @@ -13,63 +13,70 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.mirtrace_species = WorkflowMain.getGenomeAttribute(params, 'mirtrace_species') -params.bowtie_index = WorkflowMain.getGenomeAttribute(params, 'bowtie') - +include { NFCORE_SMRNASEQ } from './workflows/smrnaseq' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +params.fasta = getGenomeAttribute('fasta') +params.mirtrace_species = getGenomeAttribute('mirtrace_species') +params.bowtie_index = getGenomeAttribute('bowtie') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +workflow { -include { SMRNASEQ } from './workflows/smrnaseq' + main: + ch_versions = Channel.empty() -workflow NFCORE_SMRNASEQ { - SMRNASEQ () -} + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_SMRNASEQ ( + Channel.of(file(params.input, checkIfExists: true)), + PIPELINE_INITIALISATION.out.samplesheet, + ch_versions + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_SMRNASEQ.out.multiqc_report + ) -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// -workflow { - NFCORE_SMRNASEQ () } /* diff --git a/modules.json b/modules.json index 1250670b..ad13815e 100644 --- a/modules.json +++ b/modules.json @@ -5,55 +5,70 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "cat/fastq": { + "cat/cat": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { + "cat/fastq": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "0997b47c93c06b49aa7b3fefda87e728312cf2ca", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "fastqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "installed_by": ["fastq_fastqc_umitools_fastp"] }, "multiqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "umicollapse": { + "branch": "master", + "git_sha": "ba3f3df395d2719dcef5c67189042a1dc555c701", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "ff7e93715a2acecf3f143ec78c9858deba2984d0", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + }, + "untarfiles": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] } } }, @@ -61,13 +76,33 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "7c8eeb2b37a6c6d3ffba0aef55ff60c8718c0ba6", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows", "bam_sort_stats_samtools"] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_sort_stats_samtools"] + }, + "fastq_fastqc_umitools_fastp": { + "branch": "master", + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", + "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "262b17ed2aad591039f914951659177e6c39a8d8", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/blat_mirna.nf b/modules/local/blat_mirna.nf index 7f8a2324..aa0d3d51 100644 --- a/modules/local/blat_mirna.nf +++ b/modules/local/blat_mirna.nf @@ -53,7 +53,7 @@ process BLAT_MIRNA { blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants > filtered.fa -cat <<-END_VERSIONS > versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }') END_VERSIONS diff --git a/modules/local/bowtie_contaminants.nf b/modules/local/bowtie_contaminants.nf index e6a594a7..cf02de31 100644 --- a/modules/local/bowtie_contaminants.nf +++ b/modules/local/bowtie_contaminants.nf @@ -4,7 +4,7 @@ process INDEX_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2'}" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2'}" input: path fasta diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 91a6cd53..17ea9253 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -2,10 +2,10 @@ process INDEX_GENOME { tag "$fasta" label 'process_medium' - conda 'bioconda::bowtie=1.3.1-4' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index d10f13b5..c9863ab3 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -4,7 +4,7 @@ process BOWTIE_MAP_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) @@ -21,17 +21,20 @@ process BOWTIE_MAP_CONTAMINANTS { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: "" + """ - INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` bowtie2 \\ + -x \$INDEX \\ + -U ${reads} \\ --threads ${task.cpus} \\ + --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ --very-sensitive-local \\ -k 1 \\ - -x \$INDEX \\ - --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ - ${reads} \\ + -S ${meta.id}.filter.contaminant.sam \\ ${args} \\ - -S ${meta.id}.filter.contaminant.sam > ${meta.id}.contaminant_bowtie.log 2>&1 + > ${meta.id}.contaminant_bowtie.log 2>&1 # extracting number of reads from bowtie logs awk -v type=${contaminant_type} 'BEGIN{tot=0} {if(NR==4 || NR == 5){tot += \$1}} END {print "\\""type"\\": "tot }' ${meta.id}.contaminant_bowtie.log | tr -d , > filtered.${meta.id}_${contaminant_type}.stats diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 2be45bb8..733d816e 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -1,10 +1,10 @@ process INDEX_MIRNA { label 'process_medium' - conda 'bioconda::bowtie=1.3.0-2' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf index 729d5eed..8c311457 100644 --- a/modules/local/edger_qc.nf +++ b/modules/local/edger_qc.nf @@ -1,10 +1,10 @@ process EDGER_QC { label 'process_medium' - conda 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36' + conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' : - 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' : + 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' }" input: path input_files diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index 18e7016b..4c46f51d 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -4,7 +4,7 @@ process FILTER_STATS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) @@ -13,6 +13,7 @@ process FILTER_STATS { output: path "*_mqc.yaml" , emit: stats tuple val(meta), path('*.filtered.fastq.gz'), emit: reads + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,5 +25,12 @@ process FILTER_STATS { tr '\n' ', ' | \\ awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: \\"bargraph\\"\\npconfig:\\n id: \\"contamination_filter_plot\\"\\n title: \\"Contamination Plot\\"\\n ylab: \\"Number of reads\\"\\ndata:\\n "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml gzip -c ${reads} > ${meta.id}.filtered.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(cat --version | grep 'cat ' |sed 's/cat (GNU coreutils) //') + gzip: \$(gzip --version | grep "gzip" | sed 's/gzip //') + tr: \$(tr --version | grep 'tr ' |sed 's/tr (GNU coreutils) //') + END_VERSIONS """ } diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf index 489879a5..67461d64 100644 --- a/modules/local/format_fasta_mirna.nf +++ b/modules/local/format_fasta_mirna.nf @@ -4,10 +4,10 @@ process FORMAT_FASTA_MIRNA { tag "$fasta" label 'process_medium' - conda 'bioconda::fastx_toolkit=0.0.14-9' + conda 'bioconda::fastx_toolkit=0.0.14' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--he1b5a44_8' : - 'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" + 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--hdbdd923_11' : + 'biocontainers/fastx_toolkit:0.0.14--hdbdd923_11' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf index 842af6e6..19a9c5dc 100644 --- a/modules/local/mirdeep2_mapper.nf +++ b/modules/local/mirdeep2_mapper.nf @@ -4,7 +4,7 @@ process MIRDEEP2_MAPPER { label 'process_medium' tag "$meta.id" - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf index 7e2f2437..ce66b9f1 100644 --- a/modules/local/mirdeep2_prepare.nf +++ b/modules/local/mirdeep2_prepare.nf @@ -3,10 +3,10 @@ process MIRDEEP2_PIGZ { tag "$meta.id" // TODO maybe create a mulled container and uncompress within mirdeep2_mapper? - conda 'bioconda::bioconvert=0.4.3' + conda 'bioconda::bioconvert=1.1.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bioconvert:0.4.3--py_0' : - 'biocontainers/bioconvert:0.4.3--py_0' }" + 'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' : + 'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) @@ -23,7 +23,7 @@ process MIRDEEP2_PIGZ { pigz -f -d -p $task.cpus $reads cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) END_VERSIONS """ diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 26635a4f..442f26f3 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -4,7 +4,7 @@ process MIRDEEP2_RUN { label 'process_medium' errorStrategy 'ignore' - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" @@ -35,9 +35,8 @@ process MIRDEEP2_RUN { -z _${reads.simpleName} cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirdeep2: \$(echo "$VERSION") END_VERSIONS """ } - diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index e97d6a09..ab38c93d 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -34,7 +34,7 @@ process MIRTOP_QUANT { mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //') END_VERSIONS """ diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf index f576ebc0..500de058 100644 --- a/modules/local/mirtrace.nf +++ b/modules/local/mirtrace.nf @@ -43,8 +43,8 @@ process MIRTRACE_RUN { --force cat <<-END_VERSIONS > versions.yml - ${task.process}": - mirtrace: \$(echo \$(mirtrace -v 2>&1)) + "${task.process}": + mirtrace: \$(echo \$(mirtrace -v)) END_VERSIONS """ diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index a0bbc75e..60665251 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -1,10 +1,10 @@ process PARSE_FASTA_MIRNA { label 'process_medium' - conda 'bioconda::seqkit=2.3.1' + conda 'bioconda::seqkit=2.6.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0' : - 'biocontainers/seqkit:2.3.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0' : + 'biocontainers/seqkit:2.6.1--h9ee0642_0' }" input: tuple val(meta2), path(fasta) @@ -34,7 +34,7 @@ process PARSE_FASTA_MIRNA { seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 351d2322..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/smrnaseq/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index 39f6ce85..4379654c 100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -2,10 +2,10 @@ process SEQCLUSTER_SEQUENCES { label 'process_medium' tag "$meta.id" - conda 'bioconda::seqcluster=1.2.9-0' + conda 'bioconda::seqcluster=1.2.9' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' : - 'biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }" + 'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }" input: tuple val(meta), path(reads) @@ -25,7 +25,7 @@ process SEQCLUSTER_SEQUENCES { mv collapsed/*.fastq.gz final/. cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqcluster: \$(echo \$(seqcluster --version 2>&1) | sed 's/^.*seqcluster //') END_VERSIONS """ diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/cat/cat/environment.yml similarity index 61% rename from modules/nf-core/custom/dumpsoftwareversions/environment.yml rename to modules/nf-core/cat/cat/environment.yml index 7ca22161..17a04ef2 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -1,6 +1,7 @@ +name: cat_cat channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.15 + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..adbdbd7b --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,79 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..00a8db0b --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..aaae04f9 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,177 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..0c9bfe8d --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,145 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2024-01-12T14:02:02.999254641" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_unzipped_zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2024-01-12T14:08:26.948048418" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2024-01-12T14:10:22.445700266" + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index 222b301f..8c69b121 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -1,6 +1,7 @@ +name: cat_fastq channels: - conda-forge - bioconda - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index b75a2e73..f132b2ad 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/cat/fastq/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" @@ -76,5 +76,4 @@ process CAT_FASTQ { """ } } - } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index f5f94182..dab2e14c 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -28,8 +28,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -42,13 +41,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -56,8 +55,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -70,11 +68,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -82,8 +80,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -96,13 +93,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -110,8 +107,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -124,10 +120,10 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -135,8 +131,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index ec2342e5..43dfe28f 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -1,78 +1,169 @@ { "test_cat_fastq_single_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:12.990284837" + "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:31.554568147" + "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:49.629360033" + "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ [ - "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", - "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:40.711617539" + "timestamp": "2024-01-17T17:33:33.031555" }, "test_cat_fastq_paired_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ [ - "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", - "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-18T07:53:20.923560211" + "timestamp": "2024-01-17T17:32:02.270935" } } \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 60a19e0e..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 9414c32d..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index eec1db10..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,38 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 8713b921..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,27 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ], - "1": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "2": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "versions": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "yml": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ] - } - ], - "timestamp": "2023-10-11T17:10:02.930699" - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24a..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml index 19ccec25..70389e66 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/fastp/environment.yml @@ -1,3 +1,4 @@ +name: fastp channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index ca5f100f..2a3b679e 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastp/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" @@ -45,7 +45,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -66,7 +66,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -91,7 +91,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -99,4 +99,22 @@ process FASTP { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index f610b735..9b3f9a38 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -19,11 +19,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -57,7 +56,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } ) } } @@ -74,12 +133,11 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -127,7 +185,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end-stub") } ) } } @@ -144,10 +262,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] - ] - + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -181,7 +299,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved-stub") } ) } } @@ -198,9 +376,10 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -241,7 +420,7 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } ) } } @@ -258,13 +437,11 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -321,7 +498,7 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } ) } } @@ -337,11 +514,11 @@ nextflow_process { adapter_fasta = [] save_trimmed_fail = false save_merged = true - - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -399,7 +576,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } ) } } @@ -412,14 +649,15 @@ nextflow_process { } process { """ - adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) save_trimmed_fail = false save_merged = true - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -478,7 +716,7 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } ) } } diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 0fa68c7d..b4c0e1dd 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -11,7 +11,43 @@ ] ] ], - "timestamp": "2023-10-17T11:04:45.794175881" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:10:13.467574" + }, + "versions_interleaved": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:24.615634793" }, "test_fastp_single_end_json": { "content": [ @@ -25,15 +61,253 @@ ] ] ], - "timestamp": "2023-10-17T11:04:10.566343705" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.223817" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:42.333545689" + }, + "test_fastp_paired_end_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:06.431833729" + }, + "test_fastp_interleaved-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:37.827323085" + }, + "test_fastp_paired_end_merged_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:08:44.496251446" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:27.354051299" + }, + "versions_interleaved-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:46.535528418" + }, + "versions_single_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:03.724591407" + }, + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:07:15.398827" + }, + "versions_paired_end-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:06.50017282" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:07.67921647" + }, + "versions_paired_end_merged_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:47.350653154" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.127974" + }, + "versions_paired_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:18.140484878" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.244202" + }, + "test_fastp_single_end-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:57:30.791982648" + }, + "versions_paired_end_merged_adapterlist": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:05:37.845370554" }, - "versions": { + "versions_paired_end_merged": { "content": [ [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], - "timestamp": "2023-10-17T11:04:10.582076024" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:32.860543858" }, "test_fastp_single_end_trim_fail_json": { "content": [ @@ -47,6 +321,10 @@ ] ] ], - "timestamp": "2023-10-17T11:05:00.379878948" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:41.942317" } } \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index f52a53a0..1787b38a 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,4 @@ +name: fastqc channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 5def8818..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144..70edae4d 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,24 +3,20 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) """ } } @@ -28,14 +24,189 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
    Mon 2 Oct 2023
    test.gz
    // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } ) } } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32ce..86f7c311 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,88 @@ { - "versions": { + "fastqc_versions_interleaved": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 9d0e6b20..2212096a 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,7 @@ +name: multiqc channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 485b3ba8..354f4430 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda 'modules/nf-core/multiqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index a61223ed..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..c204b488 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:28:51.744211298" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:28.847433492" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:13.223621555" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index 04c82f14..bd57cb54 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -1,6 +1,8 @@ +name: samtools_flagstat channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index b289d151..eb5f5252 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/samtools/flagstat/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..24c3c04b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..a76fc27e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:44.299617452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml index 04c82f14..174973b8 100644 --- a/modules/nf-core/samtools/idxstats/environment.yml +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -1,6 +1,8 @@ +name: samtools_idxstats channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index 97217419..a544026f 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/samtools/idxstats/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 00000000..a2dcb27c --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match("idxstats") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 00000000..a7050bdc --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,613dde56f108418039ffcdeeddba397a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:50.147462763" + }, + "idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:36:41.561026" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 00000000..d3057c61 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 04c82f14..a5e50649 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,6 +1,8 @@ +name: samtools_index channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index af3dbc4c..dc14f98d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda 'modules/nf-core/samtools/index/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..bb7756d1 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("bai_versions") } + ) + } + } + + test("crai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert snapshot(process.out.versions).match("crai_versions") } + ) + } + } + + test("csi") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("csi_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..3dc8e7de --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:41:38.446424" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index 04c82f14..4d898e48 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -1,6 +1,8 @@ +name: samtools_sort channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 77256702..cdd8305d 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/samtools/sort/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index 1f72f3b9..31e24b88 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/sort" - test("test_samtools_sort") { + test("bam") { config "./nextflow.config" @@ -18,12 +18,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) """ } } @@ -34,10 +32,9 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } - test("test_samtools_sort_stub") { + test("bam_stub") { config "./nextflow.config" options "-stub-run" @@ -48,12 +45,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) """ } } @@ -61,10 +56,9 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } ) } - } - } diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index a43566da..a7cf0210 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -1,5 +1,27 @@ { - "test_samtools_sort": { + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:21:04.364044" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:00.20800281" + }, + "bam": { "content": [ { "0": [ @@ -8,14 +30,14 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + "test.sorted.bam:md5,c6ea1346ec4aae007eb40b708935088c" ] ], "1": [ ], "2": [ - "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" ], "bam": [ [ @@ -23,17 +45,21 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + "test.sorted.bam:md5,c6ea1346ec4aae007eb40b708935088c" ] ], "csi": [ ], "versions": [ - "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" ] } ], - "timestamp": "2023-10-17T17:21:46.5427968" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:14:52.736359271" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml index 04c82f14..67bb0ca4 100644 --- a/modules/nf-core/samtools/stats/environment.yml +++ b/modules/nf-core/samtools/stats/environment.yml @@ -1,6 +1,8 @@ +name: samtools_stats channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index fe30bf89..52b00f4b 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/samtools/stats/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test index e037132c..e3d5cb14 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -4,75 +4,62 @@ nextflow_process { script "../main.nf" process "SAMTOOLS_STATS" tag "modules" - tag "modules/nf-core" + tag "modules_nfcore" tag "samtools" tag "samtools/stats" - test("SAMTOOLS STATS Should run without failures") { + test("bam") { when { params { - outdir = "$outputDir" } process { """ - // define inputs of the process here. - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) - - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) input[1] = [[],[]] """ - } } then { assertAll( - {assert process.success}, - {assert snapshot(process.out).match()} + {assert process.success}, + {assert snapshot(process.out).match()} ) } - } - test("SAMTOOLS CRAM Should run without failures") { + test("cram") { when { params { - outdir = "$outputDir" } process { """ - // define inputs of the process here - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) - - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) """ } - - } then { assertAll( - {assert process.success}, - {assert snapshot(process.out).match()} + {assert process.success}, + {assert snapshot(process.out).match()} ) } - } - - } diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap index 516b2b01..1b7c9ba4 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "SAMTOOLS STATS Should run without failures": { + "cram": { "content": [ { "0": [ @@ -8,11 +8,11 @@ "id": "test", "single_end": false }, - "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + "test.stats:md5,01812900aa4027532906c5d431114233" ] ], "1": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" ], "stats": [ [ @@ -20,17 +20,21 @@ "id": "test", "single_end": false }, - "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + "test.stats:md5,01812900aa4027532906c5d431114233" ] ], "versions": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" ] } ], - "timestamp": "2023-10-18T12:12:42.998746" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:25.562429714" }, - "SAMTOOLS CRAM Should run without failures": { + "bam": { "content": [ { "0": [ @@ -39,11 +43,11 @@ "id": "test", "single_end": false }, - "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + "test.stats:md5,5d8681bf541199898c042bf400391d59" ] ], "1": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" ], "stats": [ [ @@ -51,14 +55,18 @@ "id": "test", "single_end": false }, - "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + "test.stats:md5,5d8681bf541199898c042bf400391d59" ] ], "versions": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" ] } ], - "timestamp": "2023-10-18T12:13:30.747222" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:07.857611509" } } \ No newline at end of file diff --git a/modules/nf-core/umicollapse/environment.yml b/modules/nf-core/umicollapse/environment.yml new file mode 100644 index 00000000..8dbc65dc --- /dev/null +++ b/modules/nf-core/umicollapse/environment.yml @@ -0,0 +1,7 @@ +name: umicollapse +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umicollapse=1.0.0 diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf new file mode 100644 index 00000000..dae290e6 --- /dev/null +++ b/modules/nf-core/umicollapse/main.nf @@ -0,0 +1,73 @@ +process UMICOLLAPSE { + tag "$meta.id" + label "process_high" + label "process_high_memory" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umicollapse:1.0.0--hdfd78af_1' : + 'biocontainers/umicollapse:1.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(input), path(bai) + val(mode) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*dedup*fastq.gz"), emit: fastq, optional: true + tuple val(meta), path("*_UMICollapse.log"), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + // Memory allocation: We need to make sure that both heap and stack size is sufficiently large for + // umicollapse. We set the stack size to 5% of the available memory, the heap size to 90% + // which leaves 5% for stuff happening outside of java without the scheduler killing the process. + def max_heap_size_mega = (task.memory.toMega() * 0.9).intValue() + def max_stack_size_mega = 999 //most java jdks will not allow Xss > 1GB, so fixing this to the allowed max + if ( mode !in [ 'fastq', 'bam' ] ) { + error "Mode must be one of 'fastq' or 'bam'." + } + extension = mode.contains("fastq") ? "fastq.gz" : "bam" + """ + # Getting the umicollapse jar file like this because `umicollapse` is a Python wrapper script generated + # by conda that allows to set the heap size (Xmx), but not the stack size (Xss). + # `which` allows us to get the directory that contains `umicollapse`, independent of whether we + # are in a container or conda environment. + UMICOLLAPSE_JAR=\$(dirname \$(which umicollapse))/../share/umicollapse-${VERSION}/umicollapse.jar + java \\ + -Xmx${max_heap_size_mega}M \\ + -Xss${max_stack_size_mega}M \\ + -jar \$UMICOLLAPSE_JAR \\ + $mode \\ + -i ${input} \\ + -o ${prefix}.${extension} \\ + $args | tee ${prefix}_UMICollapse.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.0-1' + if ( mode !in [ 'fastq', 'bam' ] ) { + error "Mode must be one of 'fastq' or 'bam'." + } + extension = mode.contains("fastq") ? "fastq.gz" : "bam" + """ + touch ${prefix}.dedup.${extension} + touch ${prefix}_UMICollapse.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml new file mode 100644 index 00000000..c1361f9a --- /dev/null +++ b/modules/nf-core/umicollapse/meta.yml @@ -0,0 +1,63 @@ +--- +name: "umicollapse" +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umicollapse + - deduplication + - genomics +tools: + - "umicollapse": + description: "UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)." + homepage: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + documentation: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + tool_dev_url: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + doi: "10.7717/peerj.8275" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. Optionally can be skipped using [] when using FastQ input. + pattern: "*.{bai}" + - mode: + type: string + description: | + Selects the mode of Umicollapse - either fastq or bam need to be provided. + pattern: "{fastq,bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - log: + type: file + description: A log file with the deduplication statistics. + pattern: "*_{UMICollapse.log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@CharlotteAnne" + - "@chris-cheshire" +maintainers: + - "@CharlotteAnne" + - "@chris-cheshire" + - "@apeltzer" + - "@MatthiasZepper" diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test new file mode 100644 index 00000000..36cd748b --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test @@ -0,0 +1,249 @@ +nextflow_process { + + name "Test Process UMICOLLAPSE" + script "../main.nf" + process "UMICOLLAPSE" + + tag "modules" + tag "modules_nfcore" + tag "umicollapse" + tag "umitools/extract" + tag "samtools/index" + tag "bwa/index" + tag "bwa/mem" + + test("umicollapse single end test") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_SE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_SE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_PE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_PE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse fastq tests") { + + when { + config "./nextflow_SE.config" + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + [] + ] + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.fastq, + process.out.versions).match() } + ) + } + } + + test("umicollapse stub tests") { + options "-stub-run" + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_PE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + when { + config "./nextflow_PE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap new file mode 100644 index 00000000..23916f0b --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -0,0 +1,124 @@ +{ + "umicollapse single end test": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.bam:md5,05c5331185263cbee6f508c0669be864" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:24:31.850566925" + }, + "umicollapse fastq tests": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.fastq.gz:md5,c9bac08c7fd8df3e0203e3eeafc73155" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-30T10:45:56.053352008" + }, + "umicollapse stub tests": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-30T10:46:12.482697713" + }, + "umicollapse paired tests": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.bam:md5,f4f05467cb456309fe22851d8b4d4387" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:24:44.166029769" + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config new file mode 100644 index 00000000..844edbdc --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config new file mode 100644 index 00000000..ae4c9632 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_PE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config new file mode 100644 index 00000000..d4b94436 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_SE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/tags.yml b/modules/nf-core/umicollapse/tests/tags.yml new file mode 100644 index 00000000..912879c4 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/tags.yml @@ -0,0 +1,2 @@ +umicollapse: + - "modules/nf-core/umicollapse/**" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 00000000..aab452d1 --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.5 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 00000000..8719e5f6 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,56 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_single" + label "process_long" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.5--py39hf95cd2a_0' : + 'biocontainers/umi_tools:1.1.5--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 00000000..7695b271 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,48 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - UMI + - barcode + - extract + - umitools +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: "MIT" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 00000000..83a77a1b --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..bf82701d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T10:01:33.326046137" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 00000000..c866f5a0 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 00000000..c3fb23de --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/modules/nf-core/untarfiles/environment.yml b/modules/nf-core/untarfiles/environment.yml new file mode 100644 index 00000000..e479f80d --- /dev/null +++ b/modules/nf-core/untarfiles/environment.yml @@ -0,0 +1,9 @@ +name: untarfiles +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 + - bioconda::grep=3.4 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untarfiles/main.nf b/modules/nf-core/untarfiles/main.nf new file mode 100644 index 00000000..de27e67c --- /dev/null +++ b/modules/nf-core/untarfiles/main.nf @@ -0,0 +1,52 @@ +process UNTARFILES { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/**") , emit: files + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untarfiles/meta.yml b/modules/nf-core/untarfiles/meta.yml new file mode 100644 index 00000000..38108826 --- /dev/null +++ b/modules/nf-core/untarfiles/meta.yml @@ -0,0 +1,48 @@ +name: untarfiles +description: Extract files. +keywords: + - untar + - uncompress + - files +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files: + type: string + description: A list containing references to individual archive files + pattern: "*/**" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" + - "@pinin4fjords" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" + - "@pinin4fjords" diff --git a/nextflow.config b/nextflow.config index 2fb69a90..272b7372 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,28 +21,43 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false mirna_gtf = null - mature = "https://mirbase.org/download/CURRENT/mature.fa" - hairpin = "https://mirbase.org/download/CURRENT/hairpin.fa" + mature = "https://mirbase.org/download/mature.fa" + hairpin = "https://mirbase.org/download/hairpin.fa" mirgenedb = false mirgenedb_mature = null mirgenedb_hairpin = null mirgenedb_gff = null mirgenedb_species = null + save_aligned = false + save_aligned_mirna_quant = true + bowtie_index = null + + // UMI handling + with_umi = false + // skips umitools extract in FASTQ_FASTQC_UMITOOLS_FASTP subworkflow. Needs to be true for fastq mode in collapsing reads + skip_umi_extract_before_dedup = true + umitools_extract_method = 'string' + umitools_bc_pattern = null + umi_discard_read = null + umitools_method = 'dir' + save_umi_intermeds = false // Trimming options clip_r1 = null three_prime_clip_r1 = null - three_prime_adapter = null + three_prime_adapter = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCA' trim_fastq = true fastp_min_length = 17 fastp_known_mirna_adapters = "$projectDir/assets/known_adapters.fa" save_trimmed_fail = false + save_merged = true skip_fastqc = false skip_multiqc = false skip_mirdeep = false skip_fastp = false save_reference = false - fastp_max_length = 40 + fastp_max_length = 100 + min_trimmed_reads = 10 // Contamination filtering filter_contamination = false @@ -53,11 +68,17 @@ params { pirna = null other_contamination = null + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + + // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' multiqc_methods_description = null // Boilerplate options @@ -72,13 +93,12 @@ params { version = false // Config options - - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options @@ -87,14 +107,6 @@ params { max_cpus = 16 max_time = '240.h' - - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' - validationShowHiddenParams = false - validate_params = true - // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false @@ -115,7 +127,7 @@ try { } // Load nf-core/smrnaseq custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! // try { // includeConfig "${params.custom_config_base}/pipeline/smrnaseq.config" // } catch (Exception e) { @@ -127,6 +139,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -135,6 +148,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -149,17 +163,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true - docker.runOptions = '-u $(id -u):$(id -g)' conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -216,8 +229,10 @@ profiles { } test { includeConfig 'conf/test.config' } + test_umi { includeConfig 'conf/test_umi.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } + test_index { includeConfig 'conf/test_index.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -230,9 +245,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet - + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -259,8 +272,13 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Set default registry for Docker and Podman independent of -profile // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers -docker.registry = 'quay.io' -podman.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { @@ -287,7 +305,7 @@ manifest { description = """Small RNA-Seq Best Practice Analysis Pipeline.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.2.4' + version = '2.3.0' doi = '10.5281/zenodo.3456879' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 40360e49..cf2d0382 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,6 +16,7 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", @@ -50,6 +51,56 @@ } } }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "properties": { + "with_umi": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." + }, + "umitools_extract_method": { + "type": "string", + "default": "string", + "fa_icon": "fas fa-barcode", + "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" + }, + "umitools_method": { + "type": "string", + "default": "dir", + "description": "UMI grouping method", + "fa_icon": "fas fa-layer-group", + "help_text": "Available options are dir, cc, adj" + }, + "skip_umi_extract_before_dedup": { + "type": "boolean", + "fa_icon": "fas fa-compress-alt", + "description": "Skip the UMI extraction from the reads before deduplication. Please note, if this parameter is set to false, the reads will be deduplicated solely on insert sequence. UMIs might be extracted after deduplication depending on the set umitools_bc_pattern nevertheless if with_umi is set to True.", + "default": true + }, + "umitools_bc_pattern": { + "type": "string", + "fa_icon": "fas fa-barcode", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", + "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." + }, + "umi_discard_read": { + "type": "integer", + "fa_icon": "fas fa-barcode", + "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." + }, + "save_umi_intermeds": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + } + }, + "fa_icon": "fas fa-barcode" + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -104,7 +155,7 @@ "description": "Path to FASTA file with mature miRNAs.", "fa_icon": "fas fa-wheelchair", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/mature.fa" + "default": "https://mirbase.org/download/mature.fa" }, "mirgenedb_mature": { "type": "string", @@ -116,7 +167,7 @@ "description": "Path to FASTA file with miRNAs precursors.", "fa_icon": "fab fa-cuttlefish", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/hairpin.fa" + "default": "https://mirbase.org/download/hairpin.fa" }, "mirgenedb_hairpin": { "type": "string", @@ -135,6 +186,19 @@ "help_text": "Saving generated references means that you can use them for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, + "save_aligned": { + "type": "boolean", + "fa_icon": "fas fa-save", + "help_text": "Save aligned reads of initial bowtie mapping.", + "description": "Save aligned reads of initial mapping in bam format." + }, + "save_aligned_mirna_quant": { + "type": "boolean", + "fa_icon": "fas fa-save", + "default": true, + "help_text": "Save aligned reads of the bowtie runs in BOWTIE_MAP_MATURE, BOWTIE_MAP_HAIRPIN, and BOWTIE_MAP_SEQCLUSTER.", + "description": "Save aligned reads of miRNA quant subworkflow in bam format." + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -162,9 +226,9 @@ }, "three_prime_adapter": { "type": "string", - "default": "TGGAATTCTCGGGTGCCAAGG", "fa_icon": "fas fa-text-width", - "description": "Sequencing adapter sequence to use for trimming." + "description": "Sequencing adapter sequence to use for trimming.", + "default": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA" }, "trim_fastq": { "type": "boolean", @@ -180,7 +244,7 @@ }, "fastp_max_length": { "type": "integer", - "default": 40, + "default": 100, "description": "Maximum filter length for raw reads.", "fa_icon": "fas fa-ruler-horizontal" }, @@ -191,9 +255,23 @@ }, "fastp_known_mirna_adapters": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "default": "${projectDir}/assets/known_adapters.fa", "description": "FastA with known miRNA adapter sequences for adapter trimming", "fa_icon": "far fa-question-circle" + }, + "min_trimmed_reads": { + "type": "integer", + "default": 10, + "fa_icon": "far fa-window-minimize", + "description": "Minimum number of reads required in input file to use it" + }, + "save_merged": { + "type": "boolean", + "description": "Save merged reads.", + "default": true } } }, @@ -361,14 +439,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -392,7 +468,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -407,7 +482,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -446,7 +520,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -454,7 +527,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -462,7 +534,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } @@ -473,6 +544,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/umi_options" + }, { "$ref": "#/definitions/reference_genome_options" }, diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/contaminant_filter.nf b/subworkflows/local/contaminant_filter.nf index 383c85ad..02d89df7 100644 --- a/subworkflows/local/contaminant_filter.nf +++ b/subworkflows/local/contaminant_filter.nf @@ -3,23 +3,23 @@ // include { BLAT_MIRNA as BLAT_CDNA - BLAT_MIRNA as BLAT_NCRNA - BLAT_MIRNA as BLAT_PIRNA - BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna' + BLAT_MIRNA as BLAT_NCRNA + BLAT_MIRNA as BLAT_PIRNA + BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna' include { INDEX_CONTAMINANTS as INDEX_RRNA - INDEX_CONTAMINANTS as INDEX_TRNA - INDEX_CONTAMINANTS as INDEX_CDNA - INDEX_CONTAMINANTS as INDEX_NCRNA - INDEX_CONTAMINANTS as INDEX_PIRNA - INDEX_CONTAMINANTS as INDEX_OTHER } from '../../modules/local/bowtie_contaminants' + INDEX_CONTAMINANTS as INDEX_TRNA + INDEX_CONTAMINANTS as INDEX_CDNA + INDEX_CONTAMINANTS as INDEX_NCRNA + INDEX_CONTAMINANTS as INDEX_PIRNA + INDEX_CONTAMINANTS as INDEX_OTHER } from '../../modules/local/bowtie_contaminants' include { BOWTIE_MAP_CONTAMINANTS as MAP_RRNA - BOWTIE_MAP_CONTAMINANTS as MAP_TRNA - BOWTIE_MAP_CONTAMINANTS as MAP_CDNA - BOWTIE_MAP_CONTAMINANTS as MAP_NCRNA - BOWTIE_MAP_CONTAMINANTS as MAP_PIRNA - BOWTIE_MAP_CONTAMINANTS as MAP_OTHER } from '../../modules/local/bowtie_map_contaminants' + BOWTIE_MAP_CONTAMINANTS as MAP_TRNA + BOWTIE_MAP_CONTAMINANTS as MAP_CDNA + BOWTIE_MAP_CONTAMINANTS as MAP_NCRNA + BOWTIE_MAP_CONTAMINANTS as MAP_PIRNA + BOWTIE_MAP_CONTAMINANTS as MAP_OTHER } from '../../modules/local/bowtie_map_contaminants' include { FILTER_STATS } from '../../modules/local/filter_stats' @@ -123,6 +123,6 @@ workflow CONTAMINANT_FILTER { emit: filtered_reads = FILTER_STATS.out.reads - versions = ch_versions + versions = ch_versions.mix(FILTER_STATS.out.versions) filter_stats = FILTER_STATS.out.stats -} \ No newline at end of file +} diff --git a/subworkflows/local/fastqc_fastp.nf b/subworkflows/local/fastqc_fastp.nf deleted file mode 100644 index 9e4d952e..00000000 --- a/subworkflows/local/fastqc_fastp.nf +++ /dev/null @@ -1,116 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' - -// -// Function that parses fastp json output file to get total number of reads after trimming -// -import groovy.json.JsonSlurper - -def getFastpReadsAfterFiltering(json_file) { - return new JsonSlurper().parseText(json_file.text) - ?.get('summary') - ?.get('after_filtering') - ?.get('total_reads') - ?.toInteger() -} - -String getFastpAdapterSequence(json_file){ - return new JsonSlurper().parseText(json_file.text) - ?.get('adapter_cutting') - ?.get('read1_adapter_sequence') -} - -workflow FASTQC_FASTP { - take: - reads // channel: [ val(meta), [ reads ] ] - adapter_list // channel: [ path/to/adapters.fa ] - save_trimmed_fail // value: boolean - save_merged // value: boolean - - - main: - - ch_versions = Channel.empty() - fastqc_raw_html = Channel.empty() - fastqc_raw_zip = Channel.empty() - adapterseq = reads.map { meta, _ -> [meta, null] } - if (!params.skip_fastqc) { - FASTQC_RAW ( - reads - ) - fastqc_raw_html = FASTQC_RAW.out.html - fastqc_raw_zip = FASTQC_RAW.out.zip - ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - } - - trim_reads = reads - trim_json = Channel.empty() - trim_html = Channel.empty() - trim_log = Channel.empty() - trim_reads_fail = Channel.empty() - trim_reads_merged = Channel.empty() - fastqc_trim_html = Channel.empty() - fastqc_trim_zip = Channel.empty() - if (!params.skip_fastp) { - FASTP ( - reads, - adapter_list, - save_trimmed_fail, - save_merged - ) - trim_reads = FASTP.out.reads - trim_json = FASTP.out.json - trim_html = FASTP.out.html - trim_log = FASTP.out.log - trim_reads_fail = FASTP.out.reads_fail - trim_reads_merged = FASTP.out.reads_merged - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - - // - // Filter empty FastQ files after adapter trimming so FastQC doesn't fail - // - trim_reads - .join(trim_json) - .map { - meta, reads, json -> - if (getFastpReadsAfterFiltering(json) > 0) { - [ meta, reads ] - } - } - .set { trim_reads } - - trim_json - .map { meta, json -> [meta, getFastpAdapterSequence(json)] } - .set { adapterseq } - - if (!params.skip_fastqc) { - FASTQC_TRIM ( - trim_reads - ) - fastqc_trim_html = FASTQC_TRIM.out.html - fastqc_trim_zip = FASTQC_TRIM.out.zip - ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) - } - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trim_json // channel: [ val(meta), [ json ] ] - trim_html // channel: [ val(meta), [ html ] ] - trim_log // channel: [ val(meta), [ log ] ] - trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] - trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] - adapterseq // channel: [ val(meta), [ adapterseq ] ] - - fastqc_raw_html // channel: [ val(meta), [ html ] ] - fastqc_raw_zip // channel: [ val(meta), [ zip ] ] - fastqc_trim_html // channel: [ val(meta), [ html ] ] - fastqc_trim_zip // channel: [ val(meta), [ zip ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index 967b2757..c56f8e5f 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -2,36 +2,24 @@ // Quantify mirna with bowtie and mirtop // -include { INDEX_GENOME } from '../../modules/local/bowtie_genome' include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools' include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' workflow GENOME_QUANT { take: - fasta - index + bowtie_index + fasta_formatted // fasta as generated by bowtie index step reads // channel: [ val(meta), [ reads ] ] main: ch_versions = Channel.empty() - if (!index){ - INDEX_GENOME ( [ [:], fasta ] ) - bowtie_index = INDEX_GENOME.out.index - fasta_formatted = INDEX_GENOME.out.fasta - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - } else { - bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${index}" } - fasta_formatted = fasta - } + BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - if (bowtie_index){ - BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - - BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - } + ch_fasta_formatted_for_sort = fasta_formatted .map { file -> tuple(file.baseName, file) } + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, ch_fasta_formatted_for_sort ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: fasta = fasta_formatted diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 910afd05..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,30 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1 ] ] -def create_fastq_channel(LinkedHashMap row) { - def meta = row.findAll {it.key != "fastq_1"} - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - return [ meta, [ file(row.fastq_1) ] ] -} diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index dfa16ab4..aea1c9a7 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -39,18 +39,9 @@ workflow MIRNA_QUANT { FORMAT_MATURE ( mirna_parsed ) ch_versions = ch_versions.mix(FORMAT_MATURE.out.versions) - PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed } - ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) - - FORMAT_HAIRPIN ( hairpin_parsed ) - ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) - INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).index.set { mature_bowtie } ch_versions = ch_versions.mix(INDEX_MATURE.out.versions) - INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } - ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) - reads .map { add_suffix(it, "mature") } .dump (tag:'msux') @@ -64,12 +55,21 @@ workflow MIRNA_QUANT { .dump (tag:'hsux') .set { reads_hairpin } - BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) - BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions) + PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed } + ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) + + FORMAT_HAIRPIN ( hairpin_parsed ) + ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) + + INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } + ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) + + BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) + BAM_STATS_HAIRPIN ( BOWTIE_MAP_HAIRPIN.out.bam, FORMAT_HAIRPIN.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions) @@ -79,7 +79,9 @@ workflow MIRNA_QUANT { .flatten() .collect() .set { edger_input } + EDGER_QC ( edger_input ) + ch_versions.mix(EDGER_QC.out.versions) reads .map { add_suffix(it, "seqcluster") } diff --git a/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf new file mode 100644 index 00000000..af31a5ea --- /dev/null +++ b/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf @@ -0,0 +1,291 @@ +// +// Subworkflow with functionality specific to the nf-core/pipeline pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + //Detect Protocol setting, set this early before help so help shows proper adapters etc pp + formatProtocol(params,log) + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() + +}// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +}// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +/* +* Format the protocol +* Given the protocol parameter (params.protocol), +* this function formats the protocol such that it is fit for the respective +* subworkflow +*/ +def formatProtocol(params,log) { + + switch(params.protocol){ + case 'illumina': + params.putIfAbsent("clip_r1", 0); + params.putIfAbsent("three_prime_clip_r1",0); + params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); + break + case 'nextflex': + params.putIfAbsent("clip_r1", 4); + params.putIfAbsent("three_prime_clip_r1", 4); + params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); + break + case 'qiaseq': + params.putIfAbsent("clip_r1",0); + params.putIfAbsent("three_prime_clip_r1",0); + params.putIfAbsent("three_prime_adapter","AACTGTAGGCACCATCAAT"); + break + case 'cats': + params.putIfAbsent("clip_r1",3); + params.putIfAbsent("three_prime_clip_r1", 0); + params.putIfAbsent("three_prime_adapter", "AAAAAAAA"); + break + case 'custom': + params.putIfAbsent("clip_r1", params.clip_r1) + params.putIfAbsent("three_prime_clip_r1", params.three_prime_clip_r1) + default: + log.warn "Please make sure to specify all required clipping and trimming parameters, otherwise only adapter detection will be performed." + } + + log.warn "Running with Protocol ${params.protocol}" + log.warn "Therefore using Adapter: ${params.three_prime_adapter}" + log.warn "Clipping ${params.clip_r1} bases from R1" + log.warn "And clipping ${params.three_prime_clip_r1} bases from 3' end" + } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test index a8a13f2a..75b5b934 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -5,7 +5,9 @@ nextflow_workflow { workflow "BAM_SORT_STATS_SAMTOOLS" tag "subworkflows" tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" tag "bam_stats_samtools" tag "samtools" tag "samtools/index" @@ -22,12 +24,14 @@ nextflow_workflow { } workflow { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) - ] - input[1] = [ [ id:'genome' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } @@ -35,7 +39,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } ) } } @@ -48,12 +56,14 @@ nextflow_workflow { } workflow { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - input[1] = [ [ id:'genome' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } @@ -61,7 +71,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } ) } } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap index 50ffde60..6645a092 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -1,236 +1,110 @@ { - "test_bam_sort_stats_samtools_single_end": { + "test_bam_sort_stats_samtools_paired_end_flagstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" ] - } + ] ], - "timestamp": "2023-10-18T09:34:31.989804787" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.687121177" }, - "test_bam_sort_stats_samtools_paired_end": { + "test_bam_sort_stats_samtools_paired_end_idxstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" ] - } + ] ], - "timestamp": "2023-10-18T09:34:57.682759147" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.709648916" + }, + "test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,cb0bf2b79de52fdf0c61e80efcdb0bb4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:38.553256801" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d7796222a087b9bb97f631f1c21b9c95" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:48.355870518" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.84631" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.829756" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml index a8274109..30b69d6a 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -1,2 +1,2 @@ -bam_sort_stats_samtools: +subworkflows/bam_sort_stats_samtools: - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..c8b21f28 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..bf0b0c69 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,ddaf8f33fe9c1ebe9b06933213aec8ed" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:06.230091746" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.717482" + }, + "test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.340046381" + }, + "test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,dc178e1a4956043aba8abc83e203521b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:57.442208382" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.726719" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.349439801" + }, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d3345c4887f4a9ea4f7f56405b495db0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:14.997164209" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf new file mode 100644 index 00000000..764ce013 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -0,0 +1,157 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +def getFastpAdapterSequence(json_file){ + def Map json = (Map) new JsonSlurper().parseText(json_file.text) + try{ + adapter = json['adapter_cutting']['read1_adapter_sequence'] + } catch(Exception ex){ + adapter = "" + } + return adapter +} + +workflow FASTQ_FASTQC_UMITOOLS_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + skip_trimming // boolean: true/false + adapter_fasta // file: adapter.fasta + save_trimmed_fail // boolean: true/false + save_merged // boolean: true/false + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT ( + reads + ) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + trim_read_count = Channel.empty() + adapter_seq = Channel.empty() + + if (!skip_trimming) { + FASTP ( + umi_reads, + adapter_fasta, + save_trimmed_fail, + save_merged + ) + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + FASTP + .out + .reads + .join(trim_json) + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + + trim_json + .map { meta, json -> [meta, getFastpAdapterSequence(json)] } + .set { adapter_seq } + + if (!skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + adapter_seq // channel: [ val(meta), [ adapter_seq] ] + + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml new file mode 100644 index 00000000..9308fe9b --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -0,0 +1,129 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# yaml-language-server: $schema=yaml-schema.json +name: "fastq_fastqc_umitools_fastp" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - fastp +components: + - fastqc + - umitools/extract + - fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extrection + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - skip_trimming: + type: boolean + description: | + Allows to skip FastP execution + - adapter_fasta: + type: file + description: | + Fasta file of adapter sequences + - save_trimmed_fail: + type: boolean + description: | + Save trimmed fastqs of failed samples + - save_merged: + type: boolean + description: | + Save merged fastqs + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer than this reads will be filtered out of the "reads" output channel +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_json: + type: file + description: FastP Trimming report + pattern: "*.{fastp.json}" + - trim_html: + type: file + description: FastP Trimming report + pattern: "*.{fastp.html}" + - log: + type: file + description: Logfile FastP + pattern: "*.{fastp.log}" + - trim_reads_fail: + type: file + description: Trimmed fastq files failing QC + pattern: "*.{fastq.gz}" + - trim_reads_merged: + type: file + description: Trimmed and merged fastq files + pattern: "*.{fastq.gz}" + - trim_read_count: + type: integer + description: Number of reads after trimming + - fastqc_trim_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_trim_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - adapter_seq: + type: string + description: | + Adapter Sequence found in read1 + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robsyme" +maintainers: + - "@robsyme" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test new file mode 100644 index 00000000..8f1d82d3 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP" + script "../main.nf" + workflow "FASTQ_FASTQC_UMITOOLS_FASTP" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_fastqc_umitools_fastp" + tag "fastq_fastqc_umitools_fastp" + tag "fastqc" + tag "umitools/extract" + tag "fastp" + + + test("sarscov2 paired-end [fastq]") { + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.umi_log).match("umi_log") }, + { assert snapshot(workflow.out.trim_json).match("trim_json") }, + { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, + { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert snapshot(workflow.out.adapter_seq).match("adapter_seq") }, + { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..89ba8da1 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -0,0 +1,127 @@ +{ + "trim_reads_merged": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.041635573" + }, + "trim_reads_fail": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.033284693" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.121510557" + }, + "trim_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.024410724" + }, + "adapter_seq": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "unspecified" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.08674429" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:49.994419936" + }, + "umi_log": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.017720214" + }, + "trim_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 198 + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-12T08:38:50.102326089" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml new file mode 100644 index 00000000..84a4b567 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_fastqc_umitools_fastp: + - subworkflows/nf-core/fastq_fastqc_umitools_fastp/** diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..8ed4310c --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..db2030f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,12 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "timestamp": "2024-01-19T11:32:36.031083" + }, + "Test Function checkCondaChannels": { + "content": null, + "timestamp": "2024-01-19T11:32:50.456" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..f7c54bc6 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,123 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = params.outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = null + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..53574ffe --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..10f948e6 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,138 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "timestamp": "2024-02-09T15:43:55.145717" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "timestamp": "2024-01-19T11:34:13.548431224" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "timestamp": "2024-01-19T11:34:38.840454873" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "timestamp": "2024-01-19T11:34:22.24352016" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "timestamp": "2024-01-19T11:35:04.418416984" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "timestamp": "2024-01-19T11:34:55.420000755" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "timestamp": "2024-01-19T11:35:13.436366565" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..d07ce54c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,15 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "timestamp": "2024-01-19T11:35:22.538940073" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..517ee54e --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index b6c2d3a2..9252fec0 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -1,38 +1,25 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - - -WorkflowSmrnaseq.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, - params.multiqc_config -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -WorkflowSmrnaseq.initialise(params, log) - -// Check optional parameters -if (!params.mirtrace_species) { - exit 1, "Reference species for miRTrace is not defined via the --mirtrace_species parameter." -} - -// Genome options -def mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3" : false -def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' +include { FASTP as FASTP_LENGTH_FILTER } from '../modules/nf-core/fastp' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { INDEX_GENOME } from '../modules/local/bowtie_genome' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { UMICOLLAPSE as UMICOLLAPSE_FASTQ } from '../modules/nf-core/umicollapse/main' +include { UMITOOLS_EXTRACT } from '../modules/nf-core/umitools/extract/main' +include { UNTARFILES as UNTAR_BOWTIE_INDEX } from '../modules/nf-core/untarfiles' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -40,50 +27,10 @@ def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) -ch_fastp_adapters = Channel.fromPath(params.fastp_known_mirna_adapters, checkIfExists: true).collect() // collect to consume for all incoming samples to FASTP - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -if (!params.mirgenedb) { - if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } - if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } -} else { - if (params.mirgenedb_mature) { reference_mature = file(params.mirgenedb_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirgenedb_mature}" } - if (params.mirgenedb_hairpin) { reference_hairpin = file(params.mirgenedb_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirgenedb_hairpin}" } - if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} -} - -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_FASTP } from '../subworkflows/local/fastqc_fastp' -include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_fastp_adapters = Channel.fromPath(params.fastp_known_mirna_adapters, checkIfExists: true).collect() // collect to consume for all incoming samples to FASTP /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -91,35 +38,46 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] +workflow NFCORE_SMRNASEQ { -workflow SMRNASEQ { + take: + ch_input // channel: samplesheet file as specified to --input + ch_samplesheet // channel: sample fastqs parsed from --input + ch_versions // channel: [ path(versions.yml) ] - ch_versions = Channel.empty() + main: + //Config checks + // Check optional parameters + if (!params.mirtrace_species) { + exit 1, "Reference species for miRTrace is not defined via the --mirtrace_species parameter." + } + // Genome options + def mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3" : false + def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species + + if (!params.mirgenedb) { + if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } + if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } + } else { + if (params.mirgenedb_mature) { reference_mature = file(params.mirgenedb_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirgenedb_mature}" } + if (params.mirgenedb_hairpin) { reference_hairpin = file(params.mirgenedb_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirgenedb_hairpin}" } + if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} + if (!params.mirgenedb_species) { exit 1, "MirGeneDB species not set, please specify via the --mirgenedb_species parameter"} + } // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // Create separate channels for samples that have single/multiple FastQ files to merge // - INPUT_CHECK ( - file(params.input) - ) - .reads - .dump(tag: 'group') - .branch { - meta, fastq -> - single : fastq.size() == 1 - return [ meta, fastq.flatten() ] - multiple: fastq.size() > 1 - return [ meta, fastq.flatten() ] - } - .set { ch_fastq } - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema + ch_samplesheet + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } - // // MODULE: Concatenate FastQ files from same sample if required // CAT_FASTQ ( @@ -128,38 +86,96 @@ workflow SMRNASEQ { .reads .mix(ch_fastq.single) .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + mirna_adapters = params.with_umi ? [] : params.fastp_known_mirna_adapters // - // SUBWORKFLOW: Read QC and trim adapters + // SUBWORKFLOW: Read QC, extract UMI and trim adapters & dedup UMIs if necessary / desired by the user // - - FASTQC_FASTP ( + FASTQ_FASTQC_UMITOOLS_FASTP ( ch_cat_fastq, - ch_fastp_adapters, - false, - false + params.skip_fastqc, + params.with_umi, + params.skip_umi_extract_before_dedup, + params.umi_discard_read, + params.skip_fastp, + mirna_adapters, + params.save_trimmed_fail, + params.save_merged, + params.min_trimmed_reads ) - ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + + ch_fasta = params.fasta ? file(params.fasta): [] + ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + + // even if bowtie index is specified, there still needs to be a fasta. + // without fasta, no genome analysis. + if(params.fasta) { + //Prepare bowtie index, unless specified + //This needs to be done here as the index is used by GENOME_QUANT + if(params.bowtie_index) { + ch_fasta = Channel.fromPath(params.fasta) + if (params.bowtie_index.endsWith(".tar.gz")) { + UNTAR_BOWTIE_INDEX ( [ [], params.bowtie_index ]).files.map { it[1] }.set {ch_bowtie_index} + ch_versions = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions) + } else { + Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index } + } + } else { + INDEX_GENOME ( [ [:], ch_fasta ] ) + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + ch_bowtie_index = INDEX_GENOME.out.index + // set to reformatted fasta as generated by `bowtie index` + ch_fasta = INDEX_GENOME.out.fasta + } + } + + // UMI Dedup for fastq input + // This involves running on the sequencing adapter trimmed remnants of the entire reads + // consisting of sequence + common sequence "miRNA adapter" + UMI + // once collapsing happened, we will use umitools extract to get rid of the common miRNA sequence + the UMI to have only plain collapsed reads without any other clutter + if (params.with_umi) { + ch_fastq = Channel.value('fastq') + ch_input_for_collapse = ch_reads_for_mirna.map{ meta, reads -> [meta, reads, []]} //Needs to be done to add a [] + UMICOLLAPSE_FASTQ(ch_input_for_collapse, ch_fastq) + ch_versions = ch_versions.mix(UMICOLLAPSE_FASTQ.out.versions) + UMITOOLS_EXTRACT(UMICOLLAPSE_FASTQ.out.fastq) + + // Filter out sequences smaller than params.fastp_min_length + FASTP_LENGTH_FILTER ( + UMITOOLS_EXTRACT.out.reads, + mirna_adapters, + params.save_trimmed_fail, + params.save_merged + ) + ch_versions = ch_versions.mix(FASTP_LENGTH_FILTER.out.versions) + + ch_reads_for_mirna = FASTP_LENGTH_FILTER.out.reads + } // - // SUBWORKFLOW: mirtrace QC + // MODULE: mirtrace QC // - FASTQC_FASTP.out.adapterseq - .join( FASTQC_FASTP.out.reads ) - .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } + FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq + .join( ch_reads_for_mirna ) + .dump() + .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] } .groupTuple() .set { ch_mirtrace_inputs } + // + // SUBWORKFLOW: MIRTRACE + // MIRTRACE(ch_mirtrace_inputs) - ch_versions = ch_versions.mix(MIRTRACE.out.versions.ifEmpty(null)) - + ch_versions = ch_versions.mix(MIRTRACE.out.versions) // // SUBWORKFLOW: remove contaminants from reads // contamination_stats = Channel.empty() - mirna_reads = FASTQC_FASTP.out.reads if (params.filter_contamination){ CONTAMINANT_FILTER ( reference_hairpin, @@ -169,12 +185,12 @@ workflow SMRNASEQ { params.ncrna, params.pirna, params.other_contamination, - FASTQC_FASTP.out.reads + ch_reads_for_mirna ) contamination_stats = CONTAMINANT_FILTER.out.filter_stats ch_versions = ch_versions.mix(CONTAMINANT_FILTER.out.versions) - mirna_reads = CONTAMINANT_FILTER.out.filtered_reads + ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads } @@ -182,52 +198,58 @@ workflow SMRNASEQ { [ [:], reference_mature], [ [:], reference_hairpin], mirna_gtf, - mirna_reads + ch_reads_for_mirna ) - ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions) // // GENOME // genome_stats = Channel.empty() if (params.fasta){ - GENOME_QUANT ( file(params.fasta), params.bowtie_index, MIRNA_QUANT.out.unmapped ) + GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped ) genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) + hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } + mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } + if (!params.skip_mirdeep) { MIRDEEP2 ( - FASTQC_FASTP.out.reads, + ch_reads_for_mirna, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), - MIRNA_QUANT.out.fasta_hairpin, - MIRNA_QUANT.out.fasta_mature + hairpin_clean, + mature_clean ) ch_versions = ch_versions.mix(MIRDEEP2.out.versions) } } // - // MODULE: Pipeline reporting + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_smrnaseq_software_mqc_versions.yml', sort: true, newLine: true) + .set {ch_collated_versions} // // MODULE: MultiQC // + ch_multiqc_report = Channel.empty() if (!params.skip_multiqc) { - workflow_summary = WorkflowSmrnaseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - methods_description = WorkflowSmrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) + if(params.with_umi) { + ch_multiqc_files = ch_multiqc_files.mix(UMICOLLAPSE_FASTQ.out.log.collect{it[1]}.ifEmpty([])) + } ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([])) @@ -241,28 +263,14 @@ workflow SMRNASEQ { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() - } -} - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + ch_multiqc_report = MULTIQC.out.report -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END