diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index ea27a584..4ecfbfe3 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,6 +2,7 @@
     "name": "nfcore",
     "image": "nfcore/gitpod:latest",
     "remoteUser": "gitpod",
+    "runArgs": ["--privileged"],

     // Configure tool-specific properties.
     "customizations": {
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 975df6cd..5e32dfd8 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,7 +9,9 @@
 Please use the pre-filled template to save time.

 However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)

-> If you need help using or modifying nf-core/rnafusion then the best place to ask is on the nf-core Slack [#rnafusion](https://nfcore.slack.com/channels/rnafusion) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::info
+If you need help using or modifying nf-core/rnafusion then the best place to ask is on the nf-core Slack [#rnafusion](https://nfcore.slack.com/channels/rnafusion) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::

 ## Contribution workflow
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 31c6cd5f..d2c448d7 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -13,7 +13,7 @@ jobs:
     if: github.repository == 'nf-core/rnafusion'
     runs-on: ubuntu-latest
     steps:
-      - name: Launch build arriba workflow via tower
+      - name: Launch build references workflow via tower
         uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -28,7 +28,7 @@ jobs:
              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
              "cosmic_username": "${{ secrets.cosmic_username }}",
              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "arriba": true,
+              "all": true,
              "build_references": true
            }
          profiles: test_full,aws_tower
@@ -39,7 +39,7 @@ jobs:
             tower_action_*.log
             tower_action_*.json

-      - name: Launch arriba workflow via tower
+      - name: Launch run workflow via tower
         uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -52,223 +52,6 @@ jobs:
              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
              "cosmic_username": "${{ secrets.cosmic_username }}",
              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "arriba": true,
+              "all": true
            }
          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build squid workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "squid": true,
-              "build_references": true
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch squid workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "squid": true,
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build starfusion workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "starfusion": true,
-              "build_references": true
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch starfusion workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "starfusion": true,
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build fusioncatcher workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "fusioncatcher": true,
-              "build_references": true
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch fusioncatcher workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "fusioncatcher": true,
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build pizzly workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "pizzly": true,
-              "build_references": true
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch pizzly workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "pizzly": true,
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch stringtie workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "stringtie": true,
-            }
-          profiles: test_full,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 003f6786..b72a293b 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       # Launch workflow using Tower CLI tool action
-      - name: Launch build arriba workflow via tower
+      - name: Launch build references workflow via tower
         uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -25,7 +25,7 @@ jobs:
              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
              "cosmic_username": "${{ secrets.cosmic_username }}",
              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "arriba": true,
+              "all": true,
              "stub": true,
              "build_references": true
            }
@@ -50,227 +50,7 @@ jobs:
              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
              "cosmic_username": "${{ secrets.cosmic_username }}",
              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "arriba": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build squid workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "squid": true,
-              "stub": true,
-              "build_references": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch squid workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "squid": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build starfusion workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "starfusion": true,
-              "build_references": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch starfusion workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "starfusion": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build fusioncatcher workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "fusioncatcher": true,
-              "build_references": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch fusioncatcher workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "fusioncatcher": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch build pizzly workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "pizzly": true,
-              "build_references": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch pizzly workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "pizzly": true,
-              "stub": true
-            }
-          profiles: test,aws_tower
-      - uses: actions/upload-artifact@v3
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
-
-      - name: Launch stringtie workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}",
-              "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references",
-              "cosmic_username": "${{ secrets.cosmic_username }}",
-              "cosmic_passwd": "${{ secrets.cosmic_passwd }}",
-              "stringtie": true,
+              "all": true,
              "stub": true
            }
          profiles: test,aws_tower
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e62dcf41..6f960cad 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,6 +26,9 @@ jobs:
         NXF_VER:
           - "23.04.0"
           - "latest-everything"
+        trim_parameters:
+          - "--fastp_trim false"
+          - "--fastp_trim true"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v3
@@ -35,72 +38,15 @@ jobs:
         with:
           version: "${{ matrix.NXF_VER }}"

-      - name: Dry test arriba build
+      - name: Dry test build
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --arriba \
+            --outdir /home/runner/work/rnafusion/rnafusion/results --all \
             --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
             --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}

-      - name: Dry test arriba
+      - name: Dry test run
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --arriba \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test squid build
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --squid \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test squid
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --squid \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test pizzly build
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --pizzly \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test pizzly
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --pizzly \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test fusioncatcher build
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --fusioncatcher \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test fusioncatcher
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --fusioncatcher \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test starfusion build
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub build_references \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --starfusion \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
-
-      - name: Dry test starfusion
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \
-            --outdir /home/runner/work/rnafusion/rnafusion/results --starfusion \
-            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \
-            --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
+            --outdir /home/runner/work/rnafusion/rnafusion/results --all ${{ matrix.trim_parameters }} \
+            --genomes_base /home/runner/work/rnafusion/rnafusion/results/references
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 888cb4bc..b8bdd214 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -78,7 +78,7 @@ jobs:

       - uses: actions/setup-python@v4
         with:
-          python-version: "3.8"
+          python-version: "3.11"
           architecture: "x64"

       - name: Install dependencies
diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml
new file mode 100644
index 00000000..6ad33927
--- /dev/null
+++ b/.github/workflows/release-announcments.yml
@@ -0,0 +1,68 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@v0.0.2
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+        env:
+          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+      #
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c9ff036..75d0ce76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,40 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## v3.0.0 - [2023-11-27]
+
+### Added
+
+- Add picard CollectInsertSizeMetrics to QC workflow [#408](https://github.com/nf-core/rnafusion/pull/408)
+- Build CRAM index in the same directory as CRAM files for Arriba and STAR-Fusion [#427](https://github.com/nf-core/rnafusion/pull/427)
+
+### Changed
+
+- Replace PICARD_MARKDUPLICATES with GATK4_MARKDUPLICATES [#409](https://github.com/nf-core/rnafusion/pull/409)
+- Removed `--fusioninspector_filter` and `--fusionreport_filter` in favor of `--tools_cutoff` (default = 1, no filters applied) [#389](https://github.com/nf-core/rnafusion/pull/389)
+- Now publishing convert2bed output to convert2bed to keep the output file [#420](https://github.com/nf-core/rnafusion/pull/420)
+- No more checks for existence of samplesheet, which made building references fail (building references uses a fake sample sheet if none is provided) [#420](https://github.com/nf-core/rnafusion/pull/420)
+- `--annotate --examine_coding_effect` to collect more data from fusioninspector [#426](https://github.com/nf-core/rnafusion/pull/426)
+- Update vcf creation to get positions/chromosomes and strands even when fusions are filtered out by fusioninspector, using the csv output from fusion-report [#443](https://github.com/nf-core/rnafusion/pull/443)
+- `Arriba` updated to 2.4.0 [#429](https://github.com/nf-core/rnafusion/pull/429)
+- Change megafusion into vcf_collect, taking into account e.g. the annotation and coding effects outputs from fusioninspector, HGNC ids, frame status... [#414](https://github.com/nf-core/rnafusion/pull/414)
+- CI tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#430](https://github.com/nf-core/rnafusion/pull/430)
+- AWS tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#433](https://github.com/nf-core/rnafusion/pull/433)
+- Update `fusion-report` to 2.1.8, updated COSMIC database to fix 404 error, fix download of references via proxy and removing FusionGDB database [#445](https://github.com/nf-core/rnafusion/pull/445)
+- Update documentation [#446](https://github.com/nf-core/rnafusion/pull/446)
+
+### Fixed
+
+- Fix channel i/o issue in StringTie workflow and add StringTie in github CI tests [#416](https://github.com/nf-core/rnafusion/pull/416)
+- Update modules, and make sure MultiQC displays the QC results properly [#440](https://github.com/nf-core/rnafusion/pull/440)
+- Add 'when' condition to run CollectInsertSizeMetrics only when STAR-fusion bam files are available [#444](https://github.com/nf-core/rnafusion/pull/444)
+
+### Removed
+
+- Remove `squid` and `pizzly` fusion detection tools [#406](https://github.com/nf-core/rnafusion/pull/406)
+- Remove harsh trimming option `--trim` [#413](https://github.com/nf-core/rnafusion/pull/413)
+- Remove qualimap rna_seq [#407](https://github.com/nf-core/rnafusion/pull/407)
+
 ## v2.4.0 - [2023/09/22]

 ### Added
@@ -20,6 +54,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - cram file from output bam of `STAR_FOR_STARFUSION`: meta.id to meta.id.star_for_starfusion.Aligned.sortedByCoord.out
   - `fusion-report` index.html file to meta.id_fusionreport_index.html
   - meta.id.vcf output from `MEGAFUSION` to meta.id_fusion_data.vcf
+- Update metro map [#428](https://github.com/nf-core/rnafusion/pull/428)

 ### Fixed
diff --git a/CITATIONS.md b/CITATIONS.md
index 42e5e50c..f058a245 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -24,7 +24,7 @@

 - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

-  > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.
+  > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].

 - [FusionCatcher](https://github.com/ndaniel/fusioncatcher)

@@ -42,10 +42,6 @@

   > Van der Auwera GA. Somatic variation discovery with GATK4. Proceedings of the American Association for Cancer Research Annual Meeting 2017. 2017 Apr 1-5. Cancer Res 2017;77(13 Suppl) doi:10.1158/1538-7445.AM2017-3590

-- [Kallisto](https://pachterlab.github.io/kallisto/)
-
-  > Bray NL, Pimentel H, Melsted P, Pachter L. Near-optimal probabilistic RNA-seq quantification. Nature Biotechnology 2016 Apr. 34, 525–527. doi:10.1038/nbt.3519. PMID: 27043002.
-
 - [MegaFusion](https://github.com/J35P312/MegaFusion)

 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
@@ -54,21 +50,10 @@

 - [picard-tools](http://broadinstitute.github.io/picard)

-- [Pizzly](https://github.com/pmelsted/pizzly)
-  Melsted P, Hateley S, Joseph IC, Pimentel H, Bray N, Pachter L. Fusion detection and quantification by pseudoalignment. BioRxiv, 2017 Jul. doi: 10.1101/166322.
-
-- [Qualimap 2](https://pubmed.ncbi.nlm.nih.gov/26428292/)
-
-  > Okonechnikov K, Conesa A, García-Alcalde F. Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data Bioinformatics. 2016 Jan 15;32(2):292-4. doi: 10.1093/bioinformatics/btv566. Epub 2015 Oct 1. PubMed PMID: 26428292; PubMed Central PMCID: PMC4708105.
-
 - [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/)

   > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002.

-- [Squid](https://github.com/Kingsford-Group/squid)
-
-  > Ma C, Shao M, Kingsford C. SQUID: transcriptomic structural variation detection from RNA-seq. Genome Biol 2028 Apr. 19, 52. doi: 10.1186/s13059-018-1421-5. PubMed PMID: 29650026. PubMed Central PMCID: PMC5896115.
-
 - [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/)

   > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index f4fd052f..c089ec78 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,18 +1,20 @@
-# Code of Conduct at nf-core (v1.0)
+# Code of Conduct at nf-core (v1.4)

 ## Our Pledge

-In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
+In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:

 - Age
+- Ability
 - Body size
+- Caste
 - Familial status
 - Gender identity and expression
 - Geographical location
 - Level of experience
 - Nationality and national origins
 - Native language
-- Physical and neurological ability
+- Neurodiversity
 - Race or ethnicity
 - Religion
 - Sexual identity and orientation
@@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a

 ## Preamble

-> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply.
+:::note
+This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply.
+:::

-An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva.
+An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about).
+
+Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer.

 nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals.

-We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc.
+We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc.

-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.

-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
+We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.

-Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re
+Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re.

 ## Our Responsibilities

-The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
+Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.

-The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.

-Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC.
+Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC.

-## When are where does this Code of Conduct apply?
+## When and where does this Code of Conduct apply?

-Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference:
+Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference):

 - Communicating with an official project email address.
 - Communicating with community members within the nf-core Slack channel.
 - Participating in hackathons organised by nf-core (both online and in-person events).
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence.
-- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc.
+- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace.
+- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc.
 - Representing nf-core on social media. This includes both official and personal accounts.

 ## nf-core cares 😊

-nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order):
+nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order):

 - Ask for consent before sharing another community member’s personal information (including photographs) on social media.
 - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity.
-- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
+- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
 - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.)
 - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can)
 - Focus on what is best for the team and the community. (When in doubt, ask)
-- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn.
+- Accept feedback, yet be unafraid to question, deliberate, and learn.
 - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!)
-- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**)
+- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**)
 - Take breaks when you feel like you need them.
-- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.)
+- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack)

 ## nf-core frowns on 😕

-The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces.
+The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces:

 - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom.
 - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online.
 - Spamming or trolling of individuals on social media.
-- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention.
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience.
+- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention.
+- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience.

 ### Online Trolling

-The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately.
+The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately.

-All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls.
+All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls.

-## Procedures for Reporting CoC violations
+## Procedures for reporting CoC violations

 If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible.

-You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s).
+You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team.
+
+Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course.
+
+All reports will be handled with the utmost discretion and confidentiality.
+
+You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include:
+
+- Your contact information.
+- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct.
+- The behaviour that was in violation and the circumstances surrounding the incident.
+- The approximate time of the behaviour (if different than the time the report was made).
+- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing.
+- If there is a publicly available record (e.g. mailing list record, a screenshot).
+- Any additional information.
+
+After you file a report, one or more members of our Safety Team will contact you to follow up on your report.
+
+## Who will read and handle reports
+
+All reports will be read and handled by the members of the Safety Team at nf-core.
+
+If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups.
+
+To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+
+## Reviewing reports
+
+After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety.
+
+The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action.
+
+In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information.

-Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course.
+Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report.

-All reports will be handled with utmost discretion and confidentially.
+## Confidentiality
+
+All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse.
+
+We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved.
+
+## Enforcement
+
+Actions taken by the nf-core’s Safety Team may include, but are not limited to:
+
+- Asking anyone to stop a behaviour.
+- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently.
+- Removing access to the gather.town and Slack, either temporarily or permanently.
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons.
+- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident.
+- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently.
+- No action.

 ## Attribution and Acknowledgements
@@ -106,6 +161,22 @@

 ## Changelog

-### v1.0 - March 12th, 2021
+### v1.4 - February 8th, 2022
+
+- Included a new member of the Safety Team. Corrected a typographical error in the text.
+
+### v1.3 - December 10th, 2021
+
+- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+
+### v1.2 - November 12th, 2021
+
+- Removed information specific to reporting CoC violations at the Hackathon in October 2021.
+
+### v1.1 - October 14th, 2021
+
+- Updated with names of new Safety Officers and specific information for the hackathon in October 2021.
+
+### v1.0 - March 15th, 2021

 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.
diff --git a/README.md b/README.md
index a6f3e750..463d1e3f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # ![nf-core/rnafusion](docs/images/nf-core-rnafusion_logo_light.png#gh-light-mode-only) ![nf-core/rnafusion](docs/images/nf-core-rnafusion_logo_dark.png#gh-dark-mode-only)

-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2565517-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2565517)
+[![GitHub Actions CI Status](https://github.com/nf-core/rnafusion/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/rnafusion/actions?query=workflow%3A%22nf-core+CI%22)
+[![GitHub Actions Linting Status](https://github.com/nf-core/rnafusion/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/rnafusion/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3946477-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3946477)

 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
@@ -30,9 +31,8 @@ In rnafusion the full-sized test includes reference building and fusion detectio
 2. Create [STAR](https://github.com/alexdobin/STAR) index
 3. Download [Arriba](https://github.com/suhrig/arriba) references
 4. Download [FusionCatcher](https://github.com/ndaniel/fusioncatcher) references
-5. Download [Pizzly](https://github.com/pmelsted/pizzly) references ([kallisto](https://pachterlab.github.io/kallisto/manual) index)
-6. Download and build [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) references
-7. Download [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) DBs
+5. Download and build [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) references
+6. Download [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) DBs

 #### Main workflow

@@ -43,38 +43,29 @@ In rnafusion the full-sized test includes reference building and fusion detectio
 5. Arriba subworkflow
    - [STAR](https://github.com/alexdobin/STAR) alignment
    - [Arriba](https://github.com/suhrig/arriba) fusion detection
-6. Pizzly subworkflow
-   - [Kallisto](https://pachterlab.github.io/kallisto/) quantification
-   - [Pizzly](https://github.com/pmelsted/pizzly) fusion detection
-7. Squid subworkflow
-   - [STAR](https://github.com/alexdobin/STAR) alignment
-   - [Samtools view](http://www.htslib.org/): convert sam output from STAR to bam
-   - [Samtools sort](http://www.htslib.org/): bam output from STAR
-   - [SQUID](https://github.com/Kingsford-Group/squid) fusion detection
-   - [SQUID](https://github.com/Kingsford-Group/squid) annotate
-8. STAR-fusion subworkflow
+6. STAR-fusion subworkflow
    - [STAR](https://github.com/alexdobin/STAR) alignment
    - [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) fusion detection
-9. Fusioncatcher subworkflow
+7. Fusioncatcher subworkflow
    - [FusionCatcher](https://github.com/ndaniel/fusioncatcher) fusion detection
-10. StringTie subworkflow
-    - [StringTie](https://ccb.jhu.edu/software/stringtie/)
-11. Fusion-report
-    - Merge all fusions detected by the selected tools with [Fusion-report](https://github.com/Clinical-Genomics/fusion-report)
-12. Post-processing and analysis of data
+8. StringTie subworkflow
+   - [StringTie](https://ccb.jhu.edu/software/stringtie/)
+9. Fusion-report
+   - Merge all fusions detected by the selected tools with [Fusion-report](https://github.com/Clinical-Genomics/fusion-report)
+10. Post-processing and analysis of data
     - [FusionInspector](https://github.com/FusionInspector/FusionInspector)
     - [Arriba](https://github.com/suhrig/arriba) visualisation
-    - QC for mapped reads ([`QualiMap: BAM QC`](https://kokonech.github.io/qualimap/HG00096.chr20_bamqc/qualimapReport.html))
-    - Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-) and ([`picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-))
-13. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
-14. Compress bam files to cram with [samtools view](http://www.htslib.org/)
+    - Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-), [`picard CollectInsertSizeMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037055772-CollectInsertSizeMetrics-Picard-) and ([`picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-))
+11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+12. Compress bam files to cram with [samtools view](http://www.htslib.org/)

 ## Usage

-> **Note**
-> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
-> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
-> with `-profile test` before running the workflow on actual data.
+:::note
+If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
+to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
+with `-profile test` before running the workflow on actual data.
+:::

 As the reference building is computationally heavy (> 24h on HPC), it is recommended to test the pipeline with the `-stub` parameter (creation of empty files):
@@ -99,6 +90,12 @@ nextflow run nf-core/rnafusion \
     -stub
 ```

+:::warning
+Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
+provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
+see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+:::
+
 > **Notes:**
 >
 > - Conda is not currently supported; run with singularity or docker.
 > - Paths need to be absolute.
 > - GRCh38 is the only supported reference.
 > - Single-end reads are to be used as last-resort. Paired-end reads are recommended. FusionCatcher cannot be used with single-end reads shorter than 130 bp.

-> **Warning:**
-> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
-> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
- For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnafusion/usage) and the [parameter documentation](https://nf-co.re/rnafusion/parameters). ## Pipeline output diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index ce53881a..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/rnafusion - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-rnafusion-summary: - order: -1001 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 5e0f10e0..6e00ca8c 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,13 +1,37 @@ report_comment: > - This report has been generated by the nf-core/rnafusion + This report has been generated by the nf-core/rnafusion analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. + report_section_order: - "nf-core-rnafusion-methods-description": + nf-core-rnafusion-methods-description: order: -1000 software_versions: order: -1001 - "nf-core-rnafusion-summary": + nf-core-rnafusion-summary: order: -1002 export_plots: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - fastp + - star + - samtools + - picard + - arriba + +module_order: + - fastp + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.zip" + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." 
+ path_filters: + - "*_trimmed*.zip" diff --git a/bin/megafusion.py b/bin/megafusion.py deleted file mode 100755 index 76872b57..00000000 --- a/bin/megafusion.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python - -import argparse -import logging -import sys -from pathlib import Path -import pandas as pd -import ast - -logger = logging.getLogger() - -FUSIONINSPECTOR_MAP = { - "fusion": {"column": 0, "delimiter": "\t", "element": 0}, - "chromosomeA": {"column": 7, "delimiter": ":", "element": 0}, - "chromosomeB": {"column": 10, "delimiter": ":", "element": 0}, - "posA": {"column": 7, "delimiter": ":", "element": 1}, - "posB": {"column": 10, "delimiter": ":", "element": 1}, - "strand1": {"column": 7, "delimiter": ":", "element": 2}, - "strand2": {"column": 10, "delimiter": ":", "element": 2}, - "geneA": {"column": 0, "delimiter": "--", "element": 0}, - "geneB": {"column": 0, "delimiter": "--", "element": 1}, - "split_reads": {"column": 1, "delimiter": "\t", "element": 0}, - "discordant_pairs": {"column": 2, "delimiter": "\t", "element": 0}, - "ffpm": {"column": 25, "delimiter": "\t", "element": 0}, -} - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "--fusioninspector", - metavar="FUSIONINSPECTOR", - type=Path, - help="FusionInspector output in TSV format.", - ) - parser.add_argument( - "--fusionreport", - metavar="FUSIONREPORT", - type=Path, - help="Fusionreport output in TSV format.", - ) - parser.add_argument("--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample") - parser.add_argument( - "--out", - metavar="OUT", - type=Path, - help="Output path.", - ) - return parser.parse_args(argv) - - -def header_def(sample): - return '##fileformat=VCFv4.1\n\ -##ALT=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##INFO=\n\ -##FORMAT=\n\ -##FORMAT=\n\ -##FORMAT=\n\ -##FORMAT=\n\ -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format( - sample - ) - - -def read_fusioninspector(fusioninspector_file, col_num, delimiter, element): - with open(fusioninspector_file) as fusioninspector: - return [line.split()[col_num].split(delimiter)[element] for line in fusioninspector if not line.startswith("#")] - - -def build_fusioninspector_dataframe(file, map): - new_dict = {} - for key in FUSIONINSPECTOR_MAP: - new_dict[key] = read_fusioninspector( - file, - map[key]["column"], - map[key]["delimiter"], - map[key]["element"], - ) - return pd.DataFrame.from_dict(new_dict).set_index("fusion") - - -def read_build_fusionreport(fusionreport_file): - with open(fusionreport_file) as f: - from_html = [line.split('rows": [')[1] for line in f if 'name="fusion_list' in line] - expression = from_html[0].split('], "tool')[0] - fusion_report = pd.DataFrame.from_dict(ast.literal_eval(expression)).set_index("fusion") - if not "arriba" in fusion_report.columns: - fusion_report["arriba"] = "" - if not "fusioncatcher" in fusion_report.columns: - fusion_report["fusioncatcher"] = "" - if not "pizzly" in fusion_report.columns: - fusion_report["pizzly"] = "" - if not "squid" in fusion_report.columns: - fusion_report["squid"] = "" - if not "starfusion" in fusion_report.columns: - fusion_report["starfusion"] = "" - return fusion_report 
- - -def column_manipulation(df): - df["ALT"] = "" - df = df.reset_index() - df["FORMAT"] = "GT:DV:RV:FFPM" - df["ID"] = "." - df["QUAL"] = "." - df["FILTER"] = "PASS" - df["REF"] = "N" - df["INFO"] = "" - df["Sample"] = "" - - for index, row in df.iterrows(): - # ALT - if not row["strand1"] in ["+", "-"] or not row["strand2"] in ["+", "-"]: - df.loc[index, "ALT"] = "N[{}:{}[".format(df["chromosomeB"], row["posB"]) - elif row["strand1"] == "-" and row["strand2"] == "-": - df.loc[index, "ALT"] = "[{}:{}[N".format(row["chromosomeB"], row["posB"]) - elif row["strand1"] == "+" and row["strand2"] == "-": - df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"]) - elif row["strand1"] == "-" and row["strand2"] == "+": - df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"]) - else: - df.loc[index, "ALT"] = "N[{}:{}[".format(row["chromosomeB"], row["posB"]) - # INFO - df.loc[index, "INFO"] = ( - "SVTYPE=BND;CHRA={};CHRB={};GENEA={};GENEB={};ORIENTATION={},{};FOUND_DB={};" - "ARRIBA={};FUSIONCATCHER={};PIZZLY={};SQUID={};STARFUSION={};TOOL_HITS={};SCORE={}".format( - row["chromosomeA"], - row["chromosomeB"], - row["geneA"], - row["geneB"], - row["strand1"], - row["strand2"], - row["found_db"], - row["arriba"], - row["fusioncatcher"], - row["pizzly"], - row["squid"], - row["starfusion"], - row["tools_hits"], - row["score"], - ) - ) - # FORMAT - df.loc[index, "Sample"] = "./1:{}:{}:{}".format(row["split_reads"], row["discordant_pairs"], row["ffpm"]) - return df - - -def write_vcf(df_to_print, header, out_file): - df_to_print[ - [ - "chromosomeA", - "posA", - "ID", - "REF", - "ALT", - "QUAL", - "FILTER", - "INFO", - "FORMAT", - "Sample", - ] - ].to_csv( - path_or_buf=out_file, - sep="\t", - header=None, - index=False, - ) - - with open(out_file, "r+") as f: - content = f.read() - f.seek(0, 0) - f.write(header.rstrip("\r\n") + "\n" + content) - - -def megafusion(fusioninspector_in_file, fusionreport_in_file, sample, out): - """Convert fusion information from FusionInspector and fusion-report into a vcf file. Adapted from https://github.com/J35P312/MegaFusion""" - merged_df = build_fusioninspector_dataframe(fusioninspector_in_file, FUSIONINSPECTOR_MAP).join( - read_build_fusionreport(fusionreport_in_file), how="left" - ) - write_vcf(column_manipulation(merged_df), header_def(sample), out) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - if not args.fusioninspector.is_file() or not args.fusionreport.is_file(): - logger.error(f"The given input file {args.fusioninspector} or {args.fusionreport} was not found!") - sys.exit(2) - megafusion(args.fusioninspector, args.fusionreport, args.sample, args.out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/vcf_collect.py b/bin/vcf_collect.py new file mode 100755 index 00000000..2401d2e8 --- /dev/null +++ b/bin/vcf_collect.py @@ -0,0 +1,505 @@ +#!/usr/bin/env python + +import argparse +import logging +import sys +from pathlib import Path +import pandas as pd +import ast +import numpy as np +import csv + +logger = logging.getLogger() + + +def vcf_collect( + fusioninspector_in_file: str, + fusionreport_in_file: str, + gtf: str, + fusionreport_csv: str, + hgnc: str, + sample: str, + out_file, +) -> None: + """ + Process FusionInspector and FusionReport data, + merge with GTF from FusionInspector and HGNC database, + and write a VCF file. + + Args: + fusioninspector_in_file (str): Path to FusionInspector input file. 
+ fusionreport_in_file (str): Path to Fusion-report input file. + sample (str): Sample name for the header. + hgnc (str): Path to HGNC file. + gtf (str): Path to output GTF file from FusionInspector in TSV format. + fusionreport_csv (str): Path to Fusion-report CSV output file. + out (str): Output VCF file path. + + Adapted from: https://github.com/J35P312/MegaFusion + """ + merged_df = ( + build_fusioninspector_dataframe(fusioninspector_in_file) + .join(read_build_fusionreport(fusionreport_in_file), how="outer", on="FUSION") + .reset_index() + ) + hgnc_df = build_hgnc_dataframe(hgnc) + + df_symbol = merged_df[merged_df["Left_ensembl_gene_id"].isna()] + df_not_symbol = merged_df[merged_df["Left_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, how="right", left_on="ensembl_gene_id", right_on="Left_ensembl_gene_id" + ) + df_symbol = hgnc_df.merge(df_symbol, how="right", left_on="symbol", right_on="GeneA") + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Left_hgnc_id"}) + + df_symbol = df[df["Right_ensembl_gene_id"].isna()] + df_not_symbol = df[df["Right_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, how="right", left_on="ensembl_gene_id", right_on="Right_ensembl_gene_id" + ) + df_symbol = hgnc_df.merge(df_symbol, how="right", left_on="symbol", right_on="GeneB") + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Right_hgnc_id"}) + + gtf_df = build_gtf_dataframe(gtf) + all_df = df.merge(gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id") + all_df[["PosA", "orig_start", "orig_end"]] = all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int) + + all_df = all_df[ + ((all_df["PosA"] >= all_df["orig_start"]) & (all_df["PosA"] <= all_df["orig_end"])) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df.replace("", np.nan, inplace=True) + all_df = all_df.drop_duplicates() + + all_df[["exon_number", "transcript_version"]] = all_df[["exon_number", "transcript_version"]].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosA")["exon_number"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosA")["transcript_version"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + + all_df = all_df.rename(columns={"transcript_version": "Left_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Left_exon_number"}) + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df = all_df.merge(gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id") + all_df[["PosB", "orig_start", "orig_end"]] = all_df[["PosB", "orig_start", "orig_end"]].fillna(0) + all_df[["PosB", "orig_start", "orig_end"]] = all_df[["PosB", "orig_start", "orig_end"]].astype(int) + all_df = all_df[ + ((all_df["PosB"] >= all_df["orig_start"]) & (all_df["PosB"] <= all_df["orig_end"])) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df[["PosA", "PosB"]] = 
all_df[["PosA", "PosB"]].replace(0, np.nan) + all_df = all_df.replace("", np.nan) + + all_df[["exon_number", "transcript_version"]] = all_df[["exon_number", "transcript_version"]].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosB")["exon_number"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosB")["transcript_version"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + + all_df = all_df.rename(columns={"transcript_version": "Right_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Right_exon_number"}) + + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_transcript_version", + "Right_exon_number", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df = all_df.rename(columns={"FUSION": "Fusion"}) + all_df = all_df.set_index("Fusion") + + all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv)) + + return write_vcf(column_manipulation(all_df), header_def(sample), out_file) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "--fusioninspector", + metavar="FUSIONINSPECTOR", + type=Path, + help="FusionInspector output in TSV format.", + ) + parser.add_argument( + "--fusionreport", + metavar="FUSIONREPORT", + type=Path, + help="Fusionreport output in index/html format.", + ) + parser.add_argument( + "--fusionreport_csv", + metavar="FUSIONREPORT_CSV", + type=Path, + help="Fusionreport output in CSV format.", + ) + parser.add_argument( + "--fusioninspector_gtf", + metavar="GTF", + type=Path, + help="FusionInspector GTF output.", + ) + parser.add_argument( + "--hgnc", + metavar="HGNC", + type=Path, + help="HGNC database.", + ) + parser.add_argument("--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample") + parser.add_argument( + "--out", + metavar="OUT", + type=Path, + help="VCF output path.", + ) + return parser.parse_args(argv) + + +def header_def(sample: str) -> str: + """ + Define the header of the VCF file + """ + return '##fileformat=VCFv4.1\n\ +##ALT=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format( + sample + ) + + +def convert_to_list(annots_str: str) -> list: + try: + return ast.literal_eval(annots_str) + except (SyntaxError, ValueError): + return np.nan + + +def build_fusioninspector_dataframe(file: str) -> pd.DataFrame: + """ + Read FusionInspector output from a CSV file, preprocess the data, and set 'FUSION' as the index. 
+ """ + df = pd.read_csv(file, sep="\t") + df = df.rename(columns={"#FusionName": "FUSION"}) + df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split(":", expand=True) + df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split(":", expand=True) + df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split("^", expand=True) + df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split("^", expand=True) + df["annots"] = ( + df["annots"] + .apply(convert_to_list) + .apply(lambda x: ",".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "") + ) + return df.set_index(["FUSION"]) + + +def replace_value_with_column_name(row: pd.Series, value_to_replace: str, column_name: str) -> str: + """ + Replace a specific value in a row with the corresponding column name. + """ + new_values = "" + for col_name, value in row.items(): + if col_name == column_name: + if value == value_to_replace: + new_values = col_name + else: + new_values = "" + return new_values + + +def concatenate_columns(row: pd.Series) -> str: + """ + Concatenate non-empty values in a row into a single string separated by commas. + """ + non_empty_values = [str(value) for value in row if value != ""] + return ",".join(non_empty_values) + + +def read_build_fusionreport(fusionreport_file: str) -> pd.DataFrame: + """ + Read and preprocess fusion-report data from a file, including handling missing tool columns, + getting the columns with each tool and create a new FOUND_IN column with all the tool hits. + Convert the list of databases in FOUND_DB into a joined string with a comma separator. + Make all column headers uppercase. + """ + with open(fusionreport_file) as f: + from_html = [line.split('rows": [')[1] for line in f if 'name="fusion_list' in line] + expression = from_html[0].split('], "tool')[0] + fusion_report = pd.DataFrame.from_dict(ast.literal_eval(expression)) + if not "arriba" in fusion_report.columns: + fusion_report["arriba"] = "" + if not "fusioncatcher" in fusion_report.columns: + fusion_report["fusioncatcher"] = "" + if not "starfusion" in fusion_report.columns: + fusion_report["starfusion"] = "" + fusion_report["arriba"] = fusion_report[["arriba"]].apply( + replace_value_with_column_name, args=("true", "arriba"), axis=1 + ) + fusion_report["fusioncatcher"] = fusion_report[["fusioncatcher"]].apply( + replace_value_with_column_name, args=("true", "fusioncatcher"), axis=1 + ) + fusion_report["starfusion"] = fusion_report[["starfusion"]].apply( + replace_value_with_column_name, args=("true", "starfusion"), axis=1 + ) + fusion_report["FOUND_IN"] = fusion_report[["arriba", "starfusion", "fusioncatcher"]].apply( + concatenate_columns, axis=1 + ) + fusion_report.columns = fusion_report.columns.str.upper() + fusion_report["FOUND_DB"] = fusion_report["FOUND_DB"].apply(lambda x: ",".join(x)) + fusion_report[["GeneA", "GeneB"]] = fusion_report["FUSION"].str.split("--", expand=True) + + return fusion_report[["FUSION", "GeneA", "GeneB", "TOOLS_HITS", "SCORE", "FOUND_DB", "FOUND_IN"]].set_index( + ["FUSION"] + ) + + +def read_fusionreport_csv(file: str) -> pd.DataFrame: + df = pd.read_csv(file) + df[["starfusion", "arriba", "fusioncatcher"]] = df[["starfusion", "arriba", "fusioncatcher"]].astype("str") + columns_to_iterate = ["starfusion", "arriba", "fusioncatcher"] + for index, row in df.iterrows(): + for column in columns_to_iterate: + cell_value = row[column] + + if "#" in cell_value: + df.at[index, column] = df.at[index, column].split(",")[0] + 
df.at[index, column] = df.at[index, column].replace("position: ", "") + df.at[index, "A"] = df.at[index, column].split("#")[0] + df.at[index, "B"] = df.at[index, column].split("#")[1] + df.at[index, "ChromosomeA"] = df.at[index, "A"].split(":")[0] + df.at[index, "PosA"] = df.at[index, "A"].split(":")[1] + if "+" in df.at[index, "A"] or "-" in df.at[index, "A"]: + df.at[index, "StrandA"] = df.at[index, "A"].split(":")[2] + else: + df.at[index, "StrandA"] = "" + + df.at[index, "ChromosomeB"] = df.at[index, "B"].split(":")[0] + df.at[index, "PosB"] = df.at[index, "B"].split(":")[1] + if "+" in df.at[index, "B"] or "-" in df.at[index, "B"]: + df.at[index, "StrandB"] = df.at[index, "B"].split(":")[2] + else: + df.at[index, "StrandB"] = "" + + break + df[["GeneA", "GeneB"]] = df["Fusion"].str.split("--", expand=True) + df = df.set_index("Fusion") + df.to_csv("tmp.csv") + return df[["GeneA", "GeneB", "ChromosomeA", "PosA", "StrandA", "ChromosomeB", "PosB", "StrandB"]] + + +def column_manipulation(df: pd.DataFrame) -> pd.DataFrame: + """ + Manipulate and prepare DataFrame for VCF file creation. + """ + df["ALT"] = "" + df = df.reset_index() + df["FORMAT"] = "GT:DV:RV:FFPM" + df["ID"] = "." + df["QUAL"] = "." + df["FILTER"] = "PASS" + df["REF"] = "N" + df["INFO"] = "" + df["Sample"] = "" + df["Strand1"] = df["Strand1"].astype(str) + df["JunctionReadCount"] = df["JunctionReadCount"].fillna(0).astype(int).astype(str) + df["SpanningFragCount"] = df["SpanningFragCount"].fillna(0).astype(int).astype(str) + df["FFPM"] = df["FFPM"].fillna(0).astype(float).astype(str) + df["ChromosomeA"] = df["ChromosomeA"].fillna(0).astype(str) + df["ChromosomeB"] = df["ChromosomeB"].fillna(0).astype(str) + df["Left_hgnc_id"] = df["Left_hgnc_id"].fillna(0).astype(int).astype(str) + df["Right_hgnc_id"] = df["Right_hgnc_id"].fillna(0).astype(int).astype(str) + df["Left_exon_number"] = df["Left_exon_number"].fillna(0).astype(int).astype(str) + df["Right_exon_number"] = df["Right_exon_number"].fillna(0).astype(int).astype(str) + df["Left_transcript_version"] = df["Left_transcript_version"].fillna(0).astype(int).astype(str) + df["Right_transcript_version"] = df["Right_transcript_version"].fillna(0).astype(int).astype(str) + df["PosA"] = df["PosA"].fillna(0).astype(int).astype(str) + df["PosB"] = df["PosB"].fillna(0).astype(int).astype(str) + df["PROT_FUSION_TYPE"] = df["PROT_FUSION_TYPE"].replace(".", "nan") + df["CDS_LEFT_ID"] = df["CDS_LEFT_ID"].replace(".", "nan") + df["CDS_RIGHT_ID"] = df["CDS_RIGHT_ID"].replace(".", "nan") + + for index, row in df.iterrows(): + if row["Strand1"] == "-" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'[{row["ChromosomeB"]}:{row["PosB"]}[N' + elif row["Strand1"] == "+" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + elif row["Strand1"] == "-" and row["Strand2"] == "+": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + else: + df.loc[index, "ALT"] = f'N[{row["ChromosomeB"]}:{row["PosB"]}[' + + df.loc[index, "INFO"] = ( + f"SVTYPE=BND;CHRA={row['ChromosomeA']};CHRB={row['ChromosomeB']};GENEA={row['GeneA']};GENEB={row['GeneB']};" + f"POSA={row['PosA']};POSB={row['PosB']};ORIENTATION={row['Strand1']},{row['Strand2']};FOUND_DB={row['FOUND_DB']};" + f"FOUND_IN={row['FOUND_IN']};TOOL_HITS={row['TOOLS_HITS']};SCORE={row['SCORE']};FRAME_STATUS={row['PROT_FUSION_TYPE']};" + f"TRANSCRIPT_ID_A={row['CDS_LEFT_ID']};TRANSCRIPT_ID_B={row['CDS_RIGHT_ID']};" + 
f"TRANSCRIPT_VERSION_A={row['Left_transcript_version']};TRANSCRIPT_VERSION_B={row['Right_transcript_version']};" + f"HGNC_ID_A={row['Left_hgnc_id']};HGNC_ID_B={row['Right_hgnc_id']};" + f"EXON_NUMBER_A={row['Left_exon_number']};EXON_NUMBER_B={row['Right_exon_number']};" + f"ANNOTATIONS={row['annots']}" + ) + df.loc[index, "Sample"] = f"./1:{row['JunctionReadCount']}:{row['SpanningFragCount']}:{row['FFPM']}" + + return df + + +def write_vcf(df_to_print: pd.DataFrame, header: str, out_file: str) -> None: + """ + Write a VCF file with a specified DataFrame, header, and output file path. + """ + df_to_print[ + [ + "ChromosomeA", + "PosA", + "ID", + "REF", + "ALT", + "QUAL", + "FILTER", + "INFO", + "FORMAT", + "Sample", + ] + ].to_csv(path_or_buf=out_file, sep="\t", header=None, index=False, quoting=csv.QUOTE_NONE) + + with open(out_file, "r+") as f: + content = f.read() + f.seek(0, 0) + f.write(header.rstrip("\r\n") + "\n" + content) + + +def build_hgnc_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from HGNC input file, extracting 'hgnc_id' and 'ensembl_gene_id' columns. + """ + df = pd.read_csv(file, sep="\t", low_memory=False) + df["hgnc_id"] = df["hgnc_id"].str.replace("HGNC:", "") + return df[["hgnc_id", "ensembl_gene_id", "symbol"]].dropna() + + +def build_gtf_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from GTF file converted in TSV, extracting relevant columns. + """ + df = pd.read_csv(file, sep="\t") + df[["fusion_dump", "Transcript_id"]] = df["transcript_id"].str.split("^", expand=True) + df[["orig_chromosome", "orig_start", "orig_end", "orig_dir"]] = df["orig_coord_info"].str.split(",", expand=True) + return df[["Transcript_id", "transcript_version", "exon_number", "orig_start", "orig_end"]] + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + if ( + not args.fusioninspector.is_file() + or not args.fusionreport.is_file() + or not args.fusioninspector_gtf + or not args.fusionreport_csv + or not args.hgnc + ): + logger.error(f"The given input file {args.fusioninspector} or {args.fusionreport} was not found!") + sys.exit(2) + vcf_collect( + args.fusioninspector, + args.fusionreport, + args.fusioninspector_gtf, + args.fusionreport_csv, + args.hgnc, + args.sample, + args.out, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/genomes.config b/conf/genomes.config index e7a01b6d..c1385ebe 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -18,7 +18,6 @@ params { transcript = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.cdna.all.fa.gz" refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf.refflat" rrna_intervals= "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" - } } } diff --git a/conf/modules.config b/conf/modules.config index b4dc96d6..e4ae2bb8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -68,12 +68,18 @@ process { withName: FASTQC { ext.args = '--quiet' ext.when = { !params.skip_qc } + publishDir = [ + path: { "${params.outdir}/fastqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } - withName: FASTQC_FOR_TRIM { + withName: FASTQC_FOR_FASTP { ext.args = '--quiet' + ext.prefix = { "${meta.id}_trimmed" } publishDir = [ - path: { "${params.outdir}/fastqc_for_trim" }, + path: { "${params.outdir}/fastqc_for_fastp" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -94,13 +100,12 @@ process { withName: FUSIONINSPECTOR { ext.when = { !params.skip_vis } ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } - + ext.args2 = '--annotate --examine_coding_effect' } withName: FUSIONREPORT { ext.when = { !params.skip_vis } ext.args = "--export csv" - ext.args2 = { params.fusionreport_filter ? "--tool-cutoff 2" : "--tool-cutoff 1"} publishDir = [ path: { "${params.outdir}/fusionreport/${meta.id}" }, mode: params.publish_dir_mode, @@ -133,23 +138,22 @@ process { ] } - withName: KALLISTO_INDEX { - ext.args = '-k 31' + withName: HGNC_DOWNLOAD { publishDir = [ - path: { "${params.genomes_base}/pizzly" }, + path: { "${params.genomes_base}/hgnc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - withName: MEGAFUSION { - ext.when = {!params.fusioninspector_only} - ext.prefix = { "${meta.id}_fusion_data" } - } - - withName: MULTIQC { ext.when = { !params.skip_qc } + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: PICARD_COLLECTRNASEQMETRICS { @@ -157,39 +161,28 @@ process { } - withName: PICARD_MARKDUPLICATES { + withName: GATK4_MARKDUPLICATES { ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } - } - - withName: PIZZLY { - ext.args = "-k 31 --align-score 2 --insert-size 400 --cache index.cache.txt" publishDir = [ - path: { "${params.outdir}/pizzly" }, + path: { "${params.outdir}/picard" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - withName: QUALIMAP_RNASEQ { - ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all)} - } - - withName: REFORMAT { - ext.args = "forcetrimright=75" - ext.args2 = "forcetrimleft=75" - } - - withName: SAMPLESHEET_CHECK { + withName: PICARD_COLLECTINSERTSIZEMETRICS { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + ext.prefix = { "${meta.id}_collectinsertsize"} publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/picard" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - withName: SAMTOOLS_INDEX_FOR_STARFUSION { + withName: SAMPLESHEET_CHECK { publishDir = [ - path: { "${params.outdir}/star_for_starfusion" }, + path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -203,18 +196,17 @@ process { ] } - withName: SAMTOOLS_SORT_FOR_SQUID_CHIMERIC { - ext.prefix = { "${meta.id}_chimeric_sorted" } + withName: SAMTOOLS_INDEX_FOR_ARRIBA { + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } publishDir = [ - path: { "${params.outdir}/samtools_sort_for_squid_chimeric" }, + path: { "${params.outdir}/cram_arriba" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: SAMTOOLS_VIEW_FOR_ARRIBA { - ext.args = { "--output-fmt cram" } - ext.prefix = { "${meta.id}_star_for_arriba" } + withName: SAMTOOLS_SORT_FOR_ARRIBA { + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } publishDir = [ path: { "${params.outdir}/cram_arriba" }, mode: params.publish_dir_mode, @@ -222,30 +214,28 @@ process { ] } - withName: SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC { - ext.prefix = { "${meta.id}_chimeric" } - ext.args = { "--output-fmt bam" } + withName: SAMTOOLS_VIEW_FOR_ARRIBA { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } publishDir = [ - path: { "${params.outdir}/samtools_view_for_squid_chimeric" }, + path: { "${params.outdir}/cram_arriba" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: SAMTOOLS_VIEW_FOR_SQUID_CRAM { - ext.args = { "--output-fmt cram" } + withName: SAMTOOLS_INDEX_FOR_STARFUSION { publishDir = [ - path: { "${params.outdir}/cram_squid" }, + path: { "${params.outdir}/star_for_starfusion" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC { - ext.args = { "--output-fmt cram" } - ext.prefix = { "${meta.id}_chimeric" } + withName: SAMTOOLS_INDEX_FOR_STARFUSION_CRAM { + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } publishDir = [ - path: { "${params.outdir}/cram_squid" }, + path: { "${params.outdir}/cram_starfusion" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -285,23 +275,6 @@ process { --chimMultimapNmax 50' } - withName: STAR_FOR_SQUID { - publishDir = [ - path: { "${params.outdir}/star_for_squid" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - ext.args = '--twopassMode Basic \ - --chimOutType SeparateSAMold \ - --chimSegmentMin 20 \ - --chimJunctionOverhangMin 12 \ - --alignSJDBoverhangMin 10 \ - --outReadsUnmapped Fastx \ - --outSAMstrandField intronMotif \ - --outSAMtype BAM SortedByCoordinate \ - --readFilesCommand zcat' - } - withName: STAR_FOR_STARFUSION { publishDir = [ path: { "${params.outdir}/star_for_starfusion" }, @@ -375,4 +348,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + + withName: VCF_COLLECT { + ext.when = {!params.fusioninspector_only} + } } diff --git a/containers/arriba/Dockerfile b/containers/arriba/Dockerfile deleted file mode 100644 index ea36d90f..00000000 --- a/containers/arriba/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM nfcore/base:1.9 - -LABEL authors="Martin Proks" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-rnafusion-arriba_1.2.0/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-rnafusion-arriba_1.2.0 > nf-core-rnafusion-arriba_1.2.0.yml diff --git a/containers/arriba/environment.yml b/containers/arriba/environment.yml deleted file mode 100644 index 20a1024e..00000000 --- a/containers/arriba/environment.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: nf-core-rnafusion-arriba_1.2.0 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::arriba=1.2.0 - - bioconda::bioconductor-genomicalignments - - bioconda::bioconductor-genomicranges - - bioconda::samtools=1.9 - - bioconda::star=2.7.1a - - conda-forge::openssl=1.0 - - conda-forge::r-circlize - - conda-forge::readline=6.2 diff --git a/containers/ericscript/Dockerfile b/containers/ericscript/Dockerfile deleted file mode 100644 index 70b87c48..00000000 --- a/containers/ericscript/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM nfcore/base:1.9 - -LABEL authors="Martin Proks" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-rnafusion-ericscript_0.5.5/bin:$PATH - -# Ignore database check (https://github.com/nf-core/rnafusion/issues/119) -RUN echo 1 > /opt/conda/envs/nf-core-rnafusion-ericscript_0.5.5/share/ericscript-0.5.5-4/lib/data/_resources/.flag.dbexists - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-rnafusion-ericscript_0.5.5 > nf-core-rnafusion-ericscript_0.5.5.yml diff --git a/containers/ericscript/environment.yml b/containers/ericscript/environment.yml deleted file mode 100644 index 1e76273c..00000000 --- a/containers/ericscript/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: nf-core-rnafusion-ericscript_0.5.5 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::ericscript=0.5.5 - - conda-forge::ncurses=6.1 diff --git a/containers/fusioncatcher/Dockerfile b/containers/fusioncatcher/Dockerfile deleted file mode 100644 index 77efaf57..00000000 --- a/containers/fusioncatcher/Dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -FROM ubuntu:18.04 - -LABEL Description="This image is used to run FusionCatcher" Version="1.33" - -RUN apt-get -y clean \ - && apt-get -y update \ - && apt-get -y install \ - automake \ - build-essential \ - bzip2 \ - cmake \ - curl \ - g++ \ - gawk \ - gcc \ - gzip \ - libc6-dev \ - libncurses5-dev \ - libtbb2 \ - libtbb-dev \ - make \ - parallel \ - pigz \ - python \ - python-dev \ - python-biopython \ - python-numpy \ - python-openpyxl \ - python-xlrd \ - tar \ - unzip \ - wget \ - zip \ - zlib1g \ - zlib1g-dev \ - zlibc \ - 
default-jdk \ - && apt-get -y clean - -WORKDIR /opt - -###################### -## INSTALLATION -###################### - -RUN wget --no-check-certificate http://sf.net/projects/fusioncatcher/files/bootstrap.py -O bootstrap.py \ - && python bootstrap.py -t -y -i /opt/fusioncatcher/v1.33/ - -ENV PATH /opt/fusioncatcher/v1.33/bin:$PATH diff --git a/containers/pizzly/Dockerfile b/containers/pizzly/Dockerfile deleted file mode 100644 index 0056bf9b..00000000 --- a/containers/pizzly/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM nfcore/base:1.9 - -LABEL authors="Martin Proks" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-rnafusion-pizzly_0.37.3/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-rnafusion-pizzly_0.37.3 > nf-core-rnafusion-pizzly_0.37.3.yml diff --git a/containers/pizzly/environment.yml b/containers/pizzly/environment.yml deleted file mode 100644 index 79974871..00000000 --- a/containers/pizzly/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: nf-core-rnafusion-pizzly_0.37.3 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::kallisto=0.44.0 - - bioconda::pizzly=0.37.3 - - conda-forge::pigz=2.3.4 diff --git a/containers/squid/Dockerfile b/containers/squid/Dockerfile deleted file mode 100644 index 20d390ee..00000000 --- a/containers/squid/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM nfcore/base:1.9 - -LABEL authors="Martin Proks" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin:$PATH - -RUN cd /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin \ - && wget https://raw.githubusercontent.com/Kingsford-Group/squid/f45c9025d41cffd982ecbbdd52844e5a4f074de9/utils/AnnotateSQUIDOutput.py \ - && chmod +x /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin/AnnotateSQUIDOutput.py \ - && ln -s /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin/python3 /bin/python - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-rnafusion-squid_1.5-star2.7.1a > nf-core-rnafusion-squid_1.5-star2.7.1a.yml diff --git a/containers/squid/environment.yml b/containers/squid/environment.yml deleted file mode 100644 index 7385b163..00000000 --- a/containers/squid/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: nf-core-rnafusion-squid_1.5-star2.7.1a -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::samtools=1.9 - - bioconda::squid=1.5 - - bioconda::star=2.7.1a - - conda-forge::numpy=1.18.1 - - conda-forge::python=3.7.6 diff --git a/docs/images/nf-core-rnafusion_metro_map.png b/docs/images/nf-core-rnafusion_metro_map.png index 314b61a2..76b6bc3f 100644 Binary files a/docs/images/nf-core-rnafusion_metro_map.png and b/docs/images/nf-core-rnafusion_metro_map.png differ diff --git a/docs/images/nf-core-rnafusion_metro_map.svg b/docs/images/nf-core-rnafusion_metro_map.svg index 38eb709b..1cd7980f 100644 --- 
a/docs/images/nf-core-rnafusion_metro_map.svg +++ b/docs/images/nf-core-rnafusion_metro_map.svg [SVG source diff omitted: the Inkscape markup was garbled in extraction. Recoverable changes to the metro-map labels: the SQUID (and SQUID annotate), pizzly, kallisto, Qualimap and hard-trimming elements are removed; StringTie, VCFcollect and Picard CollectInsertSizeMetrics are added alongside the retained fastp trimming, FastQC, align/STAR, Arriba (and Arriba visualisation), STAR-Fusion, FusionCatcher, fusion-report, FusionInspector, Picard CollectRnaSeqMetrics/CollectWgsMetrics and MultiQC elements.] diff --git a/docs/output.md b/docs/output.md index 4144891b..dede2c87 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,11 +11,9 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Download and build references](#references) - Build references needed to run the rest of the pipeline -- [STAR](#star) - Alignment for arriba, squid and STAR-fusion +- [STAR](#star) - Alignment for arriba and STAR-fusion - [Cat](#cat) - Concatenate fastq files per sample ID - [Arriba](#arriba) - Arriba fusion detection -- [Pizzly](#pizzly) - Pizzly fusion detection -- [Squid](#squid) - Squid fusion detection - [STAR-fusion](#starfusion) - STAR-fusion fusion detection - [StringTie](#stringtie) - StringTie assembly - [FusionCatcher](#fusioncatcher) - FusionCatcher fusion detection @@ -23,7 +21,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Fusion-report](#fusion-report) - Summary of the findings of each tool and comparison to COSMIC, Mitelman, FusionGDB and FusionGDB2 databases - [FusionInspector](#fusionInspector) - Supervised analysis of fusion predictions from fusion-report, recovers and re-scores evidence for such predictions - [Arriba visualisation](#arriba-visualisation) - Arriba visualisation report for FusionInspector fusions -- [Qualimap](#qualimap) - Quality control of alignments - [Picard](#picard) - Collect QC metrics - [FastQC](#fastqc) - Raw read quality control - 
[MultiQC](#multiqc) - Aggregate reports describing QC results from the whole pipeline @@ -55,8 +52,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `fusiongdb.db` - `fusiongdb2.db` - `mitelman.db` - - `pizzly` - - `kallisto` - file containing the kallisto index - `star` - dir with STAR index - `starfusion` - files and dirs used to build the index @@ -78,7 +73,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ├── arriba_visualisation ├── cram_arriba ├── cram_starfusion -├── cram_squid ├── fastp ├── fastqc ├── fusioncatcher ├── fusioninspector ├── fusionreport ├── megafusion ├── multiqc ├── picard -├── pizzly ├── pipeline_info -├── pizzly -├── qualimap ├── samtools_sort_for_arriba -├── squid ├── star_for_arriba ├── star_for_starfusion -├── star_for_squid ├── starfusion └── work .nextflow.log @@ -129,6 +118,8 @@ If no parameters are specified, the default is applied. +The visualisation displays the fusions that fusioninspector outputs. That means that fusions from all callers are aggregated (by fusion-report) and then analyzed through fusioninspector (note: fusioninspector contains a filtering step!). + ### Cat
@@ -177,7 +168,9 @@ If `--trim_fastp` is selected, [fastp](https://github.com/OpenGene/fastp) will f ![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +:::note +The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: ### FusionCatcher @@ -209,6 +202,8 @@ If `--trim_fastp` is selected, [fastp](https://github.com/OpenGene/fastp) will f ### Fusion-report +Please note that fusion-report is executed from the fork https://github.com/Clinical-Genomics/fusion-report + 
Output files @@ -223,9 +218,21 @@ If `--trim_fastp` is selected, [fastp](https://github.com/OpenGene/fastp) will f
[Fusion-report](https://github.com/matq007/fusion-report) is a tool for parsing outputs from fusion detection tools. -The score is explained [on the original fusion-report github page](https://matq007.github.io/fusion-report/#/score). +The score is explained here: <https://matq007.github.io/fusion-report/#/score>. Summary: + +The weights for databases are as follows: + +- COSMIC (50) +- MITELMAN (50) +- FusionGDB2 (0) -`--fusionreport_filter` can be used to filter the output of fusion-report to fusions identified by at least 2 different tools. +The final formula for calculating the score is: + +$$ +score = 0.5 * \sum_{tool}^{tools} f(fusion, tool)*w(tool) + 0.5 * \sum_{db}^{dbs} g(fusion, db)*w(db) +$$ + +All tools have the same weight. For example, assuming the tool weights sum to 100 across three tools, a fusion detected by two of them and present in both COSMIC and Mitelman would score 0.5 × (2/3 × 100) + 0.5 × (50 + 50) ≈ 83. ### Kallisto @@ -239,14 +246,16 @@ The score is explained [on the original fusion-report github page](https://matq0 Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. -### Megafusion +### Vcf_collect 
Output files -- `megafusion` +- `vcf_collect` - `_fusion_data.vcf` - contains the fusions in vcf format with collected statistics. +Vcf-collect takes as input the results of fusion-report and fusioninspector. That means fusions from all tools are aggregated. Because fusioninspector applies a filter, some fusions detected by a caller may be filtered out by fusioninspector; in those cases, vcf-collect will still display the fusions, but much of the data will be missing, as fusioninspector performs the per-fusion analysis. + 
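As a reading aid for the `_fusion_data.vcf` records, here is a minimal sketch (not the pipeline's entry point) restating the breakend `ALT` construction from `bin/vcf_collect.py` above — the strand combination of the two fusion partners selects the VCF BND bracket notation; the coordinates in the usage line are purely illustrative:

```python
# Simplified restatement of the ALT logic in bin/vcf_collect.py:
# the bracket orientation of a VCF breakend (BND) record encodes the
# strands of the two fusion partners; N stands in for the reference base.
def breakend_alt(strand1: str, strand2: str, chrom_b: str, pos_b: int) -> str:
    if strand1 == "-" and strand2 == "-":
        return f"[{chrom_b}:{pos_b}[N"
    if strand1 == "+" and strand2 == "-":
        return f"N]{chrom_b}:{pos_b}]"
    if strand1 == "-" and strand2 == "+":
        return f"N]{chrom_b}:{pos_b}]"
    return f"N[{chrom_b}:{pos_b}["  # "+/+" and any unrecognised strand values


# Illustrative breakpoint only, not taken from a real sample:
print(breakend_alt("+", "+", "10", 43116584))  # -> N[10:43116584[
```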
[Megafusion](https://github.com/J35P312/MegaFusion) converts RNA fusion files to SV VCF and collects statistics and metrics in a VCF file. @@ -277,66 +286,21 @@ Picard CollectRnaMetrics and picard MarkDuplicates share the same output directo - `picard` - `.MarkDuplicates.metrics.txt` - metrics from MarkDuplicates - `_rna_metrics.txt` - metrics from CollectRnaMetrics + - `_insert_size_metrics.txt` - metrics from CollectInsertSizeMetrics - `.bam` - BAM file with marked duplicates 
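If you need single values from these picard reports programmatically (for example the median insert size), picard metrics files follow a common layout — `#`-prefixed comment lines, then a tab-separated header row followed by the data row(s). A minimal sketch under that assumption; the file name is hypothetical and only follows the naming pattern listed above:

```python
def read_picard_metrics(path: str) -> dict:
    """Return the first data row of a picard metrics file as a dict.

    Assumes the standard picard layout: '#'-prefixed comment lines,
    then a tab-separated header line followed by at least one data row
    (the histogram section, where present, comes after these rows).
    """
    with open(path) as handle:
        rows = [line.rstrip("\n") for line in handle if line.strip() and not line.startswith("#")]
    header, values = rows[0].split("\t"), rows[1].split("\t")
    return dict(zip(header, values))


# Hypothetical sample prefix, matching the pattern above:
metrics = read_picard_metrics("sample1_insert_size_metrics.txt")
print(metrics.get("MEDIAN_INSERT_SIZE"))
```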
-#### Pizzly - -Pizzly uses the following arguments: - -```bash --k 31 \ ---align-score 2 \ ---insert-size 400 \ ---cache index.cache.txt -``` - -
-Output files - -- `pizzly` - - `.pizzly.txt` - contains the identified fusions - - `.pizzly.unfiltered.json` - -
- -### Qualimap - -
-Output files - -- `qualimap` - - `qualimapReport.html` - HTML report - - `rnaseq_qc_results.txt` - TXT results - - `css` - dir for html style - - `images_qualimapReport`- dir for html images - - `raw_data_qualimapReport` - dir for html raw data - -
- ### Samtools -#### Samtools view - -Samtools view is used to convert the chimeric SAM output from STAR_FOR_SQUID to BAM - -
-Output files - -- `samtools_view_for_squid` - - `_chimeric.bam` - sorted BAM file - -
- #### Samtools sort -Samtools sort is used to sort BAM files from STAR_FOR_STARFUSION (for arriba visualisation) and the chimeric BAM from STAR_FOR_SQUID +Samtools sort is used to sort BAM files from STAR_FOR_STARFUSION (for arriba visualisation)
Output files -- `samtools_sort_for_` +- `samtools_sort_for_` - `(_chimeric)_sorted.bam` - sorted BAM file
@@ -353,19 +317,6 @@ Samtools index is used to index BAM files from STAR_FOR_ARRIBA (for arriba visua -### Squid - -Squid is run in two steps: i) fusion detection and ii) fusion annotation, but the output is in a shared `squid` directory - -
-Output files - -- `squid` - - `.squid.fusions_sv.txt` - contains the identified fusions - - `.squid.fusions.annotated.txt`- contains the identified fusions annotated - -
- ### STAR STAR is used to align to genome reference @@ -393,20 +344,6 @@ For `arriba` with the parameters: --chimMultimapNmax 50 ``` -For `squid` with the parameters: - -```bash ---twopassMode Basic \ ---chimOutType SeparateSAMold \ ---chimSegmentMin 20 \ ---chimJunctionOverhangMin 12 \ ---alignSJDBoverhangMin 10 \ ---outReadsUnmapped Fastx \ ---outSAMstrandField intronMotif \ ---outSAMtype BAM SortedByCoordinate \ ---readFilesCommand zcat -``` - For `STAR-fusion` with the parameters: ```bash @@ -435,7 +372,7 @@ For `STAR-fusion` with the parameters: --quantMode GeneCounts ``` -> STAR_FOR_STARFUSION uses `${params.ensembl}/Homo_sapiens.GRCh38.${params.ensembl_version}.chr.gtf` whereas STAR_FOR_ARRIBA and STAR_FOR_SQUID use `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.gtf` +> STAR_FOR_STARFUSION uses `${params.ensembl}/Homo_sapiens.GRCh38.${params.ensembl_version}.chr.gtf` whereas STAR_FOR_ARRIBA uses `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.gtf`
Output files - `star_for_arriba` - `.Aligned.out.bam` -**For squid:** - -- `.Aligned.sortedByCoord.out.bam` -- `.Chimeric.out.sam` -- `.unmapped_1.fastq.gz` -- `.unmapped_2.fastq.gz` - **For starfusion:** - `.Aligned.sortedByCoord.out.bam` @@ -497,6 +427,7 @@ The STAR index is generated with `--sjdbOverhang ${params.read_length - 1}`, par - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. 
diff --git a/docs/usage.md b/docs/usage.md index 9df6d24e..b3625332 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -13,9 +13,9 @@ The pipeline is divided into two parts: - required only once before running the pipeline - **Important**: has to be run with each new release 2. Detecting fusions - - Supported tools: `Arriba`, `FusionCatcher`, `pizzly`, `SQUID`, `STAR-Fusion`, and `StringTie` - - QC: `Fastqc`, `MultiQC`, and `Qualimap rnaseq` - - Fusions visualization: `Arriba`, `fusion-report` and `FusionInspector`, VCF file creation based on `MegaFusion` + - Supported tools: `Arriba`, `FusionCatcher`, `STAR-Fusion`, and `StringTie` + - QC: `FastQC`, `MultiQC`, `Picard CollectInsertSizeMetrics`, `Picard CollectWgsMetrics`, and `Picard MarkDuplicates` + - Fusions visualization: `Arriba`, `fusion-report`, `FusionInspector`, and `vcf_collect` ## Download and build references @@ -94,6 +94,10 @@ process { The four `fusion-report` files: `cosmic.db`, `fusiongdb.db`, `fusiongdb2.db`, `mitelman.db` should then be copied into the HPC `/references/fusion_report_db`. +#### Note about fusioncatcher references + +The references are built from Ensembl version 102 only. It is currently not possible to use any other version or source. + ## Running the pipeline ### Samplesheet input @@ -123,7 +127,7 @@ As you can see above for multiple runs of the same sample, the `sample` name has ### Starting commands -The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `pizzly`, `squid`, `starfusion`, `stringtie`) use the `--all` parameter: +The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `starfusion`, `stringtie`) use the `--all` parameter: ```bash nextflow run nf-core/rnafusion \ @@ -160,7 +164,9 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: The above pipeline run specified with a params file in yaml format: @@ -178,14 +184,16 @@ outdir: './results/' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +:::warning +Conda is not currently supported. +GRCh38 is currently the only supported genome. +::: + ### Options #### Trimming -There are 2 options to trim - -1. Fastp - In this case all tools use the trimmed reads. Quality and adapter trimming by default. In addition, tail trimming and adapter_fastq specification are possible. Example usage: +When the flag `--fastp_trim` is used, `fastp` provides all tools with trimmed reads. Quality and adapter trimming are performed by default. 
In addition, tail trimming and adapter_fastq specification are possible. Example usage: ```bash nextflow run nf-core/rnafusion \ --all \ --input \ --genomes_base \ --outdir \ --fastp_trim \ --trim_tail (optional) \ --adapter_fastq (optional) ``` -2. Hard trimming - In this case, only reads fed to fusioncatcher are trimmed. This is a harsh workaround in case of high read-through. The recommended trimming is thus the fastp_trim one. The trimming is done at 75 bp from the tails. Example usage: -```bash -nextflow run nf-core/rnafusion \ --- -- ... \ ---input \ ---genomes_base \ ---outdir \ ---trim -``` #### Filter fusions detected by 2 or more tools ```bash nextflow run nf-core/rnafusion \ --all \ --input \ --genomes_base \ --outdir - --fusioninspector_filter - --fusionreport_filter + --tools_cutoff ``` -`--fusioninspector_filter` feed only fusions detected by 2 or more tools to fusioninspector for closer analysis (false by default). -`--fusionreport_filter` displays only fusions detected by 2 or more tools in fusionreport html index (true by default). +`--tools_cutoff INT` will discard fusions detected by fewer than INT tools, both for display in the fusionreport html index and for consideration by fusioninspector. Default = 1, i.e. no filtering. #### Adding custom fusions to consider as well as the detected set: whitelist @@ -273,7 +267,7 @@ nextflow run nf-core/rnafusion \ --outdir ``` -This will skip all QC-related processes (metrics collection, `Qualimap`) +This will skip all QC-related processes (picard metrics collection) #### Skipping visualisation @@ -299,11 +293,24 @@ There are two parameters to increase the `--limitSjdbInsertNsj` parameter if nec - `--fusioncatcher_limitSjdbInsertNsj`, default: 2000000 - `--fusioninspector_limitSjdbInsertNsj`, default: 1000000 -Use the parameter `--cram` to compress the BAM files to CRAM for specific tools. Options: arriba, squid, starfusion. Leave no space between options: +Use the parameter `--cram` to compress the BAM files to CRAM for specific tools. Options: arriba, starfusion. Leave no space between options: -- `--cram arriba,squid,starfusion`, default: [] +- `--cram arriba,starfusion`, default: [] - `--cram arriba` +### Troubleshooting + +#### GstrandBit issues + +The issue below sometimes occurs: + +``` +EXITING because of FATAL ERROR: cannot insert sequence on the fly because of strand GstrandBit problem +SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star +``` + +As the error message suggests, this is a STAR-related error, and the STAR forum is your best bet for solving it. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -322,11 +329,15 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. 
-> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such a profile (for example, to upload as supplementary material for academic publications), make sure NOT to include cluster-specific paths to files or institution-specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` @@ -334,7 +345,9 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility; however, when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 408951ae..01b8653d 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -222,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowRnafusion.groovy b/lib/WorkflowRnafusion.groovy index 0654f5fc..1e289fdc 100755 --- a/lib/WorkflowRnafusion.groovy +++ b/lib/WorkflowRnafusion.groovy @@ -53,7 +53,7 @@ class WorkflowRnafusion { public static String toolCitationText(params) { - // TODO Optionally add in-text citation tools to this list. + // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al.
2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ diff --git a/main.nf b/main.nf index 07b9c174..520bcff7 100644 --- a/main.nf +++ b/main.nf @@ -23,7 +23,7 @@ params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') params.chrgtf = WorkflowMain.getGenomeAttribute(params, 'chrgtf') params.transcript = WorkflowMain.getGenomeAttribute(params, 'transcript') params.refflat = WorkflowMain.getGenomeAttribute(params, 'refflat') -params.rrna_intervals = WorkflowMain.getGenomeAttribute(params, 'rrna_intervals') +params.rrna_intervals = WorkflowMain.getGenomeAttribute(params, 'rrna_intervals') /* ======================================================================================== @@ -31,7 +31,6 @@ params.rrna_intervals = WorkflowMain.getGenomeAttribute(params, 'rrna_interval ======================================================================================== */ - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/modules.json b/modules.json index 162a71b5..345e65b8 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "agat/convertspgff2tsv": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "arriba": { "branch": "master", "git_sha": "ea9e2892a9d12e8769402f12096219942bcf6536", @@ -12,102 +17,97 @@ }, "cat/cat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "kallisto/index": { + "gatk4/markduplicates": { "branch": "master", - "git_sha": "699fa6f3002d922380615f3847198aeb57d8b6a9", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"] - }, - "picard/collectwgsmetrics": { - "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "1537442a7be4a78efa3d1ff700a923c627bbda5d", "installed_by": ["modules"] }, - "picard/markduplicates": { + "picard/collectinsertsizemetrics": { "branch": "master", - "git_sha": "2ee934606f1fdf7fc1cb05d6e8abc13bec8ab448", + "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "qualimap/rnaseq": { + "picard/collectwgsmetrics": { "branch": "master", - "git_sha": "4657d98bc9f565e067c4d924126ce107056f5e2f", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "5394565c5fe4c760e5b35977ec7607c62e81d1f8", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "star/align": { "branch": "master", - "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "git_sha": "9f6b233518f7d9ecdcf24b798b7e491db5424273", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "git_sha": "0e98289b5bec6e3f8f588a8a9d05e8aacc1179a0", "installed_by": ["modules"] }, "stringtie/merge": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b0dcb44b018d9b2bcb35b1abb7bcd34061bc5a6d", "installed_by": ["modules"] }, "stringtie/stringtie": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b0dcb44b018d9b2bcb35b1abb7bcd34061bc5a6d", "installed_by": ["modules"] } } diff --git a/modules/local/arriba/download/main.nf b/modules/local/arriba/download/main.nf index 166ed69f..860439ad 100644 --- a/modules/local/arriba/download/main.nf +++ b/modules/local/arriba/download/main.nf @@ -13,11 +13,11 @@ process ARRIBA_DOWNLOAD { script: """ - wget https://github.com/suhrig/arriba/releases/download/v2.3.0/arriba_v2.3.0.tar.gz -O arriba_v2.3.0.tar.gz - tar -xzvf arriba_v2.3.0.tar.gz - rm arriba_v2.3.0.tar.gz - mv arriba_v2.3.0/database/* . - rm -r arriba_v2.3.0 + wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz + tar -xzvf arriba_v2.4.0.tar.gz + rm arriba_v2.4.0.tar.gz + mv arriba_v2.4.0/database/* . 
+ rm -r arriba_v2.4.0 cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -27,11 +27,11 @@ process ARRIBA_DOWNLOAD { stub: """ - touch blacklist_hg38_GRCh38_v2.3.0.tsv.gz - touch protein_domains_hg38_GRCh38_v2.3.0.gff3 - touch cytobands_hg38_GRCh38_v2.3.0.tsv - touch known_fusions_hg38_GRCh38_v2.3.0.tsv.gz - touch protein_domains_hg38_GRCh38_v2.3.0.gff3 + touch blacklist_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 + touch cytobands_hg38_GRCh38_v2.4.0.tsv + touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf index 5805a904..cc120119 100644 --- a/modules/local/arriba/visualisation/main.nf +++ b/modules/local/arriba/visualisation/main.nf @@ -2,10 +2,10 @@ process ARRIBA_VISUALISATION { tag "$meta.id" label 'process_medium' - conda "bioconda::arriba=2.3.0" + conda "bioconda::arriba=2.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' : - 'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }" + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" input: tuple val(meta), path(bam), path(bai), path(fusions) diff --git a/modules/local/fusioninspector/main.nf b/modules/local/fusioninspector/main.nf index 6f59a590..c7fcd3f0 100644 --- a/modules/local/fusioninspector/main.nf +++ b/modules/local/fusioninspector/main.nf @@ -10,9 +10,11 @@ process FUSIONINSPECTOR { path reference output: - tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv - path "*" , emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv + tuple val(meta), path("*.coding_effect") , optional:true, emit: tsv_coding_effect + tuple val(meta), path("*.gtf") , optional:true, emit: out_gtf + path "*" , emit: output + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,6 +23,7 @@ process FUSIONINSPECTOR { def prefix = task.ext.prefix ?: "${meta.id}" def fasta = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' """ FusionInspector \\ --fusions $fusion_list \\ @@ -29,7 +32,7 @@ process FUSIONINSPECTOR { --CPU ${task.cpus} \\ -O . \\ --out_prefix $prefix \\ - --vis $args + --vis $args $args2 cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,9 +41,12 @@ process FUSIONINSPECTOR { """ stub: + def prefix = task.ext.prefix ?: "${meta.id}" """ - touch FusionInspector.log - touch FusionInspector.fusions.tsv + touch ${prefix}.FusionInspector.log + touch ${prefix}.FusionInspector.fusions.tsv + touch ${prefix}.FusionInspector.fusions.tsv.annotated.coding_effect + touch ${prefix}.gtf cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf index 38809803..8024d8f8 100644 --- a/modules/local/fusionreport/detect/main.nf +++ b/modules/local/fusionreport/detect/main.nf @@ -2,14 +2,14 @@ process FUSIONREPORT { tag "$meta.id" label 'process_medium' - // Note: 2.7X indices incompatible with AWS iGenomes. 
conda "bioconda::star=2.7.9a" - container "docker.io/clinicalgenomics/fusion-report:2.1.5p4" + container "docker.io/clinicalgenomics/fusion-report:2.1.8" input: - tuple val(meta), path(reads), path(arriba_fusions), path(pizzly_fusions), path(squid_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) + tuple val(meta), path(reads), path(arriba_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) tuple val(meta2), path(fusionreport_ref) + val(tools_cutoff) output: path "versions.yml" , emit: versions @@ -27,13 +27,11 @@ process FUSIONREPORT { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def tools = params.arriba || params.all ? "--arriba ${arriba_fusions} " : '' - tools += params.pizzly || params.all ? "--pizzly ${pizzly_fusions} " : '' - tools += params.squid || params.all ? "--squid ${squid_fusions} " : '' tools += params.starfusion || params.all ? "--starfusion ${starfusion_fusions} " : '' tools += params.fusioncatcher || params.all ? "--fusioncatcher ${fusioncatcher_fusions} " : '' def prefix = task.ext.prefix ?: "${meta.id}" """ - fusion_report run $meta.id . $fusionreport_ref $tools --allow-multiple-gene-symbols $args $args2 + fusion_report run $meta.id . $fusionreport_ref $tools --allow-multiple-gene-symbols --tool-cutoff $tools_cutoff $args $args2 mv fusion_list.tsv ${prefix}.fusionreport.tsv mv fusion_list_filtered.tsv ${prefix}.fusionreport_filtered.tsv diff --git a/modules/local/fusionreport/detect/meta.yml b/modules/local/fusionreport/detect/meta.yml index 7b9de84c..ae3601dc 100644 --- a/modules/local/fusionreport/detect/meta.yml +++ b/modules/local/fusionreport/detect/meta.yml @@ -5,7 +5,7 @@ keywords: tools: - fusionreport: description: Tool for parsing outputs from fusion detection tools - homepage: https://github.com/matq007/fusion-report + homepage: https://github.com/Clinical-Genomics/fusion-report documentation: https://matq007.github.io/fusion-report/#/ doi: "10.1101/011650" licence: ["GPL v3"] @@ -24,14 +24,6 @@ input: type: path description: File pattern: "*.fusions.tsv" - - pizzly_fusions: - type: path - description: File containing fusions from pizzly - pattern: "*.pizzly.txt" - - squid_fusions: - type: path - description: File containing fusions from squid - pattern: "*.annotated.txt" - starfusion_fusions: type: path description: File containing fusions from STARfusion diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf index 3ab1bc03..ac288ade 100644 --- a/modules/local/fusionreport/download/main.nf +++ b/modules/local/fusionreport/download/main.nf @@ -2,9 +2,8 @@ process FUSIONREPORT_DOWNLOAD { tag 'fusionreport' label 'process_medium' - // Note: 2.7X indices incompatible with AWS iGenomes. conda "bioconda::star=2.7.9a" - container "docker.io/clinicalgenomics/fusion-report:2.1.5p4" + container "docker.io/clinicalgenomics/fusion-report:2.1.8" input: val(username) diff --git a/modules/local/hgnc/main.nf b/modules/local/hgnc/main.nf new file mode 100644 index 00000000..1b3808f6 --- /dev/null +++ b/modules/local/hgnc/main.nf @@ -0,0 +1,41 @@ +process HGNC_DOWNLOAD { + tag "hgnc" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + input: + + output: + path "hgnc_complete_set.txt" , emit: hgnc_ref + path "HGNC-DB-timestamp.txt" , emit: hgnc_date + + path "versions.yml" , emit: versions + + + script: + """ + wget https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt + date +%Y-%m-%d/%H:%M > HGNC-DB-timestamp.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget --version 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + + stub: + """ + touch "hgnc_complete_set.txt" + touch "HGNC-DB-timestamp.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget --version 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + +} diff --git a/modules/local/kallisto/quant/main.nf b/modules/local/kallisto/quant/main.nf deleted file mode 100644 index 7d3e5dfb..00000000 --- a/modules/local/kallisto/quant/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process KALLISTO_QUANT { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::kallisto=0.46.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' : - 'quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1' }" - - - input: - tuple val(meta), path(reads) - path index - - output: - path "versions.yml" , emit: versions - tuple val(meta), path("*fusions.txt") , emit: txt - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - kallisto quant \ - -t $task.cpus \ - -i $index \ - --fusion \ - -o . \ - $reads - mv fusion.txt ${prefix}.kallisto_quant.fusions.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.kallisto_quant.fusions.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS - """ -} - diff --git a/modules/local/kallisto/quant/meta.yml b/modules/local/kallisto/quant/meta.yml deleted file mode 100644 index 31821aa6..00000000 --- a/modules/local/kallisto/quant/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: kallisto_quant -description: runs the kallisto quantification algorithm - - quant -tools: - - kallisto: - description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. - homepage: https://pachterlab.github.io/kallisto/ - documentation: https://pachterlab.github.io/kallisto/manual - tool_dev_url: https://github.com/pachterlab/kallisto - doi: "" - licence: ["BSD-2-Clause"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FASTQ file - pattern: "*.{fastq}" - - reference: - type: directory - description: Path to kallisto index - pattern: "*" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g.
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusions: - type: file - description: fusions - pattern: "*.txt" - -authors: - - "@rannick" diff --git a/modules/local/pizzly/detect/main.nf b/modules/local/pizzly/detect/main.nf deleted file mode 100644 index a610b531..00000000 --- a/modules/local/pizzly/detect/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process PIZZLY { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::kallisto=0.46.2 bioconda::pizzly==0.37.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pizzly:0.37.3--py36_2' : - 'quay.io/biocontainers/pizzly:0.37.3--h470a237_3' }" - - input: - tuple val(meta), path(txt) - tuple val(meta2), path(transcript) - tuple val(meta3), path(gtf) - - output: - path "versions.yml" , emit: versions - tuple val(meta), path("*pizzly.txt") , emit: fusions - tuple val(meta), path("*unfiltered.json") , emit: fusions_unfiltered - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - pizzly \\ - $args \\ - --gtf $gtf \\ - --fasta $transcript \\ - --output ${prefix}.pizzly $txt - - pizzly_flatten_json.py ${prefix}.pizzly.json ${prefix}.pizzly.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pizzly: \$(pizzly --version | grep pizzly | sed -e "s/pizzly version: //g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.pizzly.txt - touch ${prefix}.pizzly.unfiltered.json - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pizzly: \$(pizzly --version | grep pizzly | sed -e "s/pizzly version: //g") - END_VERSIONS - """ -} - diff --git a/modules/local/pizzly/detect/meta.yml b/modules/local/pizzly/detect/meta.yml deleted file mode 100644 index 930b3a62..00000000 --- a/modules/local/pizzly/detect/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: pizzly -description: Pizzly detection of fusions. -keywords: - - fusion - - pizzly -tools: - - pizzly: - description: Fast fusion detection using kallisto - homepage: https://github.com/pmelsted/pizzly - documentation: https://github.com/pmelsted/pizzly - tool_dev_url: https://github.com/pmelsted/pizzly - doi: "" - licence: ["BSD-2-Clause"] - -input: - - fasta: - type: file - description: genome fasta file - pattern: "*.{fasta*}" - - reference: - type: directory - description: Path to kallisto index - pattern: "*" - - gtf: - type: file - description: gtf reference - pattern: "*.gtf" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusions: - type: file - description: fusions - pattern: "*pizzly.txt" - - unfiltered: - type: file - description: unfiltered fusions - pattern: "*unfiltered.json" - -authors: - - "@rannick" diff --git a/modules/local/pizzly/download/main.nf b/modules/local/pizzly/download/main.nf deleted file mode 100644 index efaae3aa..00000000 --- a/modules/local/pizzly/download/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process PIZZLY_DOWNLOAD { - tag "pizzly" - label 'process_medium' - - conda "bioconda::kallisto=0.46.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' : - 'quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1' }" - - input: - tuple val(meta), path(transcript) - - output: - path "versions.yml" , emit: versions - path "index.idx" , emit: reference - - script: - def args = task.ext.args ?: '' - """ - kallisto index \\ - -i index.idx \\ - $args \\ - $transcript - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS """ - - stub: - """ - touch index.idx - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS - """ - -} diff --git a/modules/local/reformat/main.nf b/modules/local/reformat/main.nf deleted file mode 100644 index 969b0e34..00000000 --- a/modules/local/reformat/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process REFORMAT { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bbmap=38.90" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' : - 'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }" - - - input: - tuple val(meta), path(reads) - - output: - path "versions.yml" , emit: versions - tuple val(meta), path("*trimmed.fq.gz") , emit: reads_out - - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def in1 = "in=${reads[0]}" - def in2 = meta.single_end ? "" : "in=${reads[1]}" - def out1 ="out=${prefix}_R1_trimmed.fq.gz" - def out2 =meta.single_end ? "" : "out=${prefix}_R2_trimmed.fq.gz" - - """ - reformat.sh $in1 $out1 $args - reformat.sh $in2 $out2 $args2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - reformat.sh: \$(echo \$(reformat.sh --version 2>&1)| sed -e "s/BBMap version //g" ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def out1 ="out=${prefix}_R1_trimmed.fq.gz" - def out2 =meta.single_end ? "" : "out=${prefix}_R2_trimmed.fq.gz" - """ - touch $out1 - touch $out2 - cat <<-END_VERSIONS > versions.yml - "${task.process}": - reformat.sh: \$(echo \$(reformat.sh --version 2>&1)| sed -e "s/BBMap version //g" ) - END_VERSIONS - """ -} diff --git a/modules/local/reformat/meta.yml b/modules/local/reformat/meta.yml deleted file mode 100644 index 86e0970a..00000000 --- a/modules/local/reformat/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: fusionreport -description: fusionreport -keywords: - - sort -tools: - - fusionreport: - description: fusionreport - homepage: https://github.com/matq007/fusion-report - documentation: https://matq007.github.io/fusion-report/#/ - doi: "10.1101/011650" - licence: ["GPL v3"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FASTQ file - pattern: "*.{fastq}*" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads: - type: file - description: FASTQ file - pattern: "*.{fq.gz}" - -authors: - - "@praveenraj2018, @rannick" diff --git a/modules/local/squid/annotate/main.nf b/modules/local/squid/annotate/main.nf deleted file mode 100644 index 9b6eebe7..00000000 --- a/modules/local/squid/annotate/main.nf +++ /dev/null @@ -1,41 +0,0 @@ - -process SQUID_ANNOTATE { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::squid=1.5" - container "docker.io/nfcore/rnafusion:squid_1.5-star2.7.1a" - - - - input: - tuple val(meta), path(txt) - tuple val(meta2), path(gtf) - - output: - tuple val(meta), path("*annotated.txt") , emit: fusions_annotated - path "versions.yml" , emit: versions - - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - AnnotateSQUIDOutput.py $gtf $txt ${prefix}.squid.fusions.annotated.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.squid.fusions.annotated.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') - END_VERSIONS - """ -} diff --git a/modules/local/squid/annotate/meta.yml b/modules/local/squid/annotate/meta.yml deleted file mode 100644 index e1a1f0d2..00000000 --- a/modules/local/squid/annotate/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: squid -description: Squid detection of fusions. -keywords: - - fusion - - pizzly -tools: - - pizzly: - description: Fusion detection using squid - homepage: https://github.com/Kingsford-Group/squid - documentation: https://github.com/Kingsford-Group/squid - tool_dev_url: https://github.com/Kingsford-Group/squid - doi: "" - licence: ["BSD-3-Clause"] - -input: - - fusions: - type: directory - description: Path to squid fusions - pattern: "*.txt" - - gtf: - type: file - description: gtf reference - pattern: "*.gtf" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusions_annotated: - type: file - description: squid fusions annotated - pattern: "*squid.fusions.annotated.txt" - -authors: - - "@rannick" diff --git a/modules/local/squid/detect/main.nf b/modules/local/squid/detect/main.nf deleted file mode 100644 index 3ccb6e3e..00000000 --- a/modules/local/squid/detect/main.nf +++ /dev/null @@ -1,40 +0,0 @@ - -process SQUID { - tag "squid" - label 'process_medium' - - conda "bioconda::squid=1.5" - container "docker.io/nfcore/rnafusion:squid_1.5-star2.7.1a" - - - - input: - tuple val(meta), path(bam), path(chimeric_bam) - - output: - tuple val(meta), path("*sv.txt") , emit: fusions - path "versions.yml" , emit: versions - - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - squid -b $bam -c $chimeric_bam -o ${prefix}.squid.fusions - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.squid.fusions_sv.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') - END_VERSIONS - """ -} diff --git a/modules/local/squid/detect/meta.yml b/modules/local/squid/detect/meta.yml deleted file mode 100644 
index a7f1e61a..00000000 --- a/modules/local/squid/detect/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: squid -description: Squid detection of fusions. -keywords: - - fusion - - pizzly -tools: - - pizzly: - description: Fusion detection using squid - homepage: https://github.com/Kingsford-Group/squid - documentation: https://github.com/Kingsford-Group/squid - tool_dev_url: https://github.com/Kingsford-Group/squid - doi: "" - licence: ["BSD-3-Clause"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - chimeric_bam: - type: file - description: BAM/CRAM/SAM file containing only chimeric sorted reads - pattern: "*.{bam,cram,sam}" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusions: - type: directory - description: Path to squid fusions - pattern: "*.txt" - -authors: - - "@rannick" diff --git a/modules/local/megafusion/main.nf b/modules/local/vcf_collect/main.nf similarity index 57% rename from modules/local/megafusion/main.nf rename to modules/local/vcf_collect/main.nf index d8cb5db0..3ede7936 100644 --- a/modules/local/megafusion/main.nf +++ b/modules/local/vcf_collect/main.nf @@ -1,18 +1,20 @@ -process MEGAFUSION { +process VCF_COLLECT { tag "$meta.id" label 'process_single' - conda "conda-forge::python=3.8.3" + conda "conda-forge::pandas=1.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : 'quay.io/biocontainers/pandas:1.5.2' }" input: - tuple val(meta), path(tsv), path(report) + tuple val(meta), path(fusioninspector_tsv), path(fusioninspector_gtf_tsv), path(fusionreport_report), path(fusionreport_csv) + tuple val(meta2), path(hgnc_ref) + tuple val(meta3), path(hgnc_date) output: path "versions.yml" , emit: versions - tuple val(meta), path("*vcf") , emit: vcf + tuple val(meta), path("*vcf.gz") , emit: vcf when: task.ext.when == null || task.ext.when @@ -20,11 +22,13 @@ process MEGAFUSION { script: def prefix = task.ext.prefix ?: "${meta.id}" """ - megafusion.py --fusioninspector $tsv --fusionreport $report --sample ${prefix} --out ${prefix}.vcf + vcf_collect.py --fusioninspector $fusioninspector_tsv --fusionreport $fusionreport_report --fusioninspector_gtf $fusioninspector_gtf_tsv --fusionreport_csv $fusionreport_csv --hgnc $hgnc_ref --sample ${prefix} --out ${prefix}_fusion_data.vcf + gzip ${prefix}_fusion_data.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') + HGNC DB retrieval: \$(cat $hgnc_date) END_VERSIONS """ diff --git a/modules/local/megafusion/meta.yml b/modules/local/vcf_collect/meta.yml similarity index 90% rename from modules/local/megafusion/meta.yml rename to modules/local/vcf_collect/meta.yml index 31343c7e..de4667bb 100644 --- a/modules/local/megafusion/meta.yml +++ b/modules/local/vcf_collect/meta.yml @@ -1,5 +1,5 @@ -name: megafusion -description: megafusion +name: vcf_collect +description: vcf_collect keywords: - sort tools: @@ -32,8 +32,8 @@ output: pattern: "versions.yml" - vcf: type: file - description: File containing the summary of all fusions as vcf file - pattern: "*.tsv" + description: File containing the summary of all fusions as compressed vcf file + pattern: "*.vcf.gz" authors: - "@rannick" diff --git 
a/modules/nf-core/agat/convertspgff2tsv/environment.yml b/modules/nf-core/agat/convertspgff2tsv/environment.yml new file mode 100644 index 00000000..b5fdf3db --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/environment.yml @@ -0,0 +1,7 @@ +name: agat_convertspgff2tsv +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::agat=1.2.0 diff --git a/modules/nf-core/agat/convertspgff2tsv/main.nf b/modules/nf-core/agat/convertspgff2tsv/main.nf new file mode 100644 index 00000000..cef48360 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/main.nf @@ -0,0 +1,35 @@ +process AGAT_CONVERTSPGFF2TSV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.2.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.2.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gff) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + agat_convert_sp_gff2tsv.pl \\ + --gff $gff \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gff2tsv.pl --help | sed '3!d; s/.*v//' | sed 's/ .*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/convertspgff2tsv/meta.yml b/modules/nf-core/agat/convertspgff2tsv/meta.yml new file mode 100644 index 00000000..f5865dfe --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/meta.yml @@ -0,0 +1,38 @@ +name: agat_convertspgff2tsv +description: | + Converts a GFF/GTF file into a TSV file +keywords: + - genome + - gff + - gtf + - conversion + - tsv +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - tsv: + type: file + description: Annotation file in TSV format + pattern: "*.{gtf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 00000000..17a04ef2 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 9f062219..4264a92c 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -2,7 +2,7 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - conda "conda-forge::pigz=2.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : 'biocontainers/pigz:2.3.4' }" diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 8acc0bfa..00a8db0b 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,7 @@ keywords: tools: - cat: description: Just concatenation - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - licence: ["GPL-3.0-or-later"] input: - meta: @@ -21,7 +19,6 @@ input: type: file description: List of compressed / uncompressed files pattern: "*" - output: - versions: type: file @@ -31,7 +28,9 @@ output: type: file description: Concatenated file. Will be gzipped if file_out ends with ".gz" pattern: "${file_out}" - authors: - "@erikrikarddaniel" - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..5766daaf --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + 
test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..423571ba --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + 
"GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..bff93add --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index 5021e6fc..3d963784 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index 8a39e309..db4ac3c7 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -34,7 +34,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..f5f94182 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + 
test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..ec2342e5 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + ] + ] + ], + "timestamp": "2023-10-17T23:19:12.990284837" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + ] + ] + ], + "timestamp": "2023-10-17T23:19:31.554568147" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + ] + ] + ], + "timestamp": "2023-10-17T23:19:49.629360033" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", + "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + ] + ] + ] + ], + "timestamp": "2023-10-17T23:19:40.711617539" + }, + "test_cat_fastq_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", + "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + ] + ] + ] + ], + "timestamp": "2023-10-18T07:53:20.923560211" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..f0c63f69 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index c9d014b1..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..4274ed57 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 
00000000..70389e66 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 831b7f12..c8e815ae 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 197ea7ca..c22a16ab 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -33,7 +33,6 @@ input: - save_merged: type: boolean description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` - output: - meta: type: map @@ -71,3 +70,6 @@ output: authors: - "@drpatelh" - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..f610b735 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,485 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } 
+ + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + 
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..0fa68c7d --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "timestamp": "2023-10-17T11:04:45.794175881" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "timestamp": "2023-10-17T11:04:10.566343705" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "timestamp": "2023-10-17T11:04:10.582076024" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "timestamp": "2023-10-17T11:05:00.379878948" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 00000000..0f7849ad --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..50e59f2b 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ 
-2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 3961de60..6437a144 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -1,13 +1,18 @@ nextflow_process { name "Test Process FASTQC" - script "modules/nf-core/fastqc/main.nf" + script "../main.nf" process "FASTQC" + tag "modules" + tag "modules_nfcore" tag "fastqc" test("Single-Read") { when { + params { + outdir = "$outputDir" + } process { """ input[0] = [ @@ -21,12 +26,16 @@ nextflow_process { } then { - assert process.success - assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" - assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") - assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) } - } - } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 00000000..e7cb4280 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_bedtointervallist +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index a23abd06..88b24b1a 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -2,7 +2,7 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" @@ -29,7 +29,8 @@ process GATK4_BEDTOINTERVALLIST { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ --INPUT $bed \\ --OUTPUT ${prefix}.interval_list \\ --SEQUENCE_DICTIONARY $dict \\ diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 40daf752..187da885 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -2,8 +2,9 @@ name: gatk4_bedtointervallist description: Creates an interval list from a bed file and a reference dict keywords: - bed - - interval list - bedtointervallist + - gatk4 + - interval list tools: - gatk4: description: | @@ -45,3 +46,6 @@ output: authors: - "@kevinmenden" - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 00000000..db663e14 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createsequencedictionary +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 15a86bea..b47ad162 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,7 +2,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" @@ -27,7 +27,8 @@ process GATK4_CREATESEQUENCEDICTIONARY { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ --TMP_DIR . 
\\ diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index a421e681..f9d70be0 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -1,9 +1,10 @@ name: gatk4_createsequencedictionary description: Creates a sequence dictionary for a reference sequence keywords: + - createsequencedictionary - dictionary - fasta - - createsequencedictionary + - gatk4 tools: - gatk: description: | @@ -14,7 +15,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -37,3 +37,6 @@ output: authors: - "@maxulysse" - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 00000000..9adad104 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,8 @@ +name: gatk4_markduplicates +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..564b86d3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,85 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . 
\\
+        ${reference} \\
+        $args
+
+    # If CRAM output is requested, then run samtools for the conversion
+    if [[ ${prefix} == *.cram ]]; then
+        samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam}
+        rm ${prefix_bam}
+        samtools index ${prefix}
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}.bam"
+    prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}"
+    """
+    touch ${prefix_no_suffix}.bam
+    touch ${prefix_no_suffix}.cram
+    touch ${prefix_no_suffix}.cram.crai
+    touch ${prefix_no_suffix}.bai
+    touch ${prefix}.metrics
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml
new file mode 100644
index 00000000..b0f09d4b
--- /dev/null
+++ b/modules/nf-core/gatk4/markduplicates/meta.yml
@@ -0,0 +1,71 @@
+name: gatk4_markduplicates
+description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.
+keywords:
+  - bam
+  - gatk4
+  - markduplicates
+  - sort
+tools:
+  - gatk4:
+      description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-
+      tool_dev_url: https://github.com/broadinstitute/gatk
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted BAM file
+      pattern: "*.{bam}"
+  - fasta:
+      type: file
+      description: Fasta file
+      pattern: "*.{fasta}"
+  - fasta_fai:
+      type: file
+      description: Fasta index file
+      pattern: "*.{fai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bai: + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - crai: + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/kallisto/index/main.nf b/modules/nf-core/kallisto/index/main.nf deleted file mode 100644 index fb9e44d9..00000000 --- a/modules/nf-core/kallisto/index/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process KALLISTO_INDEX { - tag "$fasta" - label 'process_medium' - - conda "bioconda::kallisto=0.46.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' : - 'biocontainers/kallisto:0.46.2--h4f7b962_1' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("kallisto") , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - kallisto \\ - index \\ - $args \\ - -i kallisto \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS - """ - - stub: - """ - touch kallisto - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/kallisto/index/meta.yml b/modules/nf-core/kallisto/index/meta.yml deleted file mode 100644 index 05dfa53d..00000000 --- a/modules/nf-core/kallisto/index/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: kallisto_index -description: Create kallisto index -keywords: - - kallisto - - kallisto/index - - index -tools: - - kallisto: - description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. - homepage: https://pachterlab.github.io/kallisto/ - documentation: https://pachterlab.github.io/kallisto/manual - tool_dev_url: https://github.com/pachterlab/kallisto - - licence: ["BSD-2-Clause"] - -input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: genome fasta file - pattern: "*.{fasta}" - -output: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - index: - type: directory - description: Kallisto genome index - pattern: "*.idx" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@ggabernet" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..bc0bdb5b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 65d7dd0d..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..68fffa90 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,91 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + setup { + run("FASTQC") { + script "../../fastqc/main.nf" + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = FASTQC.out.zip.collect { it[1] } + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( 
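+                // assertAll evaluates every closure below and reports all
+                // failing assertions together, rather than stopping at the first.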
+ { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + setup { + run("FASTQC") { + script "../../fastqc/main.nf" + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = FASTQC.out.zip.collect { it[1] } + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/picard/collectinsertsizemetrics/environment.yml b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml new file mode 100644 index 00000000..5c85f872 --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml @@ -0,0 +1,7 @@ +name: picard_collectinsertsizemetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.0 diff --git a/modules/nf-core/picard/collectinsertsizemetrics/main.nf b/modules/nf-core/picard/collectinsertsizemetrics/main.nf new file mode 100644 index 00000000..48e4d2ad --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/main.nf @@ -0,0 +1,61 @@ +process PICARD_COLLECTINSERTSIZEMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.1.0--hdfd78af_0' : + 'biocontainers/picard:3.1.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.txt"), emit: metrics + tuple val(meta), path("*.pdf"), emit: histogram + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectInsertSizeMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
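+        // Heap sizing heuristic (see the else branch below), worked example:
+        // for a hypothetical 8.GB task, task.memory.mega == 8192, so
+        // (8192 * 0.8).intValue() == 6553 and the tool is launched with
+        // -Xmx6553M, keeping ~20% headroom for non-heap JVM memory.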
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    picard \\
+        -Xmx${avail_mem}M \\
+        CollectInsertSizeMetrics \\
+        $args \\
+        --INPUT $bam \\
+        --OUTPUT ${prefix}.txt \\
+        --Histogram_FILE ${prefix}.pdf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.pdf
+    touch ${prefix}.txt
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+
+
+
+}
diff --git a/modules/nf-core/picard/collectinsertsizemetrics/meta.yml b/modules/nf-core/picard/collectinsertsizemetrics/meta.yml
new file mode 100644
index 00000000..efd5abe0
--- /dev/null
+++ b/modules/nf-core/picard/collectinsertsizemetrics/meta.yml
@@ -0,0 +1,47 @@
+name: "picard_collectinsertsizemetrics"
+description: Collect metrics about the insert size distribution of a paired-end library.
+keywords:
+  - metrics
+  - alignment
+  - insert
+  - statistics
+  - bam
+tools:
+  - "picard":
+      description: "Java tools for working with NGS data in the BAM format"
+      homepage: "https://broadinstitute.github.io/picard/"
+      documentation: "https://broadinstitute.github.io/picard/"
+      tool_dev_url: "https://github.com/broadinstitute/picard"
+      licence: "['MIT']"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - pdf:
+      type: file
+      description: Histogram plots of the insert size metrics computed by Picard
+      pattern: "*.pdf"
+  - metrics:
+      type: file
+      description: Values used by Picard to generate the insert size histograms
+      pattern: "*.txt"
+authors:
+  - "@FerriolCalvet"
+maintainers:
+  - "@FerriolCalvet"
diff --git a/modules/nf-core/picard/collectwgsmetrics/environment.yml b/modules/nf-core/picard/collectwgsmetrics/environment.yml
new file mode 100644
index 00000000..8adda491
--- /dev/null
+++ b/modules/nf-core/picard/collectwgsmetrics/environment.yml
@@ -0,0 +1,8 @@
+name: picard_collectwgsmetrics
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::picard=3.1.0
+  - r::r-base
diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf
index 1d59334c..67aa5b5e 100644
--- a/modules/nf-core/picard/collectwgsmetrics/main.nf
+++ b/modules/nf-core/picard/collectwgsmetrics/main.nf
@@ -2,10 +2,10 @@ process PICARD_COLLECTWGSMETRICS {
    tag "$meta.id"
    label 'process_single'
-    conda "bioconda::picard=3.0.0 r::r-base"
+    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.0--hdfd78af_0' : + 'biocontainers/picard:3.1.0--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml index 19906f08..5576ef92 100644 --- a/modules/nf-core/picard/collectwgsmetrics/meta.yml +++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml @@ -68,3 +68,8 @@ authors: - "@flowuenne" - "@lassefolkersen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf deleted file mode 100644 index ebfa0864..00000000 --- a/modules/nf-core/picard/markduplicates/main.nf +++ /dev/null @@ -1,65 +0,0 @@ -process PICARD_MARKDUPLICATES { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::picard=3.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" - - input: - tuple val(meta), path(bam) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , optional:true, emit: bai - tuple val(meta), path("*.metrics.txt"), emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3072 - if (!task.memory) { - log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - - """ - picard \\ - -Xmx${avail_mem}M \\ - MarkDuplicates \\ - $args \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.bam \\ - --REFERENCE_SEQUENCE $fasta \\ - --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - touch ${prefix}.bam - touch ${prefix}.bam.bai - touch ${prefix}.MarkDuplicates.metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ -} diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml deleted file mode 100644 index f7693d2f..00000000 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: picard_markduplicates -description: Locate and tag duplicate reads in a BAM file -keywords: - - markduplicates - - pcr - - duplicates - - bam - - sam - - cram -tools: - - picard: - description: | - A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) - data and formats such as SAM/BAM/CRAM and VCF. 
- homepage: https://broadinstitute.github.io/picard/ - documentation: https://broadinstitute.github.io/picard/ - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference genome fasta file - pattern: "*.{fasta,fa}" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fai: - type: file - description: Reference genome fasta index - pattern: "*.{fai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with duplicate reads marked/removed - pattern: "*.{bam}" - - bai: - type: file - description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag - pattern: "*.{bai}" - - metrics: - type: file - description: Duplicate metrics file generated by picard - pattern: "*.{metrics.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@projectoriented" - - "@ramprasadn" diff --git a/modules/nf-core/qualimap/rnaseq/main.nf b/modules/nf-core/qualimap/rnaseq/main.nf deleted file mode 100644 index 044c983f..00000000 --- a/modules/nf-core/qualimap/rnaseq/main.nf +++ /dev/null @@ -1,63 +0,0 @@ -process QUALIMAP_RNASEQ { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::qualimap=2.2.2d" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : - 'biocontainers/qualimap:2.2.2d--1' }" - - input: - tuple val(meta), path(bam) - tuple val(meta2), path(gtf) - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def paired_end = meta.single_end ? 
'' : '-pe' - def memory = (task.memory.mega*0.8).intValue() + 'M' - - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } - """ - unset DISPLAY - mkdir -p tmp - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - qualimap \\ - --java-mem-size=$memory \\ - rnaseq \\ - $args \\ - -bam $bam \\ - -gtf $gtf \\ - -p $strandedness \\ - $paired_end \\ - -outdir $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - mkdir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/qualimap/rnaseq/meta.yml b/modules/nf-core/qualimap/rnaseq/meta.yml deleted file mode 100644 index 7738f08d..00000000 --- a/modules/nf-core/qualimap/rnaseq/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: qualimap_rnaseq -description: Evaluate alignment data -keywords: - - quality control - - qc - - rnaseq -tools: - - qualimap: - description: | - Qualimap 2 is a platform-independent application written in - Java and R that provides both a Graphical User Interface and - a command-line interface to facilitate the quality control of - alignment sequencing data and its derivatives like feature counts. - homepage: http://qualimap.bioinfo.cipf.es/ - documentation: http://qualimap.conesalab.org/doc_html/index.html - doi: 10.1093/bioinformatics/bts503 - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - gtf: - type: file - description: GTF file of the reference genome - pattern: "*.{gtf}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - results: - type: directory - description: Qualimap results dir - pattern: "*/*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..73badedb --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,7 @@ +name: samtools_faidx +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 59ed3088..3aa98822 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 957b25e5..e189af28 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -55,3 +55,7 @@ authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..3c6f95b2 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,7 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 0b20aa4b..256bd7c4 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 8bd2fa6f..01a4ee03 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..c76a9169 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("sarscov2 [BAI]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CRAI]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CSI]") { + + config "./csi.nextflow.config" + + when { + params { + outdir 
= "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..b3baee7f --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "crai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "timestamp": "2023-11-15T15:17:37.30801" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "timestamp": "2023-11-15T15:17:30.869234" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..508659f0 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,7 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 2b7753fd..60f0c634 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 07328431..2200de72 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -46,3 +46,6 @@ output: authors: - "@drpatelh" - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..1f72f3b9 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("test_samtools_sort") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_samtools_sort_stub") { + + config "./nextflow.config" + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..a43566da --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "test_samtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + ] + } + ], + "timestamp": "2023-10-17T17:21:46.5427968" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..d0f35086 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..141e7bd8 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,7 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - 
bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index cb91facf..ddf3f88a 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3b05450b..3dadafae 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -82,3 +82,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 00000000..6db20988 --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,9 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.16.1 + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index d0e20384..cc4f5af5 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -2,10 +2,10 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index 3d8fed0c..e80dbb7d 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -52,7 +52,6 @@ input: - seq_center: type: string description: Sequencing center - output: - bam: type: file @@ -106,8 +105,11 @@ output: type: file description: STAR output bedGraph format file(s) (optional) pattern: "*.bg" - authors: - "@kevinmenden" - "@drpatelh" - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 00000000..4c878474 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,339 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") }, + { assert 
snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + 
[file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") }, + { assert 
snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 00000000..59b735d4 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,769 @@ +{ + "homo_sapiens - paired_end - multiple - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-11-23T13:29:01.19639" + }, + "homo_sapiens - paired_end - multiple - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.857804" + }, + "homo_sapiens - paired_end - arriba - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-11-23T13:25:07.396223" + }, + "homo_sapiens - single_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.24701" + }, + "homo_sapiens - paired_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.383818" + }, + "homo_sapiens - paired_end - arriba - versions": { + "content": [ + [ + "versions.yml:md5,452ef035aacbc68d47041e86279a9333" + ] + ], + "timestamp": "2023-11-23T13:25:07.494015" + }, + "homo_sapiens - paired_end - multiple - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:29:01.396383" + }, + "homo_sapiens - paired_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.368841" + }, + "homo_sapiens - paired_end - arriba - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.102537" + }, + "homo_sapiens - single_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.185369" + }, + "homo_sapiens - paired_end - arriba - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-11-23T13:25:07.348239" + }, 
+ "homo_sapiens - single_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.216183" + }, + "homo_sapiens - paired_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.327236" + }, + "homo_sapiens - single_end - versions": { + "content": [ + [ + "versions.yml:md5,452ef035aacbc68d47041e86279a9333" + ] + ], + "timestamp": "2023-11-23T13:22:55.259282" + }, + "homo_sapiens - paired_end - multiple - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:29:01.022176" + }, + "homo_sapiens - paired_end - arriba - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.15277" + }, + "homo_sapiens - paired_end - multiple - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.52923" + }, + "homo_sapiens - paired_end - multiple - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-11-23T13:29:01.729175" + }, + "homo_sapiens - paired_end - starfusion - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:27:55.905883" + }, + "homo_sapiens - paired_end - starfusion - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.192302" + }, + "homo_sapiens - paired_end - multiple - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.661837" + }, + "homo_sapiens - paired_end - multiple - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:29:00.966417" + }, + "homo_sapiens - paired_end - starfusion - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + ] + ] + ], + "timestamp": "2023-11-23T13:27:56.003675" + }, + "homo_sapiens - paired_end - arriba - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.202776" + }, + "homo_sapiens - single_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.163495" + }, + "homo_sapiens - paired_end - arriba - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.251962" + }, + "homo_sapiens - paired_end - starfusion - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.040843" + }, + "homo_sapiens - single_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.154172" + }, + "homo_sapiens - paired_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.265265" + }, + "homo_sapiens - paired_end - arriba - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.998817" + }, + "homo_sapiens - paired_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:23:33.259699" + }, + "homo_sapiens - paired_end - arriba - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:25:06.849451" + }, + "homo_sapiens - paired_end - multiple - versions": { + "content": [ + [ + "versions.yml:md5,452ef035aacbc68d47041e86279a9333" + ] + ], + "timestamp": 
"2023-11-23T13:29:01.937182" + }, + "homo_sapiens - paired_end - starfusion - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.082408" + }, + "homo_sapiens - paired_end - starfusion - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-11-23T13:27:56.379367" + }, + "homo_sapiens - single_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.175307" + }, + "homo_sapiens - paired_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.413683" + }, + "homo_sapiens - paired_end - starfusion - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.155413" + }, + "homo_sapiens - single_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.144852" + }, + "homo_sapiens - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,452ef035aacbc68d47041e86279a9333" + ] + ], + "timestamp": "2023-11-23T13:23:33.445323" + }, + "homo_sapiens - paired_end - multiple - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-11-23T13:29:01.793129" + }, + "homo_sapiens - single_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.128568" + }, + "homo_sapiens - paired_end - arriba - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.444214" + }, + "homo_sapiens - paired_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.354416" + }, + "homo_sapiens - paired_end - arriba - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:25:06.829799" + }, + "homo_sapiens - paired_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.300509" + }, + "homo_sapiens - paired_end - arriba - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.300383" + }, + "homo_sapiens - paired_end - multiple - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-11-23T13:29:01.13168" + }, + "homo_sapiens - paired_end - multiple - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.462257" + }, + "homo_sapiens - single_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.134799" + }, + "homo_sapiens - paired_end - arriba - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.94699" + }, + "homo_sapiens - paired_end - starfusion - junction": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" + ] + ] + ], + "timestamp": "2023-11-23T13:27:56.228327" + }, + "homo_sapiens - single_end - tab": { + "content": [ + [ + [ + 
{ + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.236346" + }, + "homo_sapiens - paired_end - starfusion - versions": { + "content": [ + [ + "versions.yml:md5,452ef035aacbc68d47041e86279a9333" + ] + ], + "timestamp": "2023-11-23T13:27:56.460903" + }, + "homo_sapiens - paired_end - multiple - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.330463" + }, + "homo_sapiens - paired_end - arriba - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:25:06.86866" + }, + "homo_sapiens - paired_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.313258" + }, + "homo_sapiens - paired_end - starfusion - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.118974" + }, + "homo_sapiens - paired_end - starfusion - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.264699" + }, + "homo_sapiens - paired_end - multiple - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:29:01.076947" + }, + "homo_sapiens - paired_end - arriba - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.050409" + }, + "homo_sapiens - paired_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.274809" + }, + "homo_sapiens - single_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.226143" + }, + "homo_sapiens - paired_end - starfusion - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-11-23T13:27:56.337072" + }, + "homo_sapiens - single_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:22:55.126286" + }, + "homo_sapiens - paired_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:23:33.253884" + }, + "homo_sapiens - single_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:22:55.11799" + }, + "homo_sapiens - paired_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.287684" + }, + "homo_sapiens - paired_end - starfusion - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:27:55.971484" + }, + "homo_sapiens - paired_end - multiple - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.264176" + }, + "homo_sapiens - paired_end - multiple - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.596406" + }, + "homo_sapiens - single_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.205936" + }, + "homo_sapiens - paired_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.340653" + }, + "homo_sapiens - paired_end - 
spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-11-23T13:23:33.398603" + }, + "homo_sapiens - paired_end - starfusion - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.300637" + }, + "homo_sapiens - paired_end - arriba - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + ] + ] + ], + "timestamp": "2023-11-23T13:25:06.887604" + }, + "homo_sapiens - single_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-11-23T13:22:55.195544" + }, + "homo_sapiens - paired_end - starfusion - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.422018" + }, + "homo_sapiens - paired_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.429457" + }, + "homo_sapiens - paired_end - starfusion - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:27:55.93945" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 00000000..2324b9e5 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 00000000..c4ac5808 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 00000000..467b6497 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 
--chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 00000000..8beace16 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..0b35ff51 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +name: star_genomegenerate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.16.1 + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 43424042..d2061844 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,10 +2,10 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index eba2d9cf..1061e1b8 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -31,7 +31,6 @@ input: - gtf: type: file description: GTF file of the reference genome - output: - meta: type: map @@ -46,7 +45,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..eed82926 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("homo_sapiens") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).name).match("index") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..bd4e0caa --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,9c11319b80fdedc90dadce4e0fb42ded" + ] + ], + "timestamp": "2023-11-23T11:18:14.835118" + }, + "index": { + "content": [ + "star" + ], + "timestamp": "2023-11-23T11:31:47.560528" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/stringtie/merge/environment.yml b/modules/nf-core/stringtie/merge/environment.yml new file mode 100644 index 00000000..9914b202 --- /dev/null +++ b/modules/nf-core/stringtie/merge/environment.yml @@ -0,0 +1,7 @@ +name: stringtie_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::stringtie=2.2.1 diff --git a/modules/nf-core/stringtie/merge/main.nf b/modules/nf-core/stringtie/merge/main.nf index 12224f78..c2568219 100644 --- a/modules/nf-core/stringtie/merge/main.nf +++ b/modules/nf-core/stringtie/merge/main.nf @@ -2,7 +2,7 @@ process STRINGTIE_MERGE { label 'process_medium' // Note: 2.7X indices incompatible with AWS iGenomes. - conda "bioconda::stringtie=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : 'biocontainers/stringtie:2.2.1--hecb563c_2' }" diff --git a/modules/nf-core/stringtie/merge/meta.yml b/modules/nf-core/stringtie/merge/meta.yml index 2e9784fe..5d02d678 100644 --- a/modules/nf-core/stringtie/merge/meta.yml +++ b/modules/nf-core/stringtie/merge/meta.yml @@ -32,6 +32,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@yuukiiwa" +maintainers: + - "@yuukiiwa" diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test b/modules/nf-core/stringtie/merge/tests/main.nf.test new file mode 100644 index 00000000..90368134 --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process STRINGTIE_MERGE" + script "../main.nf" + process "STRINGTIE_MERGE" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/merge" + + test("homo_sapiens - forward strandedness") { + + setup { + run("STRINGTIE_STRINGTIE") { + script "../../stringtie/main.nf" + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + } + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("fs_gtf") }, + { assert snapshot(process.out.versions).match("fs_versions") } + ) + } + } + + test("homo_sapiens - reverse strandedness") { + + setup { + run("STRINGTIE_STRINGTIE") { + script "../../stringtie/main.nf" + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + } + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("rs_gtf") }, + { assert snapshot(process.out.versions).match("rs_versions") } + ) + } + } +} diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test.snap b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..3e4bc68f --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "rs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "timestamp": "2023-11-23T14:14:39.697712988" + }, + "rs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ] + ], + "timestamp": "2023-11-23T14:14:39.691894799" + }, + "fs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,d959eb2fab0db48ded7275e0a2e83c05" + ] + ], + "timestamp": "2023-11-23T14:14:20.872841278" + }, + "fs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "timestamp": "2023-11-23T14:14:20.883140097" + } +} \ No newline at end of file 
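A note between the module updates: every test suite added in this changeset drives tool options through `task.ext.args` / `task.ext.prefix`, which is what the small `tests/nextflow.config` files shown above configure. As a minimal sketch of that pattern (the process name EXAMPLE_SORT is hypothetical; the body is loosely modelled on the nf-core samtools/sort module, not copied from this diff):

process EXAMPLE_SORT {
    tag "$meta.id"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.bam"), emit: bam

    script:
    // ext.args / ext.prefix are injected by scoped config, e.g. the withName:
    // blocks in the tests/nextflow.config files above; a prefix such as
    // "${meta.id}.sorted" keeps the output name distinct from the input.
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    samtools sort $args -o ${prefix}.bam $bam
    """
}

With that wiring in place, a suite such as the STRINGTIE_MERGE tests above can be run locally with `nf-test test modules/nf-core/stringtie/merge/tests/main.nf.test`, and adding `--update-snapshot` regenerates `main.nf.test.snap` after an intentional output change (standard nf-test usage, assumed here rather than taken from this diff).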
diff --git a/modules/nf-core/stringtie/merge/tests/tags.yml b/modules/nf-core/stringtie/merge/tests/tags.yml new file mode 100644 index 00000000..58cef46b --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/merge: + - modules/nf-core/stringtie/merge/** diff --git a/modules/nf-core/stringtie/stringtie/environment.yml b/modules/nf-core/stringtie/stringtie/environment.yml new file mode 100644 index 00000000..7a0eccdb --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/environment.yml @@ -0,0 +1,7 @@ +name: stringtie_stringtie +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::stringtie=2.2.1 diff --git a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf index d0f8b563..6e25ba27 100644 --- a/modules/nf-core/stringtie/stringtie/main.nf +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -2,7 +2,7 @@ process STRINGTIE_STRINGTIE { tag "$meta.id" label 'process_medium' - conda "bioconda::stringtie=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : 'biocontainers/stringtie:2.2.1--hecb563c_2' }" diff --git a/modules/nf-core/stringtie/stringtie/meta.yml b/modules/nf-core/stringtie/stringtie/meta.yml index 75518470..d8ebdd88 100644 --- a/modules/nf-core/stringtie/stringtie/meta.yml +++ b/modules/nf-core/stringtie/stringtie/meta.yml @@ -5,7 +5,6 @@ keywords: - assembly - quantification - gtf - tools: - stringtie2: description: | @@ -55,3 +54,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test b/modules/nf-core/stringtie/stringtie/tests/main.nf.test new file mode 100644 index 00000000..68786b74 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process STRINGTIE_STRINGTIE" + script "../main.nf" + process "STRINGTIE_STRINGTIE" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/stringtie" + + test("sarscov2 [bam] - forward strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("fs_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("fs_abundance") }, + { assert snapshot(process.out.versions).match("fs_versions") } + ) + } + } + + test("sarscov2 [bam] - forward strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("fs_gtf_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("fs_gtf_abundance") }, + { assert snapshot(process.out.ballgown).match("fs_gtf_ballgown") }, + { assert snapshot(process.out.versions).match("fs_gtf_versions") } + ) + } + } + + test("sarscov2 [bam] - reverse 
strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("rs_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("rs_abundance") }, + { assert snapshot(process.out.versions).match("rs_versions") } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("rs_gtf_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("rs_gtf_abundance") }, + { assert snapshot(process.out.ballgown).match("rs_gtf_ballgown") }, + { assert snapshot(process.out.versions).match("rs_gtf_versions") } + ) + } + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap new file mode 100644 index 00000000..bf751636 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap @@ -0,0 +1,186 @@ +{ + "fs_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ] + ], + "timestamp": "2023-11-23T13:55:41.032044613" + }, + "fs_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,569137af5be452413086b50653a97203" + ] + ] + ], + "timestamp": "2023-11-23T13:55:41.017978904" + }, + "rs_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ] + ], + "timestamp": "2023-11-23T13:56:13.601112933" + }, + "fs_gtf_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:00.523797974" + }, + "fs_gtf_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,f56cf8aba2c4a5673bc7963ba5f12d04" + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.475164879" + }, + "rs_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:13.623892691" + }, + "rs_gtf_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,bb346053a8c156b803b055133376c7fa" + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.693599559" + }, + "fs_gtf_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,7d8bce7f2a922e367cedccae7267c22e" + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.482135418" + }, + "rs_gtf_ballgown": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,879b6696029d19c4737b562e9d149218", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + 
"i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.715698347" + }, + "rs_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,31c34aec2bf36bb0ea3c16c2afeeeb1f" + ] + ] + ], + "timestamp": "2023-11-23T13:56:13.590054035" + }, + "rs_gtf_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:22.725513476" + }, + "fs_gtf_ballgown": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,6b4cf69bc03f3f69890f972a0e8b7471", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.494299817" + }, + "fs_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:55:41.049417582" + }, + "rs_gtf_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,7385b870b955dae2c2ab78a70cf05cce" + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.701059059" + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/tags.yml b/modules/nf-core/stringtie/stringtie/tests/tags.yml new file mode 100644 index 00000000..da9b051c --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/stringtie: + - modules/nf-core/stringtie/stringtie/** diff --git a/nextflow.config b/nextflow.config index 994e96e3..7570a93b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,11 +32,9 @@ params { starfusion_build = true // Filtering - fusioninspector_filter = false - fusionreport_filter = true + tools_cutoff = 1 // Trimming - trim = false fastp_trim = false trim_tail = null adapter_fasta = [] @@ -55,8 +53,6 @@ params { all = false arriba = false fusioncatcher = false - pizzly = false - squid = false starindex = false starfusion = false stringtie = false @@ -70,13 +66,13 @@ params { // Path to references ensembl_ref = "${params.genomes_base}/ensembl" arriba_ref = "${params.genomes_base}/arriba" - arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.3.0.tsv.gz" - arriba_ref_cytobands = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.3.0.tsv" - arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.3.0.tsv.gz" - arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.3.0.gff3" + arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_cytobands = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv" + arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v102" - pizzly_ref = "${params.genomes_base}/pizzly/kallisto" - squid_ref = "${params.genomes_base}/squid" + hgnc_ref = "${params.genomes_base}/hgnc/hgnc_complete_set.txt" + hgnc_date = "${params.genomes_base}/hgnc/HGNC-DB-timestamp.txt" starfusion_ref = "${params.genomes_base}/starfusion/ctat_genome_lib_build_dir" starindex_ref = "${params.genomes_base}/star" 
fusionreport_ref = "${params.genomes_base}/fusion_report_db" @@ -84,8 +80,6 @@ params { // Path to fusion outputs arriba_fusions = null - pizzly_fusions = null - squid_fusions = null starfusion_fusions = null fusioncatcher_fusions = null fusioninspector_fusions = null @@ -222,6 +216,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -238,8 +233,8 @@ profiles { gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } } @@ -295,7 +290,7 @@ manifest { description = """Nextflow rnafusion analysis pipeline, part of the nf-core community.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4.0' + version = '3.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 02cd1e27..29ae6288 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,7 +32,6 @@ "input": { "type": "string", "format": "file-path", - "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", @@ -62,6 +61,16 @@ "fa_icon": "far fa-file-code", "description": "Specifies which analysis type for the pipeline - either build references or analyse data" }, + "cosmic_username": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC username" + }, + "cosmic_passwd": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC password" + }, "genomes_base": { "type": "string", "fa_icon": "far fa-file-code", @@ -149,11 +158,6 @@ "fa_icon": "far fa-file-code", "description": "Path to fusioncatcher references" }, - "fusioninspector_filter": { - "type": "boolean", - "fa_icon": "far fa-file-code", - "description": "Feed filtered fusionreport fusions to fusioninspector" - }, "fusioninspector_limitSjdbInsertNsj": { "type": "integer", "fa_icon": "far fa-file-code", @@ -179,41 +183,20 @@ "fa_icon": "far fa-file-code", "description": "Path to fusionreport references" }, - "fusionreport_filter": { - "type": "boolean", - "fa_icon": "far fa-file-code", - "default": true, - "description": "Display fusions identified with 2 tools or more" - }, - "pizzly": { - "type": "boolean", - "fa_icon": "far fa-file-code", - "description": "Build or run pizzly references/analyses" - }, - "pizzly_fusions": { + "hgnc_ref": { "type": "string", "fa_icon": "far fa-file-code", - "description": "Path to pizzly output" + "description": "Path to HGNC database file" }, - "pizzly_ref": { + "hgnc_date": { "type": "string", "fa_icon": "far fa-file-code", - "description": "Path to pizzly references" + "description": "Path to HGNC timestamp file for database retrieval" }, - "squid": { + "qiagen": { "type": "boolean", "fa_icon": "far fa-file-code", - "description": "Build or run squid references/analyses" - }, - "squid_fusions": { - "type": "string", - "fa_icon": "far fa-file-code", - "description": "Path to squid output" - }, - "squid_ref": { - "type": "string", - "fa_icon": "far fa-file-code", - "description": "Path to squid references" + "description": "Use QIAGEN instead of SANGER to download COSMIC database" }, "starfusion": { "type": "boolean", @@ -245,25 +228,15 @@ "fa_icon": "far fa-file-code", "description": "Run stringtie analysis" }, - "whitelist": { - "type": "string", - "fa_icon": "far fa-file-code", - "description": "Path to fusions to add to the input of fusioninspector" - }, - 
"cosmic_username": { - "type": "string", + "tools_cutoff": { + "type": "integer", "fa_icon": "far fa-file-code", - "description": "COSMIC username" + "description": "Discard fusions identified by less than INT tools" }, - "cosmic_passwd": { + "whitelist": { "type": "string", "fa_icon": "far fa-file-code", - "description": "COSMIC password" - }, - "qiagen": { - "type": "boolean", - "fa_icon": "far fa-file-code", - "description": "Use QIAGEN instead of SANGER to download COSMIC database" + "description": "Path to fusions to add to the input of fusioninspector" } } }, @@ -273,11 +246,6 @@ "fa_icon": "fas fa-cut", "description": "Options to adjust read trimming criteria.", "properties": { - "trim": { - "type": "boolean", - "description": "Preform trimming of reads, default: false", - "fa_icon": "fas fa-cut" - }, "fastp_trim": { "type": "boolean", "description": "Preform fastp trimming of reads, default: false", @@ -303,7 +271,7 @@ "properties": { "cram": { "type": "string", - "description": "List of tools for which to compress BAM file to CRAM,default: [], options: arriba, squid, starfusion. Leave no space between options", + "description": "List of tools for which to compress BAM file to CRAM,default: [], options: arriba, starfusion. Leave no space between options", "fa_icon": "fas fa-cut" } } @@ -471,14 +439,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -502,7 +468,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -517,7 +482,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -556,7 +520,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -564,7 +527,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -572,7 +534,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
         },
diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf
index 36c3924f..3aa9c090 100644
--- a/subworkflows/local/arriba_workflow.nf
+++ b/subworkflows/local/arriba_workflow.nf
@@ -1,4 +1,6 @@
 include { ARRIBA } from '../../modules/nf-core/arriba/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA} from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA} from '../../modules/nf-core/samtools/sort/main'
 include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA} from '../../modules/nf-core/samtools/view/main'
 include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main'
@@ -35,11 +37,16 @@ workflow ARRIBA_WORKFLOW {
         if (params.cram.contains('arriba') ){
-            SAMTOOLS_VIEW_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, [])
+            SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam)
+            ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions )
+
+            SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, [])
             ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions )
-        }
+            SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram)
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions )
+        }
     }
     else {
diff --git a/subworkflows/local/fusioninspector_workflow.nf b/subworkflows/local/fusioninspector_workflow.nf
index 5fa21cf1..48fcc19f 100644
--- a/subworkflows/local/fusioninspector_workflow.nf
+++ b/subworkflows/local/fusioninspector_workflow.nf
@@ -1,6 +1,7 @@
+include { AGAT_CONVERTSPGFF2TSV } from '../../modules/nf-core/agat/convertspgff2tsv/main'
 include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main'
 include { CAT_CAT } from '../../modules/nf-core/cat/cat/main'
-include { MEGAFUSION } from '../../modules/local/megafusion/main'
+include { VCF_COLLECT } from '../../modules/local/vcf_collect/main'
 include { FUSIONINSPECTOR } from '../../modules/local/fusioninspector/main'

 workflow FUSIONINSPECTOR_WORKFLOW {
@@ -8,16 +9,21 @@
         reads
         fusion_list
         fusion_list_filtered
-        report
+        fusionreport_out
+        fusionreport_csv
         bam_sorted_indexed
         ch_gtf
         ch_arriba_ref_protein_domains
         ch_arriba_ref_cytobands
+        ch_hgnc_ref
+        ch_hgnc_date

     main:
         ch_versions = Channel.empty()
+        ch_arriba_visualisation = Channel.empty()
         index ="${params.starfusion_ref}"
-        ch_fusion_list = ( params.fusioninspector_filter ? fusion_list_filtered : fusion_list )
+
+        ch_fusion_list = ( params.tools_cutoff > 1 ? fusion_list_filtered : fusion_list )
             .branch{
                 no_fusions: it[1].size() == 0
                 fusions: it[1].size() > 0
             }
@@ -33,22 +39,27 @@ workflow FUSIONINSPECTOR_WORKFLOW {
             ch_fusion_list.fusions = CAT_CAT.out.file_out
         }
-        reads_fusion = reads.join(ch_fusion_list.fusions )
+        ch_reads_fusion = reads.join(ch_fusion_list.fusions )

-        FUSIONINSPECTOR( reads_fusion, index)
+        FUSIONINSPECTOR( ch_reads_fusion, index)
         ch_versions = ch_versions.mix(FUSIONINSPECTOR.out.versions)

-        fusion_data = FUSIONINSPECTOR.out.tsv.join(report)
-        MEGAFUSION(fusion_data)
-        ch_versions = ch_versions.mix(MEGAFUSION.out.versions)
+        AGAT_CONVERTSPGFF2TSV(FUSIONINSPECTOR.out.out_gtf)
+        ch_versions = ch_versions.mix(AGAT_CONVERTSPGFF2TSV.out.versions)
+
+        fusion_data = FUSIONINSPECTOR.out.tsv_coding_effect.join(AGAT_CONVERTSPGFF2TSV.out.tsv).join(fusionreport_out).join(fusionreport_csv)
+        VCF_COLLECT(fusion_data, ch_hgnc_ref, ch_hgnc_date)
+        ch_versions = ch_versions.mix(VCF_COLLECT.out.versions)

         if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only && !params.skip_vis) {
-            bam_sorted_indexed_fusions = bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv)
-            ARRIBA_VISUALISATION(bam_sorted_indexed_fusions, ch_gtf, ch_arriba_ref_protein_domains, ch_arriba_ref_cytobands)
+            ch_bam_sorted_indexed_fusions = bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv)
+            ARRIBA_VISUALISATION(ch_bam_sorted_indexed_fusions, ch_gtf, ch_arriba_ref_protein_domains, ch_arriba_ref_cytobands)
             ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions)
+            ch_arriba_visualisation = ARRIBA_VISUALISATION.out.pdf
         }

     emit:
-        versions = ch_versions.ifEmpty(null)
+        ch_arriba_visualisation
+        versions = ch_versions.ifEmpty(null)
 }
diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf
index 478986a4..632ba2a3 100644
--- a/subworkflows/local/fusionreport_workflow.nf
+++ b/subworkflows/local/fusionreport_workflow.nf
@@ -6,28 +6,26 @@ workflow FUSIONREPORT_WORKFLOW {
         reads
         fusionreport_ref
         arriba_fusions
-        pizzly_fusions
-        squid_fusions
         starfusion_fusions
         fusioncatcher_fusions

     main:
         ch_versions = Channel.empty()
         ch_report = Channel.empty()
+        ch_csv = Channel.empty()

         if (!params.fusioninspector_only) {
             reads_fusions = reads
                 .join(arriba_fusions, remainder: true)
-                .join(pizzly_fusions, remainder: true)
-                .join(squid_fusions, remainder: true)
                 .join(starfusion_fusions, remainder: true)
                 .join(fusioncatcher_fusions, remainder: true)

-            FUSIONREPORT(reads_fusions, fusionreport_ref)
+            FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff)
             ch_fusion_list = FUSIONREPORT.out.fusion_list
             ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered
             ch_versions = ch_versions.mix(FUSIONREPORT.out.versions)
             ch_report = FUSIONREPORT.out.report
+            ch_csv = FUSIONREPORT.out.csv
         } else {
             ch_fusion_list = reads.combine(Channel.value(file(params.fusioninspector_fusions, checkIfExists:true)))
                 .map { meta, reads, fusions -> [ meta, fusions ] }
@@ -39,6 +37,8 @@ workflow FUSIONREPORT_WORKFLOW {
         versions = ch_versions.ifEmpty(null)
         fusion_list = ch_fusion_list
         fusion_list_filtered = ch_fusion_list_filtered
-        report = ch_report.ifEmpty(null)
+        report = ch_report.ifEmpty(null)
+        csv = ch_csv.ifEmpty(null)
+
 }
diff --git a/subworkflows/local/pizzly_workflow.nf b/subworkflows/local/pizzly_workflow.nf
deleted file mode 100644
index 7675432b..00000000
--- a/subworkflows/local/pizzly_workflow.nf
+++ /dev/null
@@ -1,37 +0,0 @@
-include { KALLISTO_QUANT } from '../../modules/local/kallisto/quant/main'
-include { PIZZLY } from '../../modules/local/pizzly/detect/main'
-
-workflow PIZZLY_WORKFLOW {
-    take:
-        reads
-        ch_gtf
-        ch_transcript
-
-    main:
-        ch_versions = Channel.empty()
-        ch_dummy_file = file("$baseDir/assets/dummy_file_pizzly.txt", checkIfExists: true)
-
-        if ((params.pizzly || params.all) && !params.fusioninspector_only) {
-            if (params.pizzly_fusions) {
-                ch_pizzly_fusions = reads.combine(Channel.value(file(params.pizzly_fusions, checkIfExists:true)))
-                    .map { meta, reads, fusions -> [ meta, fusions ] }
-            } else {
-                KALLISTO_QUANT(reads, params.pizzly_ref )
-                ch_versions = ch_versions.mix(KALLISTO_QUANT.out.versions)
-
-                PIZZLY( KALLISTO_QUANT.out.txt, ch_transcript, ch_gtf )
-                ch_versions = ch_versions.mix(PIZZLY.out.versions)
-
-                ch_pizzly_fusions = PIZZLY.out.fusions
-            }
-        }
-        else {
-            ch_pizzly_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
-                .map { meta, reads, fusions -> [ meta, fusions ] }
-        }
-
-    emit:
-        fusions = ch_pizzly_fusions
-        versions = ch_versions.ifEmpty(null)
-    }
-
diff --git a/subworkflows/local/qc_workflow.nf b/subworkflows/local/qc_workflow.nf
index bdf887d1..576701e5 100644
--- a/subworkflows/local/qc_workflow.nf
+++ b/subworkflows/local/qc_workflow.nf
@@ -2,9 +2,9 @@
 // Check input samplesheet and get read channels
 //
-include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq/main'
 include { PICARD_COLLECTRNASEQMETRICS } from '../../modules/local/picard/collectrnaseqmetrics/main'
-include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/picard/markduplicates/main'
+include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/gatk4/markduplicates/main'
+include { PICARD_COLLECTINSERTSIZEMETRICS } from '../../modules/nf-core/picard/collectinsertsizemetrics/main'

 workflow QC_WORKFLOW {
     take:
@@ -19,24 +19,24 @@
     main:
         ch_versions = Channel.empty()

-        QUALIMAP_RNASEQ(ch_bam_sorted, ch_chrgtf)
-        ch_versions = ch_versions.mix(QUALIMAP_RNASEQ.out.versions)
-        ch_qualimap_qc = Channel.empty().mix(QUALIMAP_RNASEQ.out.results)
-
         PICARD_COLLECTRNASEQMETRICS(ch_bam_sorted_indexed, ch_refflat, ch_rrna_interval)
         ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions)
         ch_rnaseq_metrics = Channel.empty().mix(PICARD_COLLECTRNASEQMETRICS.out.metrics)

-        PICARD_MARKDUPLICATES(ch_bam_sorted, ch_fasta, ch_fai)
-        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions)
-        ch_duplicate_metrics = Channel.empty().mix(PICARD_MARKDUPLICATES.out.metrics)
+        GATK4_MARKDUPLICATES(ch_bam_sorted, ch_fasta.map { meta, fasta -> [ fasta ]}, ch_fai.map { meta, fasta_fai -> [ fasta_fai ]})
+        ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions)
+        ch_duplicate_metrics = Channel.empty().mix(GATK4_MARKDUPLICATES.out.metrics)
+
+        PICARD_COLLECTINSERTSIZEMETRICS(ch_bam_sorted)
+        ch_versions = ch_versions.mix(PICARD_COLLECTINSERTSIZEMETRICS.out.versions)
+        ch_insertsize_metrics = Channel.empty().mix(PICARD_COLLECTINSERTSIZEMETRICS.out.metrics)

     emit:
         versions = ch_versions.ifEmpty(null)
-        qualimap_qc = ch_qualimap_qc
         rnaseq_metrics = ch_rnaseq_metrics
         duplicate_metrics = ch_duplicate_metrics
+        insertsize_metrics = ch_insertsize_metrics
 }
diff --git a/subworkflows/local/squid_workflow.nf b/subworkflows/local/squid_workflow.nf
deleted file mode 100644
index c4f29425..00000000
--- a/subworkflows/local/squid_workflow.nf
+++ /dev/null
@@ -1,80 +0,0 @@
-include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID } from '../../modules/nf-core/samtools/index/main'
-include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/index/main'
-include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC } from '../../modules/nf-core/samtools/view/main'
-include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM } from '../../modules/nf-core/samtools/view/main'
-include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC } from '../../modules/nf-core/samtools/view/main'
-include { SQUID } from '../../modules/local/squid/detect/main'
-include { SQUID_ANNOTATE } from '../../modules/local/squid/annotate/main'
-include { STAR_ALIGN as STAR_FOR_SQUID } from '../../modules/nf-core/star/align/main'
-
-workflow SQUID_WORKFLOW {
-
-    take:
-        reads
-        ch_gtf
-        ch_starindex_ensembl_ref
-        ch_fasta
-
-    main:
-        ch_versions = Channel.empty()
-        ch_dummy_file = file("$baseDir/assets/dummy_file_squid.txt", checkIfExists: true)
-
-        if ((params.squid || params.all) && !params.fusioninspector_only) {
-            if (params.squid_fusions){
-                ch_squid_fusions = reads.combine(Channel.value(file(params.squid_fusions, checkIfExists:true)))
-                    .map { meta, reads, fusions -> [ meta, fusions ] }
-            } else {
-
-                STAR_FOR_SQUID(reads, ch_starindex_ensembl_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '')
-                ch_versions = ch_versions.mix(STAR_FOR_SQUID.out.versions)
-
-                STAR_FOR_SQUID.out.sam
-                    .map { meta, sam ->
-                        return [meta, sam, []]
-                    }.set { chimeric_sam }
-
-
-
-                SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC (chimeric_sam, ch_fasta, [])
-                ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.versions)
-
-                SAMTOOLS_SORT_FOR_SQUID_CHIMERIC (SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.bam)
-                ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.versions)
-
-                bam_chimeric = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam)
-
-                if (params.cram.contains('squid')){
-                    SAMTOOLS_INDEX_FOR_SQUID(STAR_FOR_SQUID.out.bam_sorted)
-                    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID.out.versions)
-                    SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam)
-                    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.versions)
-
-                    bam_sorted_indexed = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_SQUID.out.bai)
-                    chimeric_sorted_indexed = SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam.join(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.bai)
-
-                    SAMTOOLS_VIEW_FOR_SQUID_CRAM (bam_sorted_indexed, ch_fasta, [])
-                    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions)
-                    SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC (chimeric_sorted_indexed, ch_fasta, [])
-                    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions)
-                }
-
-                SQUID (bam_chimeric)
-                ch_versions = ch_versions.mix(SQUID.out.versions)
-
-                SQUID_ANNOTATE (SQUID.out.fusions, ch_gtf)
-                ch_versions = ch_versions.mix(SQUID_ANNOTATE.out.versions)
-
-                ch_squid_fusions = SQUID_ANNOTATE.out.fusions_annotated
-            }
-        }
-        else {
-            ch_squid_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
-                .map { meta, reads, fusions -> [ meta, fusions ] }
-        }
-
-    emit:
-        fusions = ch_squid_fusions
-        versions = ch_versions.ifEmpty(null)
-    }
-
diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf
index c9ba4bf3..38264c01 100644
--- a/subworkflows/local/starfusion_workflow.nf
+++ b/subworkflows/local/starfusion_workflow.nf
@@ -1,7 +1,8 @@
-include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION } from '../../modules/nf-core/samtools/view/main'
-include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION } from '../../modules/nf-core/samtools/index/main'
-include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main'
-include { STARFUSION } from '../../modules/local/starfusion/detect/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION_CRAM } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION } from '../../modules/nf-core/samtools/view/main'
+include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main'
+include { STARFUSION } from '../../modules/local/starfusion/detect/main'

 workflow STARFUSION_WORKFLOW {
     take:
@@ -33,6 +34,9 @@ workflow STARFUSION_WORKFLOW {
         if (params.cram.contains('starfusion')){
             SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] )
             ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions)
+
+            SAMTOOLS_INDEX_FOR_STARFUSION_CRAM (SAMTOOLS_VIEW_FOR_STARFUSION.out.cram)
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION_CRAM.out.versions)
         }

         reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction )
@@ -41,16 +45,19 @@
             ch_starfusion_fusions = STARFUSION.out.fusions
             ch_star_stats = STAR_FOR_STARFUSION.out.log_final
+            ch_star_gene_count = STAR_FOR_STARFUSION.out.read_per_gene_tab
         }
     } else {
         ch_starfusion_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
             .map { meta, reads, fusions -> [ meta, fusions ] }
         ch_star_stats = Channel.empty()
+        ch_star_gene_count = Channel.empty()
     }

     emit:
         fusions = ch_starfusion_fusions
         star_stats = ch_star_stats
+        star_gene_count = ch_star_gene_count
         ch_bam_sorted = ch_align.ifEmpty([[],[]])
         ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty([[],[],[]])
         versions = ch_versions.ifEmpty(null)
diff --git a/subworkflows/local/stringtie_workflow.nf b/subworkflows/local/stringtie_workflow.nf
index d9e1c6c3..93a50e0c 100644
--- a/subworkflows/local/stringtie_workflow.nf
+++ b/subworkflows/local/stringtie_workflow.nf
@@ -12,7 +12,7 @@ workflow STRINGTIE_WORKFLOW {
         ch_stringtie_gtf = Channel.empty()

         if ((params.stringtie || params.all) && !params.fusioninspector_only) {
-            STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf)
+            STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf.map { meta, gtf -> [ gtf ]})
             ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions)

             STRINGTIE_STRINGTIE
@@ -21,7 +21,7 @@
                 .map { it -> it[1] }
                 .set { stringtie_gtf }

-            STRINGTIE_MERGE ( stringtie_gtf, ch_chrgtf )
+            STRINGTIE_MERGE (stringtie_gtf, ch_chrgtf.map { meta, gtf -> [ gtf ]})
             ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions)
             ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf
         }
diff --git a/subworkflows/local/trim_workflow.nf b/subworkflows/local/trim_workflow.nf
index bf3781f8..61cce26f 100644
--- a/subworkflows/local/trim_workflow.nf
+++ b/subworkflows/local/trim_workflow.nf
@@ -1,5 +1,3 @@
-include { REFORMAT } from '../../modules/local/reformat/main'
-include { FASTQC as FASTQC_FOR_TRIM } from '../../modules/nf-core/fastqc/main'
 include { FASTP } from '../../modules/nf-core/fastp/main'
 include { FASTQC as FASTQC_FOR_FASTP } from '../../modules/nf-core/fastqc/main'
@@ -10,18 +8,11 @@ workflow TRIM_WORKFLOW {
     main:
         ch_versions = Channel.empty()
+        ch_fastp_html = Channel.empty()
+        ch_fastp_json = Channel.empty()
+        ch_fastqc_trimmed = Channel.empty()
-        if (params.trim) {
-
-            REFORMAT( reads )
-            ch_versions = ch_versions.mix(REFORMAT.out.versions)
-            FASTQC_FOR_TRIM (REFORMAT.out.reads_out)
-            ch_versions = ch_versions.mix(FASTQC_FOR_TRIM.out.versions)
-
-            ch_reads_all = reads
-            ch_reads_fusioncatcher = REFORMAT.out.reads_out
-        }
-        else if (params.fastp_trim) {
+        if (params.fastp_trim) {
             FASTP(reads, params.adapter_fasta, false, false)
             ch_versions = ch_versions.mix(FASTP.out.versions)
@@ -30,6 +21,9 @@
             ch_reads_all = FASTP.out.reads
             ch_reads_fusioncatcher = ch_reads_all
+            ch_fastp_html = FASTP.out.html
+            ch_fastp_json = FASTP.out.json
+            ch_fastqc_trimmed = FASTQC_FOR_FASTP.out.zip
         }
         else {
@@ -40,6 +34,9 @@
     emit:
         ch_reads_all
         ch_reads_fusioncatcher
+        ch_fastp_html
+        ch_fastp_json
+        ch_fastqc_trimmed
         versions = ch_versions.ifEmpty(null)
 }
diff --git a/tower.yml b/tower.yml
index 5813f5d3..2edf5a7f 100644
--- a/tower.yml
+++ b/tower.yml
@@ -13,20 +13,16 @@ reports:
     display: "FusionInspector TSV report"
   "**/fusionreport/*/*_fusionreport_index.html":
     display: "Fusion-report HTML report"
-  "**/megafusion/*_fusion_data.vcf":
+  "**/vcf/*_fusion_data.vcf.gz":
    display: "Collected statistics on each fusion fed to FusionInspector in VCF format"
   "**/picard/*.MarkDuplicates.metrics.txt":
    display: "GATK4: Metrics from MarkDuplicates"
   "**/picard/*_rna_metrics.txt":
-    display: "Picard: Metrics from MarkDuplicates"
-  "**/pizzly/*.pizzly.txt":
-    display: "Pizzly identified fusion TXT report"
-  "**/qualimap/qualimapReport.html":
-    display: "Qualimap HTML report from STAR_FOR_STARFUSION alignment"
-  "**/qualimap/rnaseq_qc_results.txt":
-    display: "Qualimap QC results from STAR_FOR_STARFUSION alignment in TXT format"
-  "**/squid/*.squid.fusions.annotated.txt":
-    display: "Squid identified fusion TXT report"
+    display: "Picard: Metrics from CollectRnaSeqMetrics"
+  "**/picard/*insert*size*metrics.txt":
+    display: "Picard: Metrics from CollectInsertSizeMetrics"
+  "**/picard/*pdf":
+    display: "Picard: Insert size metrics histogram"
   "**/star_for_starfusion/*ReadsPerGene.out.tab":
     display: "Number of reads per gene"
   "**/starfusion/*.starfusion.fusion_predictions.tsv":
diff --git a/workflows/build_references.nf b/workflows/build_references.nf
index 6a03edb6..0ebf3c08 100644
--- a/workflows/build_references.nf
+++ b/workflows/build_references.nf
@@ -8,6 +8,7 @@ include { ARRIBA_DOWNLOAD } from '../modules/local/arriba/downlo
 include { ENSEMBL_DOWNLOAD } from '../modules/local/ensembl/main'
 include { FUSIONCATCHER_DOWNLOAD } from '../modules/local/fusioncatcher/download/main'
 include { FUSIONREPORT_DOWNLOAD } from '../modules/local/fusionreport/download/main'
+include { HGNC_DOWNLOAD } from '../modules/local/hgnc/main'
 include { STARFUSION_BUILD } from '../modules/local/starfusion/build/main'
 include { STARFUSION_DOWNLOAD } from '../modules/local/starfusion/download/main'
 include { GTF_TO_REFFLAT } from '../modules/local/uscs/custom_gtftogenepred/main'
@@ -21,7 +22,6 @@ include { CONVERT2BED } from '../modules/local/convert2bed/m
 include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main'
 include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main'
-include { KALLISTO_INDEX as PIZZLY_INDEX } from '../modules/nf-core/kallisto/index/main'
 include { GATK4_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/gatk4/createsequencedictionary/main'
 include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/gatk4/bedtointervallist/main'
@@ -36,6 +36,7 @@ workflow BUILD_REFERENCES {
     def fake_meta = [:]
     fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}"
     ENSEMBL_DOWNLOAD( params.ensembl_version, params.genome, fake_meta )
+    HGNC_DOWNLOAD( )

     SAMTOOLS_FAIDX(ENSEMBL_DOWNLOAD.out.fasta, [[],[]])
@@ -47,7 +48,7 @@
     GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict)

-    if (params.starindex || params.all || params.starfusion || params.arriba || params.squid ) {
+    if (params.starindex || params.all || params.starfusion || params.arriba) {
         STAR_GENOMEGENERATE( ENSEMBL_DOWNLOAD.out.fasta, ENSEMBL_DOWNLOAD.out.gtf )
     }
@@ -59,10 +60,6 @@
         FUSIONCATCHER_DOWNLOAD()
     }

-    if (params.pizzly || params.all) {
-        PIZZLY_INDEX( ENSEMBL_DOWNLOAD.out.transcript )
-    }
-
     if (params.starfusion || params.all) {
         if (params.starfusion_build){
             STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.fasta, ENSEMBL_DOWNLOAD.out.chrgtf )
diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf
index 6f498e11..8319b49e 100644
--- a/workflows/rnafusion.nf
+++ b/workflows/rnafusion.nf
@@ -19,6 +19,8 @@ WorkflowRnafusion.initialise(params, log)

 if (file(params.input).exists() || params.build_references) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet does not exist or was not specified!' }
 if (params.fusioninspector_only && !params.fusioninspector_fusions) { exit 1, 'Parameter --fusioninspector_fusions PATH_TO_FUSION_LIST expected with parameter --fusioninspector_only'}
+if (params.tools_cutoff < 1) { exit 1, 'Parameter: --tools_cutoff should be >= 1'}
+
 ch_chrgtf = params.starfusion_build ? Channel.fromPath(params.chrgtf).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_annot.gtf").map { it -> [[id:it.Name], it] }.collect()
 ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect()
@@ -30,8 +32,8 @@ ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it
 ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect()
 ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect()
 ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect()
-
-
+ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect()
+ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect()
 ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.Name], it] }.collect()
 ch_gtf = Channel.fromPath(params.gtf).map { it -> [[id:it.Name], it] }.collect()
 ch_transcript = Channel.fromPath(params.transcript).map { it -> [[id:it.Name], it] }.collect()
@@ -54,7 +56,6 @@ if (params_fasta_path_uri){
 else {
     for (param in checkPathParamList) if ((param.toString())!= file(param).toString() && !params.build_references) { exit 1, "Problem with ${param}: ABSOLUTE PATHS are required! Check for trailing '/' at the end of paths too."
 } }
-if ((params.squid || params.all) && params.ensembl_version != 102) { exit 1, 'Ensembl version is not supported by squid' }

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -80,9 +81,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow'
 include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow'
-include { PIZZLY_WORKFLOW } from '../subworkflows/local/pizzly_workflow'
 include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow'
-include { SQUID_WORKFLOW } from '../subworkflows/local/squid_workflow'
 include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow'
 include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow'
 include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow'
@@ -180,26 +179,6 @@ workflow RNAFUSION {
     )
     ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions.first().ifEmpty(null))

-    // Run pizzly/kallisto
-
-    PIZZLY_WORKFLOW (
-        ch_reads_all,
-        ch_gtf,
-        ch_transcript
-    )
-    ch_versions = ch_versions.mix(PIZZLY_WORKFLOW.out.versions.first().ifEmpty(null))
-
-
-// Run squid
-
-    SQUID_WORKFLOW (
-        ch_reads_all,
-        ch_gtf,
-        ch_starindex_ensembl_ref,
-        ch_fasta
-    )
-    ch_versions = ch_versions.mix(SQUID_WORKFLOW.out.versions.first().ifEmpty(null))
-
     //Run STAR fusion

     STARFUSION_WORKFLOW (
@@ -231,8 +210,6 @@ workflow RNAFUSION {
         ch_reads_all,
         ch_fusionreport_ref,
         ARRIBA_WORKFLOW.out.fusions,
-        PIZZLY_WORKFLOW.out.fusions,
-        SQUID_WORKFLOW.out.fusions,
         STARFUSION_WORKFLOW.out.fusions,
         FUSIONCATCHER_WORKFLOW.out.fusions
     )
@@ -245,10 +222,13 @@ workflow RNAFUSION {
         FUSIONREPORT_WORKFLOW.out.fusion_list,
         FUSIONREPORT_WORKFLOW.out.fusion_list_filtered,
         FUSIONREPORT_WORKFLOW.out.report,
+        FUSIONREPORT_WORKFLOW.out.csv,
         STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed,
         ch_chrgtf,
         ch_arriba_ref_protein_domains,
-        ch_arriba_ref_cytobands
+        ch_arriba_ref_cytobands,
+        ch_hgnc_ref,
+        ch_hgnc_date
     )
     ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions.first().ifEmpty(null))
@@ -284,10 +264,15 @@
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.qualimap_qc.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([]))
@@ -315,6 +300,7 @@ workflow.onComplete {
     if (params.email || params.email_on_fail) {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
+    NfcoreTemplate.dump_parameters(workflow, params)
     NfcoreTemplate.summary(workflow, params, log)
     if (params.hook_url) {
         NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
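
In summary, the boolean pair `--fusioninspector_filter` / `--fusionreport_filter` removed in this diff is superseded by the single integer `--tools_cutoff`: for example, `--tools_cutoff 2` on the command line keeps only fusions called by at least two tools. The sketch below condenses the new behaviour from the hunks above into one place; it is an illustration assembled from the patched code, not an additional patch hunk.

    // From workflows/rnafusion.nf: a cutoff below 1 is rejected at startup.
    if (params.tools_cutoff < 1) { exit 1, 'Parameter: --tools_cutoff should be >= 1'}

    // From subworkflows/local/fusioninspector_workflow.nf: with a cutoff of 1
    // (the lowest allowed) the unfiltered fusion list is inspected; a higher
    // cutoff switches to the fusion-report list filtered on the number of
    // supporting tools. Samples whose list ends up empty skip FusionInspector.
    ch_fusion_list = ( params.tools_cutoff > 1 ? fusion_list_filtered : fusion_list )
        .branch{
            no_fusions: it[1].size() == 0
            fusions: it[1].size() > 0
        }

The cutoff is applied consistently at both stages, since FUSIONREPORT itself now also receives the parameter: `FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff)`.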