Merge branch 'master' into docs-update-install
christopher-hakkaart authored Sep 5, 2024
2 parents 6efdfef + 6e866ae commit f987a32
Showing 76 changed files with 810 additions and 215 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -143,7 +143,7 @@ jobs:
- name: Run tests
run: |
cat $HOME/.nextflow/scm
- make assemble install
+ make clean assemble install
bash test-ci.sh
env:
TEST_JDK: ${{ matrix.java_version }}
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
- 24.07.0-edge
+ 24.08.0-edge
59 changes: 58 additions & 1 deletion changelog.txt
@@ -1,5 +1,62 @@
NEXTFLOW CHANGE-LOG
===================
+ 24.08.0-edge - 4 Sep 2024
+ - Add Google Batch warning for conflicting disk image config (#5279) [96cb57cb]
+ - Add support for Google Batch user-specified boot images (#5268) [0aaa6482]
+ - Disable AWS spot retry (#5215) [f28fcb25]
+ - Disable Google Batch automatic spot retries (#5223) [aad21533]
+ - Disable automatic detection of virtual threads (#5270) [b3ba2c2d]
+ - Fix missing .command.env when eval is used and task runs on a cloud env [4a6b54aa]
+ - Fix job array syntax for PBS/Torque executor (#5281) [d59f5fae]
+ - Fix k8s client status condition possibly null in podState (#5264) [46672415]
+ - Fix non-deterministic behaviour of join operator with bag array as key (#5189) [e7dc0d69]
+ - Fix stage retry on corrupted HTTP downloads (#5275) [bf0cd326]
+ - Support Azure Managed Identities in Fusion configuration logic (#5278) [a0bf8b40]
+ - Use public.cr.seqera.io in place of AWS ECR [5a01f277]
+ - Wave client logs improvement [5a37e617]
+ - Bump amazoncorretto:21-al2023 [59aed581]
+ - Bump [email protected] [97c4e08f]
+ - Bump [email protected] [24133f2a]
+ - Bump [email protected] [29f49ba7]
+ - Bump [email protected] [bbc3adca]

24.07.0-edge - 8 Aug 2024
- Add runtime error for missing channel factory (#5170) [1f9210ab]
- Apply k8s.cpuLimits to kuberun driver pod (#5160) [4300adf1]
- Await build completion for all Wave containers [2b8117e9]
- Deprecate module addParams() and params() (#5200) [82c97f8c]
- Remove capsule launcher dependencies (#3395) [f15e4246]
- Fix AWS Cloudwatch access when using custom log group name [30195838]
- Fix Invalid AWS Fargate CPUs usage error reporting [d9c50e59]
- Fix Prevent AWS Batch retrying the job execution when the container does not exist [4e218f22]
- Fix Prevent negative cpus values [af2e4ef7]
- Fix ProcessUnrecoverableException detection [17ec6c96]
- Fix aws logs group name (#5146) [643b16b0]
- Fix readAllBytes with http files (#5202) [7e90ce62]
- Improve Google Batch 5000x error class handling (#5141) [61b2205f]
- Improve docs [7703f0d0]
- Improve error message for topic channel feature flag (#5161) [c4d407fb]
- Improve fusion docs (#5166) [abdd78f1]
- Improve queue docs (#5154) [bc2fb409]
- Improve version checking [e7a879de]
- Link to other docs on the failOnIgnore option (#5122) [572f2116]
- Make Google Batch auto retry codes configurable (#5148) [e562ce06]
- More robust parsing of shm-size containerOptions (#5177) [b56802a3]
- Prevent default output/error logs for job arrays (#5152) [71f6ed0b]
- Bump amazon sdk to version 1.12.766 [cc6ec314]
- Bump gradle 8.9 [a437e97f]
- Bump jgit 6.10.0 [177dc474]
- Bump [email protected] [46c69d77]
- Bump [email protected] [26dcb604]
- Bump [email protected] [c171b601]
- Bump [email protected] [d7dd4611]
- Bump [email protected] [c1175157]
- Bump [email protected] [1eda221a]
- Bump [email protected] [257bebfd]
- Bump [email protected] [e8d643c2]
- Bump pf4j to version 3.12.0 [96117b9a]
- Bump wave-api to 0.11.1 [96ec4ded]

24.04.4 - 1 Aug 2024
- Fix parsing of shm-size containerOptions (#5177) [98cf0068]
- Fix aws logs group name (#5146) [b2ab651c]
@@ -24,7 +81,7 @@ NEXTFLOW CHANGE-LOG
- Bump jgit 6.10.0 [4cf6b9f7]

24.04.3 - 9 Jul 2024
- - Add ability to override failOnError setting default via env variable (#5117) [ci fast] [6852429c]
+ - Add ability to override failOnError setting default via env variable (#5117) [6852429c]
- Fix normalization of consecutive slashes in uri path (#5114) [3f366b7e]
- Fix executions hangs on finalisation exception (#5070) [4c207c23]
- Bump [email protected] [55ec5ec5]
2 changes: 1 addition & 1 deletion docker/Dockerfile
@@ -1,4 +1,4 @@
- FROM amazoncorretto:17-al2023
+ FROM amazoncorretto:21-al2023
RUN yum install -y procps-ng shadow-utils which

ENV NXF_HOME=/.nextflow
8 changes: 2 additions & 6 deletions docker/Makefile
@@ -24,14 +24,10 @@ build-arm: dist/docker/arm64
docker buildx build --platform linux/arm64 --output=type=docker --progress=plain --tag nextflow/nextflow:${version} --build-arg TARGETPLATFORM=linux/arm64 .

release: build
- docker tag nextflow/nextflow:${version} nextflow/nextflow:latest
docker push nextflow/nextflow:${version}
- docker push nextflow/nextflow:latest
#
- docker tag nextflow/nextflow:${version} public.ecr.aws/seqera-labs/nextflow:${version}
- docker tag nextflow/nextflow:${version} public.ecr.aws/seqera-labs/nextflow:latest
- docker push public.ecr.aws/seqera-labs/nextflow:${version}
- docker push public.ecr.aws/seqera-labs/nextflow:latest
+ docker tag nextflow/nextflow:${version} public.cr.seqera.io/nextflow/nextflow:${version}
+ docker push public.cr.seqera.io/nextflow/nextflow:${version}

#Static builds can now be found at:
#
2 changes: 1 addition & 1 deletion docs/_templates/layout.html
@@ -21,7 +21,7 @@
<div class="nav-footer-logo">
<a href="https://seqera.io/" target="_blank" title="Developed by Seqera Labs">
Nextflow is developed by:<br>
- <img src="_static/seqera-logo.svg" alt="Seqera Labs">
+ <img src="{{ pathto('_static/seqera-logo.svg', 1) }}" alt="Seqera Labs">
</a>
</div>
{% endblock %}
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -79,7 +79,7 @@

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
html_title = f"Nextflow v{release} documentation"
html_title = f"Nextflow {release} documentation"

# Get the current sha if not checked out at a specific version
if len(release) == 0:
15 changes: 13 additions & 2 deletions docs/config.md
@@ -197,7 +197,10 @@ The following settings are available:
`aws.batch.maxSpotAttempts`
: :::{versionadded} 22.04.0
:::
- : Max number of execution attempts of a job interrupted by a EC2 spot reclaim event (default: `5`)
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by an EC2 spot reclaim event (default: `0`)

`aws.batch.maxTransferAttempts`
: Max number of download attempts from S3 (default: `1`).
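
With the new default of `0`, Nextflow no longer retries jobs reclaimed by an EC2 spot interruption. As a minimal configuration sketch (the value `5` simply mirrors the old default and is not a recommendation), a pipeline can opt back into the previous behaviour:

```groovy
// Sketch: restore the pre-24.08.0-edge spot retry behaviour on AWS Batch.
// maxSpotAttempts caps how many times a spot-reclaimed job is re-executed.
aws.batch.maxSpotAttempts = 5
```
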
@@ -869,7 +872,10 @@ The following settings are available for Google Cloud Batch:
`google.batch.maxSpotAttempts`
: :::{versionadded} 23.11.0-edge
:::
- : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `5`).
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `0`).
: See also: `google.batch.autoRetryExitCodes`
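
The same opt-in applies to Google Batch. A hedged sketch combining the retry cap with the `autoRetryExitCodes` setting referenced above (the exit code `50001` is Google Batch's documented VM-preemption code, stated here as an assumption):

```groovy
// Sketch: re-enable automatic retries for spot-reclaimed Google Batch tasks.
google.batch.maxSpotAttempts = 5
// Exit codes treated as retryable spot reclaims; 50001 denotes VM preemption.
google.batch.autoRetryExitCodes = [50001]
```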

`google.project`
@@ -880,6 +886,11 @@ The following settings are available for Google Cloud Batch:
:::
: Define the set of allowed locations for VMs to be provisioned. See [Google documentation](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#locationpolicy) for details (default: no restriction).

+ `google.batch.bootDiskImage`
+ : :::{versionadded} 24.08.0-edge
+ :::
+ : Set the image URI of the virtual machine boot disk, e.g. `batch-debian`. See [Google documentation](https://cloud.google.com/batch/docs/vm-os-environment-overview#vm-os-image-options) for details (default: none).

`google.batch.bootDiskSize`
: Set the size of the virtual machine boot disk, e.g. `50.GB` (default: none).
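
Taken together, a minimal sketch that pins both the boot disk image and its size for Google Batch jobs (the image name `batch-debian` follows the example above; substitute an image URI valid for your project):

```groovy
// Sketch: customize the Google Batch VM boot disk.
google.batch.bootDiskImage = 'batch-debian'
google.batch.bootDiskSize = 50.GB
```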

65 changes: 58 additions & 7 deletions docs/fusion.md
@@ -44,7 +44,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'azure-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -71,7 +71,7 @@ wave.enabled = true
process.executor = 'awsbatch'
process.queue = '<YOUR BATCH QUEUE>'
aws.region = '<YOUR AWS REGION>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -146,7 +146,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'google-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -172,10 +172,10 @@ process.executor = 'k8s'
k8s.context = '<YOUR K8S CONFIGURATION CONTEXT>'
k8s.namespace = '<YOUR K8S NAMESPACE>'
k8s.serviceAccount = '<YOUR K8S SERVICE ACCOUNT>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

- The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is run as a pod in the Kubernetes clusters.
+ The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is running as a pod in the Kubernetes cluster.

The `k8s.namespace` represents the Kubernetes namespace where the jobs submitted by the pipeline execution should be executed.

@@ -191,9 +191,10 @@ nextflow run <YOUR PIPELINE> -work-dir s3://<YOUR BUCKET>/scratch
You can also use Fusion and Kubernetes with Azure Blob Storage and Google Cloud Storage using the same deployment approach.
:::

- ### Local execution
+ ### Local execution with AWS S3

Fusion file system allows the use of an S3 bucket as a pipeline work directory with the Nextflow local executor. This configuration requires the use of Docker (or similar container engine) for the execution of your pipeline tasks.

The AWS S3 bucket credentials should be made accessible via standard `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.

@@ -204,6 +205,7 @@ docker.enabled = true
fusion.enabled = true
fusion.exportStorageCredentials = true
wave.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -223,6 +225,55 @@ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task
This option should only be used for development purposes.
:::

+ ### Local execution with Minio
+
+ [Minio](https://min.io/) is an open source, enterprise-grade object storage service compatible with AWS S3. Nextflow and Fusion can use Minio (or any other S3-compatible object storage) as an alternative to AWS S3 in some deployment scenarios.
+
+ This configuration requires the use of Nextflow local execution and Docker (or a similar container engine) for the execution of your pipeline tasks.
+
+ For the sake of this example, run a local instance of Minio using this command:
+
+ ```bash
+ docker run -p 9000:9000 \
+     --rm -d -p 9001:9001 \
+     -e "MINIO_ROOT_USER=admin" \
+     -e "MINIO_ROOT_PASSWORD=secret" \
+     quay.io/minio/minio server /data --console-address ":9001"
+ ```
+
+ Open the Minio console by pointing your browser at `http://localhost:9001`, then create a credentials pair and a bucket. For the sake of this example, the bucket name `foobar` will be used.
+
+ The following configuration should be added to your Nextflow configuration file:
+
+ ```groovy
+ aws.accessKey = '<YOUR MINIO ACCESS KEY>'
+ aws.secretKey = '<YOUR MINIO SECRET KEY>'
+ aws.client.endpoint = 'http://localhost:9000'
+ aws.client.s3PathStyleAccess = true
+ wave.enabled = true
+ fusion.enabled = true
+ fusion.exportStorageCredentials = true
+ docker.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
+ ```
+
+ Then you can run your pipeline using the following command:
+
+ ```bash
+ nextflow run <YOUR PIPELINE> -work-dir s3://foobar/scratch
+ ```
+
+ Replace `<YOUR PIPELINE>` with a pipeline script of your choice and `foobar` with your bucket name.
+
+ :::{warning}
+ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow.
+ This option should only be used for development purposes.
+ :::

## Advanced settings

Fusion advanced configuration settings are described in the {ref}`Fusion <config-fusion>` section on the Nextflow configuration page.
2 changes: 1 addition & 1 deletion docs/google.md
@@ -156,7 +156,7 @@ process myTask {

:::{note}
Using an instance template will overwrite the `accelerator` and `disk` directives, as well as the following Google Batch
- config options: `cpuPlatform`, `preemptible`, and `spot`.
+ config options: `bootDiskImage`, `cpuPlatform`, `preemptible`, and `spot`.
:::

To use an instance template with GPUs, you must also set the `google.batch.installGpuDrivers` config option to `true`.
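
To illustrate the note above, a process might select an instance template through the `machineType` directive using the `template://` prefix; the template name `my-template` and the task command are placeholders:

```groovy
process myGpuTask {
    // Sketch: run on a pre-created Compute Engine instance template.
    // The accelerator and disk directives, and the config options listed
    // above, are ignored when a template is used.
    machineType 'template://my-template'

    script:
    """
    nvidia-smi
    """
}
```

When the template attaches GPUs, `google.batch.installGpuDrivers = true` must also be set in the configuration, as noted above.
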
6 changes: 5 additions & 1 deletion docs/module.md
@@ -98,6 +98,10 @@ workflow {

## Module parameters

+ :::{deprecated} 24.07.0-edge
+ As a best practice, parameters should be used in the entry workflow and passed to functions / processes / workflows as explicit inputs.
+ :::
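
To make the recommended pattern concrete, here is a minimal sketch (the process and parameter names are invented for illustration): the parameter is declared in the entry script and handed to the process as an explicit input, rather than read from `params` inside the module:

```groovy
// Sketch: declare the parameter in the entry script only...
params.greeting = 'hello'

process SAY {
    input:
    val greeting

    output:
    stdout

    script:
    """
    echo '$greeting world'
    """
}

workflow {
    // ...and pass it to the process as an explicit input.
    SAY(params.greeting) | view
}
```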

A module script can define parameters using the same syntax as a Nextflow workflow script:

```groovy
@@ -247,7 +251,7 @@ baseDir

Modules can define binary scripts that are locally scoped to the processes defined in the module.

- To enable this feature, enable the following flag in your pipeline script or configuration file:
+ To enable this feature, enable the following flag in your pipeline configuration file:

```groovy
nextflow.enable.moduleBinaries = true
```
modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy
@@ -757,7 +757,7 @@ class BashWrapperBuilder {
result += copyFileToWorkDir(TaskRun.CMD_ERRFILE) + ' || true' + ENDL
if( statsEnabled )
result += copyFileToWorkDir(TaskRun.CMD_TRACE) + ' || true' + ENDL
- if( outputEnvNames )
+ if( outputEnvNames || outputEvals )
result += copyFileToWorkDir(TaskRun.CMD_ENV) + ' || true' + ENDL
return result
}
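
The added `outputEvals` check pairs with the changelog entry "Fix missing .command.env when eval is used and task runs on a cloud env": the `.command.env` file is now copied back to the task work directory when a process declares `eval` outputs, not only named environment outputs. A minimal sketch of a process that would exercise this path (the captured command is illustrative):

```groovy
process toolVersion {
    output:
    // An eval output runs the command in the task environment and captures
    // its stdout through .command.env, which must reach the work directory.
    eval('bash --version | head -n 1')

    script:
    """
    echo 'task done'
    """
}

workflow {
    toolVersion() | view
}
```
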
modules/nextflow/src/main/groovy/nextflow/executor/CrgExecutor.groovy
@@ -49,10 +49,8 @@ class CrgExecutor extends SgeExecutor {

result << '-N' << getJobNameFor(task)

- if( task !instanceof TaskArrayRun ) {
-     result << '-o' << quote(task.workDir.resolve(TaskRun.CMD_LOG))
-     result << '-j' << 'y'
- }
+ result << '-o' << (task.isArray() ? '/dev/null' : quote(task.workDir.resolve(TaskRun.CMD_LOG)))
+ result << '-j' << 'y'

result << '-terse' << '' // note: directive need to be returned as pairs

modules/nextflow/src/main/groovy/nextflow/executor/LsfExecutor.groovy
@@ -70,9 +70,7 @@ class LsfExecutor extends AbstractGridExecutor implements TaskArrayExecutor {
*/
protected List<String> getDirectives(TaskRun task, List<String> result) {

- if( task !instanceof TaskArrayRun ) {
-     result << '-o' << task.workDir.resolve(TaskRun.CMD_LOG).toString()
- }
+ result << '-o' << (task.isArray() ? '/dev/null' : task.workDir.resolve(TaskRun.CMD_LOG).toString())

// add other parameters (if any)
if( task.config.queue ) {
modules/nextflow/src/main/groovy/nextflow/executor/PbsExecutor.groovy
@@ -46,15 +46,13 @@

if( task instanceof TaskArrayRun ) {
final arraySize = task.getArraySize()
- result << '-J' << "0-${arraySize - 1}".toString()
+ result << '-t' << "0-${arraySize - 1}".toString()
}

result << '-N' << getJobNameFor(task)

- if( task !instanceof TaskArrayRun ) {
-     result << '-o' << quote(task.workDir.resolve(TaskRun.CMD_LOG))
-     result << '-j' << 'oe'
- }
+ result << '-o' << (task.isArray() ? '/dev/null' : quote(task.workDir.resolve(TaskRun.CMD_LOG)))
+ result << '-j' << 'oe'

// the requested queue name
if( task.config.queue ) {
@@ -190,7 +188,7 @@ class PbsExecutor extends AbstractGridExecutor implements TaskArrayExecutor {

@Override
String getArrayIndexName() {
- return 'PBS_ARRAY_INDEX'
+ return 'PBS_ARRAYID'
}

@Override
modules/nextflow/src/main/groovy/nextflow/executor/PbsProExecutor.groovy
@@ -58,10 +58,8 @@ class PbsProExecutor extends PbsExecutor {

result << '-N' << getJobNameFor(task)

- if( task !instanceof TaskArrayRun ) {
-     result << '-o' << quote(task.workDir.resolve(TaskRun.CMD_LOG))
-     result << '-j' << 'oe'
- }
+ result << '-o' << (task.isArray() ? '/dev/null' : quote(task.workDir.resolve(TaskRun.CMD_LOG)))
+ result << '-j' << 'oe'

// the requested queue name
if( task.config.queue ) {
@@ -131,4 +129,9 @@
DECODE_STATUS.get(status)
}

+ @Override
+ String getArrayIndexName() {
+     return 'PBS_ARRAY_INDEX'
+ }

}
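
The executor changes above all serve Nextflow's job array support: when a task belongs to an array (`task.isArray()`), its `-o` output is redirected to `/dev/null` because each array element writes its own log, and the PBS/Torque executor now submits arrays with `-t` and reads the `PBS_ARRAYID` index variable, while PBS Pro overrides the index variable back to `PBS_ARRAY_INDEX`. A hedged sketch of a pipeline that would exercise this path on a PBS cluster (the queue name is a placeholder):

```groovy
process sweep {
    executor 'pbs'
    queue 'workq'   // placeholder queue name
    array 100       // group tasks into job arrays of up to 100 elements

    input:
    val x

    script:
    """
    echo "processing sample $x"
    """
}

workflow {
    sweep(Channel.of(1..1000))
}
```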