Skip to content

Commit

Permalink
Remove check_max calls and copied function, and remove duplicated blocks in SAGE config and replace with resourceLimits
Browse files Browse the repository at this point in the history
  • Loading branch information
jfy133 committed Oct 4, 2024
1 parent 5b12fc2 commit b8f2140
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 89 deletions.
138 changes: 50 additions & 88 deletions conf/sage.config
Original file line number Diff line number Diff line change
@@ -1,30 +1,16 @@
// Config profile metadata
params {
config_profile_description = 'The Sage Bionetworks Nextflow Config Profile'
config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
}

// Leverage us-east-1 mirror of select human and mouse genomes
params {
igenomes_base = 's3://sage-igenomes/igenomes'
cpus = 4
max_cpus = 32
max_memory = 128.GB
max_time = 240.h
single_cpu_mem = 6.GB
}
config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'

// Enable retries globally for certain exit codes
process {
resourceLimits = [
memory: 128.GB,
cpus: 32,
time: 240.h
]
maxErrors = '-1'
maxRetries = 5
errorStrategy = { task.attempt <= 5 ? 'retry' : 'finish' }
// Leverage us-east-1 mirror of select human and mouse genomes
igenomes_base = 's3://sage-igenomes/igenomes'
cpus = 4
max_cpus = 32
max_memory = 128.GB
max_time = 240.h
single_cpu_mem = 6.GB
}

// Increase time limit to allow file transfers to finish
Expand All @@ -35,57 +21,67 @@ threadPool.FileTransfer.maxAwait = '24 hour'
aws {
region = "us-east-1"
client {
uploadMaxThreads = 4
uploadMaxThreads = 4
}
batch {
retryMode = 'built-in'
maxParallelTransfers = 1
maxTransferAttempts = 10
delayBetweenAttempts = '60 sec'
retryMode = 'built-in'
maxParallelTransfers = 1
maxTransferAttempts = 10
delayBetweenAttempts = '60 sec'
}
}

// Adjust default resource allocations (see `../docs/sage.md`)

process {

cpus = { check_max( 1 * factor(task, 2), 'cpus' ) }
memory = { check_max( 6.GB * factor(task, 1), 'memory' ) }
time = { check_max( 24.h * factor(task, 1), 'time' ) }
resourceLimits = [
memory: 128.GB,
cpus: 32,
time: 240.h
]

maxErrors = '-1'
maxRetries = 5
// Enable retries globally for certain exit codes
errorStrategy = { task.attempt <= 5 ? 'retry' : 'finish' }

cpus = { 1 * factor(task, 2) }
memory = { 6.GB * factor(task, 1) }
time = { 24.h * factor(task, 1) }

// Process-specific resource requirements
withLabel: 'process_single' {
cpus = { check_max( 1 * factor(task, 2), 'cpus' ) }
memory = { check_max( 6.GB * factor(task, 1), 'memory' ) }
time = { check_max( 24.h * factor(task, 1), 'time' ) }
withLabel: process_single {
cpus = { 1 * factor(task, 2) }
memory = { 6.GB * factor(task, 1) }
time = { 24.h * factor(task, 1) }
}
withLabel: 'process_low' {
cpus = { check_max( 2 * factor(task, 2), 'cpus' ) }
memory = { check_max( 12.GB * factor(task, 1), 'memory' ) }
time = { check_max( 24.h * factor(task, 1), 'time' ) }
withLabel: process_low {
cpus = { 2 * factor(task, 2) }
memory = { 12.GB * factor(task, 1) }
time = { 24.h * factor(task, 1) }
}
withLabel: 'process_medium' {
cpus = { check_max( 8 * factor(task, 2), 'cpus' ) }
memory = { check_max( 32.GB * factor(task, 1), 'memory' ) }
time = { check_max( 48.h * factor(task, 1), 'time' ) }
withLabel: process_medium {
cpus = { 8 * factor(task, 2) }
memory = { 32.GB * factor(task, 1) }
time = { 48.h * factor(task, 1) }
}
withLabel: 'process_high' {
cpus = { check_max( 16 * factor(task, 2), 'cpus' ) }
memory = { check_max( 64.GB * factor(task, 1), 'memory' ) }
time = { check_max( 96.h * factor(task, 1), 'time' ) }
withLabel: process_high {
cpus = { 16 * factor(task, 2) }
memory = { 64.GB * factor(task, 1) }
time = { 96.h * factor(task, 1) }
}
withLabel: 'process_long' {
time = { check_max( 96.h * factor(task, 1), 'time' ) }
withLabel: process_long {
time = { 96.h * factor(task, 1) }
}
withLabel: 'process_high_memory|memory_max' {
memory = { check_max( 128.GB * factor(task, 1), 'memory' ) }
memory = { 128.GB * factor(task, 1) }
}
withLabel: 'cpus_max' {
cpus = { check_max( 32 * factor(task, 2), 'cpus' ) }
withLabel: cpus_max {
cpus = { 32 * factor(task, 2) }
}

}


// Function to finely control the increase of the resource allocation
def factor(task, slow_factor = 1) {
if ( task.exitStatus in [143,137,104,134,139,247] ) {
Expand All @@ -94,37 +90,3 @@ def factor(task, slow_factor = 1) {
return 1 as int
}
}


// Function to ensure that resource requirements don't go
// beyond a maximum limit (copied here for Sarek v2, which does
// not ship its own check_max()).
//   obj  : the requested value (nextflow.util.MemoryUnit, nextflow.util.Duration,
//          or an int-like cpu count)
//   type : one of 'memory', 'time', 'cpus'
// Returns the requested value capped at params.max_memory / params.max_time /
// params.max_cpus; on an invalid maximum it warns and returns obj unchanged.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def max_memory = params.max_memory as nextflow.util.MemoryUnit
            // Comparable.compareTo() only guarantees a POSITIVE result when
            // obj > max (not exactly 1), so test the sign rather than == 1
            if (obj.compareTo(max_memory) > 0)
                return max_memory
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def max_time = params.max_time as nextflow.util.Duration
            // Same sign-based comparison as above for Duration values
            if (obj.compareTo(max_time) > 0)
                return max_time
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            // cpus are plain integers, so Math.min is sufficient
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}
1 change: 0 additions & 1 deletion docs/sage.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ This global configuration includes the following tweaks:
- Increase the default time limits because we run pipelines on AWS
- Increase the amount of time allowed for file transfers
- Improve reliability of file transfers with retries and reduced concurrency
- Define the `check_max()` function, which is missing in Sarek v2

## Additional information about iGenomes

Expand Down

0 comments on commit b8f2140

Please sign in to comment.