From a0a7fa58adc43e8801b099be26cdb53a12fddfce Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Thu, 14 Jan 2021 15:27:03 +0000 Subject: [PATCH 1/5] fixing the inputs for the pipeline --- loading/tsv2hdf.nf | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/loading/tsv2hdf.nf b/loading/tsv2hdf.nf index 5a73ddf..00ad66f 100644 --- a/loading/tsv2hdf.nf +++ b/loading/tsv2hdf.nf @@ -15,8 +15,6 @@ tsv_to_process = Channel.fromPath(tsv_glob) /* Any previously generated HDF5 files in the hdf5_study_dir will be included in the chromosome + quant_method files. */ -hdf5_study_glob = new File(params.hdf5_study_dir, "*/file_*.h5") -hdf5_study = Channel.fromPath(hdf5_study_glob) /* @@ -30,7 +28,8 @@ process study_tsv_to_hdf5 { containerOptions "--bind $params.tsv_in" containerOptions "--bind $params.hdf5_study_dir" containerOptions "--bind $params.meta_table" - + + publishDir "$params.hdf5_study_dir", mode: 'copy' memory { 8.GB * task.attempt } @@ -43,7 +42,6 @@ process study_tsv_to_hdf5 { output: file "${chr}/*.h5" optional true into study - val true into study2hdf_complete """ mkdir $chr; @@ -64,7 +62,7 @@ process consolidate_hdfs_by_chrom { containerOptions "--bind $params.hdf5_chrom_dir" containerOptions "--bind $params.meta_table" - publishDir "$params.hdf5_chrom_dir", mode: 'copy' + publishDir "$params.hdf5_chrom_dir", mode: 'move' memory { 8.GB * task.attempt } maxRetries 3 @@ -73,8 +71,7 @@ process consolidate_hdfs_by_chrom { input: each chr from params.chromosomes each method from params.quant_methods - val flag from study2hdf_complete.collect() - file "${chr}/*.h5" from hdf5_study.collect() + file "${chr}/*.h5" from study.collect() output: file "file_${chr}.${method}.h5" optional true into hdf5_chrom @@ -84,4 +81,5 @@ process consolidate_hdfs_by_chrom { echo $method; eqtl-consolidate -in_dir $params.hdf5_study_dir -out_file file_${chr}.${method}.h5 -meta $params.meta_table -quant $method -chrom $chr """ + } From fea31a152032e6b00787794358538c481701c7a9 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Thu, 14 Jan 2021 16:41:01 +0000 Subject: [PATCH 2/5] fix singularity binding --- loading/nextflow.config | 19 +++++++++---------- loading/tsv2hdf.nf | 14 ++++---------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/loading/nextflow.config b/loading/nextflow.config index 72e2f09..91b7f08 100644 --- a/loading/nextflow.config +++ b/loading/nextflow.config @@ -8,22 +8,21 @@ params.quant_methods = ['ge', 'microarray', 'exon', 'tx', 'txrev'] 'study', 'qtl_group', 'quant_method', 'tissue_ontology_id','filename' where 'filename' stores the basenames of the input tsvs */ -params.meta_table = 'example_metadata.tsv' +params.meta_table = 'data/example_metadata.tsv' // The directory for the input tsvs: -params.tsv_in = './sumstats_tsvs/' +params.tsv_in = 'data/sumstats_small/' // The directories where the HDF5 are published (by study, by chromsome) -params.hdf5_study_dir = './bystudy/' -params.hdf5_chrom_dir = './bychr/' +params.hdf5_study_dir = 'data/hdf/bystudy/' +params.hdf5_chrom_dir = 'data/hdf/bychr/' -executor { - name = 'local' - queueSize = 1 -} +// The parent directory for your container to bind +params.data_dir = 'data/' -//// For LSF execution the exitReadTimeout may need increasing to -//// something much larger than you'd expect + +// For LSF execution the exitReadTimeout may need increasing to +// something much larger than you'd expect //executor { // name = 'lsf' // queueSize = 100 diff --git a/loading/tsv2hdf.nf b/loading/tsv2hdf.nf index 00ad66f..d16763e 100644 --- a/loading/tsv2hdf.nf +++ b/loading/tsv2hdf.nf @@ -25,11 +25,7 @@ tsv_to_process = Channel.fromPath(tsv_glob) process study_tsv_to_hdf5 { - containerOptions "--bind $params.tsv_in" - containerOptions "--bind $params.hdf5_study_dir" - containerOptions "--bind $params.meta_table" - - + containerOptions "--bind $params.data_dir" publishDir "$params.hdf5_study_dir", mode: 'copy' memory { 8.GB * task.attempt } @@ -58,11 +54,8 @@ Consolidate all chromosome + quant method combinations into their own HDF5 files process consolidate_hdfs_by_chrom { - containerOptions "--bind $params.hdf5_study_dir" - containerOptions "--bind $params.hdf5_chrom_dir" - containerOptions "--bind $params.meta_table" - - publishDir "$params.hdf5_chrom_dir", mode: 'move' + containerOptions "--bind $params.data_dir" + publishDir "$params.hdf5_chrom_dir", mode: 'copy' memory { 8.GB * task.attempt } maxRetries 3 @@ -79,6 +72,7 @@ process consolidate_hdfs_by_chrom { """ echo $chr; echo $method; + ls -lR $params.hdf5_study_dir; eqtl-consolidate -in_dir $params.hdf5_study_dir -out_file file_${chr}.${method}.h5 -meta $params.meta_table -quant $method -chrom $chr """ From cdf0f43cce50faf42a5844fe14546bd7f194cdcb Mon Sep 17 00:00:00 2001 From: jdhayhurst <38317975+jdhayhurst@users.noreply.github.com> Date: Thu, 14 Jan 2021 16:43:01 +0000 Subject: [PATCH 3/5] Update nextflow.config --- loading/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loading/nextflow.config b/loading/nextflow.config index 91b7f08..35402c5 100644 --- a/loading/nextflow.config +++ b/loading/nextflow.config @@ -11,7 +11,7 @@ params.quant_methods = ['ge', 'microarray', 'exon', 'tx', 'txrev'] params.meta_table = 'data/example_metadata.tsv' // The directory for the input tsvs: -params.tsv_in = 'data/sumstats_small/' +params.tsv_in = 'data/sumstats/' // The directories where the HDF5 are published (by study, by chromsome) params.hdf5_study_dir = 'data/hdf/bystudy/' From cd886a9714cc7ed99ffd07e621518a04a17ccc40 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Fri, 15 Jan 2021 10:43:09 +0000 Subject: [PATCH 4/5] fix LSF error strategy --- loading/tsv2hdf.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loading/tsv2hdf.nf b/loading/tsv2hdf.nf index d16763e..f43185f 100644 --- a/loading/tsv2hdf.nf +++ b/loading/tsv2hdf.nf @@ -30,7 +30,7 @@ process study_tsv_to_hdf5 { memory { 8.GB * task.attempt } maxRetries 3 - errorStrategy { task.exitStatus == 140 ? 'retry' : 'terminate' } + errorStrategy { task.exitStatus == 130 ? 'retry' : 'terminate' } input: each chr from params.chromosomes @@ -59,7 +59,7 @@ process consolidate_hdfs_by_chrom { memory { 8.GB * task.attempt } maxRetries 3 - errorStrategy { task.exitStatus == 140 ? 'retry' : 'terminate' } + errorStrategy { task.exitStatus == 130 ? 'retry' : 'terminate' } input: each chr from params.chromosomes From 840b65965394604638d4c6c01ab4db95bf5422c2 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Fri, 15 Jan 2021 10:48:01 +0000 Subject: [PATCH 5/5] fix LSF error strategy --- loading/tsv2hdf.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/loading/tsv2hdf.nf b/loading/tsv2hdf.nf index f43185f..47490fc 100644 --- a/loading/tsv2hdf.nf +++ b/loading/tsv2hdf.nf @@ -72,7 +72,6 @@ process consolidate_hdfs_by_chrom { """ echo $chr; echo $method; - ls -lR $params.hdf5_study_dir; eqtl-consolidate -in_dir $params.hdf5_study_dir -out_file file_${chr}.${method}.h5 -meta $params.meta_table -quant $method -chrom $chr """