Skip to content

Commit

Permalink
Merge pull request #36 from eQTL-Catalogue/loader_pipeline
Browse files Browse the repository at this point in the history
Loader pipeline
  • Loading branch information
jdhayhurst authored Jan 15, 2021
2 parents e017f2a + 840b659 commit 791ed56
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 25 deletions.
19 changes: 9 additions & 10 deletions loading/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,21 @@ params.quant_methods = ['ge', 'microarray', 'exon', 'tx', 'txrev']
'study', 'qtl_group', 'quant_method', 'tissue_ontology_id','filename'
where 'filename' stores the basenames of the input tsvs
*/
params.meta_table = 'example_metadata.tsv'
params.meta_table = 'data/example_metadata.tsv'

// The directory for the input tsvs:
params.tsv_in = './sumstats_tsvs/'
params.tsv_in = 'data/sumstats/'

// The directories where the HDF5 files are published (by study, by chromosome)
params.hdf5_study_dir = './bystudy/'
params.hdf5_chrom_dir = './bychr/'
params.hdf5_study_dir = 'data/hdf/bystudy/'
params.hdf5_chrom_dir = 'data/hdf/bychr/'

executor {
name = 'local'
queueSize = 1
}
// The parent directory for your container to bind
params.data_dir = 'data/'

//// For LSF execution the exitReadTimeout may need increasing to
//// something much larger than you'd expect

// For LSF execution the exitReadTimeout may need increasing to
// something much larger than you'd expect
//executor {
// name = 'lsf'
// queueSize = 100
Expand Down
21 changes: 6 additions & 15 deletions loading/tsv2hdf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ tsv_to_process = Channel.fromPath(tsv_glob)
/* Any previously generated HDF5 files in the hdf5_study_dir will be included
in the chromosome + quant_method files.
*/
hdf5_study_glob = new File(params.hdf5_study_dir, "*/file_*.h5")
hdf5_study = Channel.fromPath(hdf5_study_glob)


/*
Expand All @@ -27,23 +25,19 @@ hdf5_study = Channel.fromPath(hdf5_study_glob)

process study_tsv_to_hdf5 {

containerOptions "--bind $params.tsv_in"
containerOptions "--bind $params.hdf5_study_dir"
containerOptions "--bind $params.meta_table"

containerOptions "--bind $params.data_dir"
publishDir "$params.hdf5_study_dir", mode: 'copy'

memory { 8.GB * task.attempt }
maxRetries 3
errorStrategy { task.exitStatus == 140 ? 'retry' : 'terminate' }
errorStrategy { task.exitStatus == 130 ? 'retry' : 'terminate' }

input:
each chr from params.chromosomes
file tsv from tsv_to_process

output:
file "${chr}/*.h5" optional true into study
val true into study2hdf_complete

"""
mkdir $chr;
Expand All @@ -60,21 +54,17 @@ Consolidate all chromosome + quant method combinations into their own HDF5 files

process consolidate_hdfs_by_chrom {

containerOptions "--bind $params.hdf5_study_dir"
containerOptions "--bind $params.hdf5_chrom_dir"
containerOptions "--bind $params.meta_table"

containerOptions "--bind $params.data_dir"
publishDir "$params.hdf5_chrom_dir", mode: 'copy'

memory { 8.GB * task.attempt }
maxRetries 3
errorStrategy { task.exitStatus == 140 ? 'retry' : 'terminate' }
errorStrategy { task.exitStatus == 130 ? 'retry' : 'terminate' }

input:
each chr from params.chromosomes
each method from params.quant_methods
val flag from study2hdf_complete.collect()
file "${chr}/*.h5" from hdf5_study.collect()
file "${chr}/*.h5" from study.collect()

output:
file "file_${chr}.${method}.h5" optional true into hdf5_chrom
Expand All @@ -84,4 +74,5 @@ process consolidate_hdfs_by_chrom {
echo $method;
eqtl-consolidate -in_dir $params.hdf5_study_dir -out_file file_${chr}.${method}.h5 -meta $params.meta_table -quant $method -chrom $chr
"""

}

0 comments on commit 791ed56

Please sign in to comment.