Skip to content

Commit

Permalink
Implement ROSE
Browse files Browse the repository at this point in the history
  • Loading branch information
nictru committed Mar 26, 2024
1 parent 6490409 commit 57993c1
Show file tree
Hide file tree
Showing 10 changed files with 777 additions and 4 deletions.
666 changes: 666 additions & 0 deletions bin/rose.py

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc"
}
'GRCh38' {
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -37,6 +38,7 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc"
}
'CHM13' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -59,6 +61,7 @@ params {
macs_gsize = "1.87e9"
blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc"
}
'TAIR10' {
fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa"
Expand Down Expand Up @@ -294,6 +297,7 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc"
}
'hg19' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -308,6 +312,7 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc"
}
'mm10' {
fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -322,6 +327,7 @@ params {
macs_gsize = "1.87e9"
blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc"
}
'bosTau8' {
fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa"
Expand Down
19 changes: 19 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,25 @@ process {
withName: ".*:CHROMHMM:REHEADER_CONTROL" {
ext.prefix = {"${meta.id}_control"}
}

// Options for the GAWK alias BED_TO_GFF: rewrites each tab-separated input
// record as a nine-column, tab-separated record.
withName: BED_TO_GFF {
// awk program (the \$ escapes keep Groovy from interpolating the awk fields):
//   col 1 -> input column 1
//   col 2 -> literal "bed2gff"
//   col 3 -> literal "region"
//   col 4 -> input column 2 plus one (presumably converting a 0-based BED
//            start into a 1-based GFF start -- confirm against the input)
//   col 5 -> input column 3, unchanged
//   col 6-9 -> "." placeholders
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} { print \$1, \"bed2gff\", \"region\", \$2+1, \$3, \".\", \".\", \".\", \".\"}'"}
// Output file name is built from the sample id plus the suffix below
// (assumes the gawk module names its output "<prefix>.<suffix>" -- TODO confirm).
ext.prefix = {"$meta.id"}
ext.suffix = "gff"
}

// Options for the GAWK alias REFORMAT_GFF: normalises the sequence name and
// rewrites the record layout before the file is handed to the ROSE process.
withName: REFORMAT_GFF {
// awk program (the \$ escapes keep Groovy from interpolating the awk fields):
//   - if column 1 does not start with "chr", prefix it with "chr"
//   - overwrite column 2 with "seq_<record number>"
//   - print: col 1, col 2, empty, col 4, col 5, empty, col 7, empty, col 2
// NOTE(review): presumably this is the column layout rose.py expects (id in
// columns 2 and 9, columns 3/6/8 blank) -- confirm against bin/rose.py.
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {if(!match(\$1, /^chr/)) \$1=\"chr\"\$1; \$2=\"seq_\"NR; print \$1, \$2, \"\", \$4, \$5, \"\", \$7, \"\", \$2}'"}
// "_reformatted" is appended to the sample id for this step's prefix.
ext.prefix = {"${meta.id}_reformatted"}
ext.suffix = "gff"
}

// Options for the GAWK alias ROSE_OUTPUT_TO_BED: reduces each tab-separated
// record of the ROSE output to three columns.
withName: ROSE_OUTPUT_TO_BED {
// awk program: print column 1, column 4 minus one, and column 5
// (the inverse of the "+1" applied to the start column in BED_TO_GFF;
// presumably converting a 1-based GFF start back to a 0-based BED start).
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {print \$1, \$4-1, \$5}'"}
ext.prefix = {"$meta.id"}
ext.suffix = "bed"
}

withName: ".*DYNAMITE:FILTER" {
ext.args = {"'BEGIN{OFS=\"\\t\"} NR==1 || (\$2 >= ${params.dynamite_min_regression} || \$2 <= -${params.dynamite_min_regression} )'"}
ext.prefix = {"${meta.id}.filtered"}
Expand Down
2 changes: 2 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ params.fasta = getGenomeAttribute('fasta')
params.gtf = getGenomeAttribute('gtf')
params.blacklist = getGenomeAttribute('blacklist')
params.pwms = getGenomeAttribute('pwms')
params.rose_ucsc = getGenomeAttribute('rose_ucsc')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -87,6 +88,7 @@ workflow NFCORE_TFACTIVITY {
samplesheet_bam,
PREPARE_GENOME.out.chrom_sizes,
params.chromhmm_states,
params.rose_ucsc,
params.window_size,
params.decay,
params.merge_samples,
Expand Down
31 changes: 31 additions & 0 deletions modules/local/rose/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
 * ROSE: runs the bundled `rose.py` script on a GFF file and produces a
 * "<input basename>_STITCHED.gff" file.
 *
 * Inputs:
 *   - tuple val(meta), path(gff): sample metadata map and the GFF to process
 *   - path ucsc_file:             annotation file passed to `rose.py -g`
 *
 * Output:
 *   - tuple val(meta), path("<gff basename>_STITCHED.gff")
 *
 * Tuning flags are taken from `task.ext.args`; when it is not set, the
 * previous hard-coded values `-s 12500 -t 2500` are used, so existing
 * configurations behave exactly as before.
 */
process ROSE {
    tag "$meta.id"
    label 'process_single'

    // NOTE(review): this directive names a mulled-v2 container hash rather than
    // conda packages; it is unlikely to resolve with `-profile conda` -- confirm
    // and replace with the underlying package list.
    conda "conda-forge::mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6==fccb0c41a243c639e11dd1be7b74f563e624fcca-0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0':
        'biocontainers/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0' }"

    input:
    tuple val(meta), path(gff)
    path ucsc_file

    output:
    tuple val(meta), path("${gff.baseName}_STITCHED.gff")

    script:
    // Extra rose.py flags, overridable per process via `ext.args` in
    // conf/modules.config. The default keeps the original behaviour.
    // NOTE(review): -s / -t are presumably the stitching distance and the TSS
    // exclusion size, as in upstream ROSE -- confirm against bin/rose.py.
    def args = task.ext.args ?: '-s 12500 -t 2500'
    """
    rose.py \
        -g ${ucsc_file} \
        -i ${gff} \
        -o ${gff.baseName}_STITCHED.gff \
        ${args}
    """

    stub:
    // Create an empty placeholder with the same name as the real output.
    """
    touch "${gff.baseName}_STITCHED.gff"
    """
}
10 changes: 10 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,16 @@
"help_text": "This parameter is *mandatory* if `--genome` is not specified.",
"fa_icon": "far fa-file-code"
},
"rose_ucsc": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.ucsc$",
"description": "Path to ROSE UCSC file.",
"help_text": "This parameter is *mandatory* if `--genome` is not specified and input_bam is defined.",
"fa_icon": "far fa-file-code"
},
"igenomes_ignore": {
"type": "boolean",
"description": "Do not load the iGenomes reference config.",
Expand Down
7 changes: 4 additions & 3 deletions subworkflows/local/chromhmm.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ workflow CHROMHMM {
ch_joined = ch_signal.join(ch_control)
ch_mixed = ch_signal.mix(ch_control)

ch_versions = ch_versions.mix(REHEADER_SIGNAL.out.versions)
ch_versions = ch_versions.mix(REHEADER_CONTROL.out.versions)

ch_table = ch_joined .map{meta, signal, control -> [meta.condition, meta.antibody, signal.name, control.name]}
.collectFile() {
["cellmarkfiletable.tsv", it.join("\t") + "\n"]
Expand All @@ -50,8 +53,6 @@ workflow CHROMHMM {
n_states
)

LEARN_MODEL.out.transpose().view()

GET_RESULTS(LEARN_MODEL.out.transpose()
.map{meta, emmisions, bed ->
[meta + [id: bed.simpleName.split("_")[0]],
Expand All @@ -62,6 +63,6 @@ workflow CHROMHMM {
emit:
enhancers = ch_enhancers

versions = ch_versions // channel: [ versions.yml ]
versions = ch_versions
}

6 changes: 6 additions & 0 deletions subworkflows/local/peaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ include { COMBINE_TABLES as AFFINITY_SUM } from '../../modules/local/combine_ta
include { FOOTPRINTING } from './footprinting'
include { MERGE_SAMPLES } from './merge_samples'
include { CHROMHMM } from './chromhmm'
include { ROSE } from './rose'

workflow PEAKS {

Expand All @@ -31,6 +32,7 @@ workflow PEAKS {
ch_samplesheet_bam
chrom_sizes
chromhmm_states
rose_ucsc

main:

Expand Down Expand Up @@ -60,6 +62,10 @@ workflow PEAKS {
}

CHROMHMM(ch_samplesheet_bam, chrom_sizes, chromhmm_states)
ROSE(CHROMHMM.out.enhancers, rose_ucsc)

ch_versions = ch_versions.mix(CHROMHMM.out.versions)
ch_versions = ch_versions.mix(ROSE.out.versions)

FILTER_PWMS(tfs, pwms)

Expand Down
30 changes: 30 additions & 0 deletions subworkflows/local/rose.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
include { GAWK as BED_TO_GFF } from "../../modules/nf-core/gawk"
include { GAWK as REFORMAT_GFF } from "../../modules/nf-core/gawk"
include { ROSE as RUN_ROSE } from "../../modules/local/rose"
include { GAWK as ROSE_OUTPUT_TO_BED } from "../../modules/nf-core/gawk"

/*
 * ROSE subworkflow: converts BED regions to GFF, reformats them, runs the
 * ROSE process and converts its output back to BED.
 *
 * take:
 *   ch_bed    - channel fed to the first GAWK step (BED_TO_GFF)
 *   ucsc_file - annotation file forwarded to the ROSE process
 *
 * emit:
 *   enhancers - output of the final GAWK step (ROSE_OUTPUT_TO_BED)
 *   versions  - version channels of the three GAWK steps
 */
workflow ROSE {
    take:
    ch_bed
    ucsc_file

    main:

    // Linear chain; each step consumes the previous step's output.
    // The empty list is the second argument of the gawk module
    // (presumably its optional program file -- confirm against the module).
    BED_TO_GFF(ch_bed, [])
    REFORMAT_GFF(BED_TO_GFF.out.output, [])
    RUN_ROSE(REFORMAT_GFF.out.output, ucsc_file)
    ROSE_OUTPUT_TO_BED(RUN_ROSE.out, [])

    // Gather the version channels of all GAWK steps in one go.
    ch_versions = Channel.empty().mix(
        BED_TO_GFF.out.versions,
        REFORMAT_GFF.out.versions,
        ROSE_OUTPUT_TO_BED.out.versions
    )

    emit:
    enhancers = ROSE_OUTPUT_TO_BED.out.output

    versions = ch_versions
}

4 changes: 3 additions & 1 deletion workflows/tfactivity.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ workflow TFACTIVITY {
ch_samplesheet_bam
chrom_sizes
chromhmm_states
rose_ucsc

window_size
decay
Expand Down Expand Up @@ -98,7 +99,8 @@ workflow TFACTIVITY {
affinity_agg_method,
ch_samplesheet_bam,
chrom_sizes,
chromhmm_states
chromhmm_states,
rose_ucsc
)

DYNAMITE(
Expand Down

0 comments on commit 57993c1

Please sign in to comment.