Skip to content

Commit

Permalink
gather isa and rmat stats... scripts added
Browse files Browse the repository at this point in the history
  • Loading branch information
kopardev committed Jan 4, 2022
1 parent c09ca2a commit 6544a9c
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 1 deletion.
4 changes: 3 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ rule all:
expand(join(WORKDIR,"rsem","isoformcounts","{sample}","{sample}.RSEM.isoforms.results"),sample=SAMPLES),
# rmats output
expand(join(RESULTSDIR,"{contrast}","rmats","summary.txt"),contrast=CONTRASTS),
join(RESULTSDIR,"rmats.results.xlsx"),
# IsoformSwitchAnalyzeR output
expand(join(RESULTSDIR,"{contrast}","isa","SplicingSummary.pdf"),contrast=CONTRASTS)
expand(join(RESULTSDIR,"{contrast}","isa","SplicingSummary.pdf"),contrast=CONTRASTS),
join(RESULTSDIR,"isa.results.xlsx")

15 changes: 15 additions & 0 deletions workflow/rules/isa.smk
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,18 @@ Rscript {params.rscript} \
-o {params.outdir}
"""

localrules: gather_isa_stats

rule gather_isa_stats:
input:
expand(join(RESULTSDIR,"{contrast}","isa","SplicingSummary.pdf"),contrast=CONTRASTS)
output:
xlsx=join(RESULTSDIR,"isa.results.xlsx")
params:
rscript=join(SCRIPTSDIR,"gather_isa_stats.R"),
resultsdir=RESULTSDIR
envmodules: TOOLS["R"]["version"]
shell:"""
set -exuf -o pipefail
Rscript {params.rscript} -d {params.resultsdir}
"""
16 changes: 16 additions & 0 deletions workflow/rules/rmats.smk
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,19 @@ python ${{RMATS_SRC}}/rmats.py \
--libType "fr-secondstrand" \
--tmp /lscratch/${{SLURM_JOB_ID}}/
"""

localrules: gather_rmats_stats

rule gather_rmats_stats:
input:
expand(join(RESULTSDIR,"{contrast}","rmats","summary.txt"),contrast=CONTRASTS)
output:
xlsx=join(RESULTSDIR,"rmats.results.xlsx")
params:
rscript=join(SCRIPTSDIR,"gather_rmats_stats.R"),
resultsdir=RESULTSDIR
envmodules: TOOLS["R"]["version"]
shell:"""
set -exuf -o pipefail
Rscript {params.rscript} -d {params.resultsdir}
"""
95 changes: 95 additions & 0 deletions workflow/scripts/gather_isa_stats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env Rscript
# This script is used to concatenate (row bind) all results from IsoformSwitchAnalyzer.
# ISA generated 4 types of files:
# 1. top_switched_genes.tsv which are sorted by gene_switch_q_value
# 2. top_switched_isoforms.tsv which are sorted by isoform_switch_q_value
# 3. splicingEnrichment.tsv which are sorted by propUpQval
# 4. genomeWideSplicing.tsv which are sorted by wilcoxQval
#
# Eg:
#
# Rscript gather_isa_stats.R \
# -d /data/Ziegelbauer_lab/ccbr1140/samples_1/results \
# -p top_switched_genes.tsv \
# -s gene_switch_q_value \
# -o top_switched_genes.all_samples.tsv

#
suppressPackageStartupMessages(library("argparse"))

# create parser object
parser <- ArgumentParser()

# specify our desired options
# by default ArgumentParser will add an help option

parser$add_argument("-d", "--dirname",
type="character",
help="results dir of the pipeline or dir where to search recursively for files with given pattern",
required=TRUE)


args <- parser$parse_args()

suppressPackageStartupMessages(library("tidyverse"))
suppressPackageStartupMessages(library("openxlsx"))

wd=args$dirname
setwd(wd)

globpatterns=c("top_switched_genes.tsv","top_switched_isoforms.tsv","splicingEnrichment.tsv","genomeWideSplicing.tsv")
sortcolumns=c("gene_switch_q_value","isoform_switch_q_value","propUpQval","wilcoxQval")
outfiles=c("top_switched_genes.all_samples.tsv","top_switched_isoforms.all_samples.tsv","splicingEnrichment.all_samples.tsv","genomeWideSplicing.all_samples.tsv")

for (i in 1:length(globpatterns)){
glob_pattern=globpatterns[i]
sort_column=sortcolumns[i]
outfile=outfiles[i]

files=list.files(pattern = glob_pattern,recursive = TRUE)
files

read_input_file<-function(fp){
df=read.csv(fp,header=TRUE,sep="\t",
check.names = FALSE,
comment.char = "#",
strip.white = TRUE)
return(df)
}

for (i in 1:length(files)){
x=read_input_file(files[i])
if (i==1){
d=x
} else {
d=rbind(d,x)
}
rm(x)
}
rm(i)

d=d[order(d[,c(sort_column)]),]
write.table(d,
file=outfile,
row.names = FALSE,
col.names = TRUE,
quote = FALSE,
sep="\t")
}

read2df=function(f){
d=read.csv(f,header=TRUE,
sep="\t",
check.names=FALSE,
strip.white = TRUE,
comment.char = "#")
return(as.data.frame(d))
}

list_of_datasets<-list("top_switched_genes"=read2df("top_switched_genes.all_samples.tsv"),
"top_switched_isoforms"=read2df("top_switched_isoforms.all_samples.tsv"),
"splicingEnrichment"=read2df("splicingEnrichment.all_samples.tsv"),
"genomeWideSplicing"=read2df("genomeWideSplicing.all_samples.tsv"))


write.xlsx(list_of_datasets, file = "isa.results.xlsx", overwrite = TRUE)
132 changes: 132 additions & 0 deletions workflow/scripts/gather_rmats_stats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env Rscript

suppressPackageStartupMessages(library("argparse"))

# create parser object
parser <- ArgumentParser()

# specify our desired options
# by default ArgumentParser will add an help option

parser$add_argument("-d", "--dirname",
type="character",
help="results dir of the pipeline or dir where to search recursively for files with given pattern",
required=TRUE)


args <- parser$parse_args()


suppressPackageStartupMessages(library("tidyverse"))
suppressPackageStartupMessages(library("openxlsx"))

wd=args$dirname
setwd(wd)

# summary files only

glob_pattern="summary.txt"
sort_column="SignificantEventsJCEC"


files=list.files(pattern = glob_pattern,recursive = TRUE)
files
# functions
get_contrast<-function(fp){
return(unlist(strsplit(fp,split="/"))[1])
}
read_input_file<-function(fp,contrast_name){
df=read.csv(fp,header=TRUE,sep="\t",
check.names = FALSE,
comment.char = "#",
strip.white = TRUE)
df$contrast_name=contrast_name
return(df)
}


contrasts=unlist(lapply(files, get_contrast))

for (i in 1:length(contrasts)){
x=read_input_file(files[i],contrasts[i])
if (i==1){
d=x
} else {
d=rbind(d,x)
}
rm(x)
}
rm(i)

d <- arrange(d,desc(SignificantEventsJCEC))
write.table(d,file="summarys.tsv",sep = "\t",quote = FALSE,row.names = FALSE,col.names = TRUE)


# loop througth ASE events

event_types=c("RI","A3SS","A5SS","MXE","SE")

for (event_type in event_types){
glob_pattern=paste(event_type,"MATS.JCEC.txt",sep=".")
# sort_column="SignificantEventsJCEC"


files=list.files(pattern = glob_pattern,recursive = TRUE)
files
# functions
get_contrast<-function(fp){
return(unlist(strsplit(fp,split="/"))[1])
}
read_input_file<-function(fp,contrast_name){
print("Reading file")
print(fp)
df=read.csv(fp,header=TRUE,sep="\t",
check.names = FALSE,
comment.char = "#",
strip.white = TRUE)
if (nrow(df)!=0){
df$contrast_name=contrast_name
}
return(df)
}



contrasts=unlist(lapply(files, get_contrast))

for (i in 1:length(contrasts)){
x=read_input_file(files[i],contrasts[i])
if (i==1){
d=x
} else {
if (nrow(d)!=0){
d=rbind(d,x)
}
}
rm(x)
}
rm(i)

d<-d[order(d$FDR),]
head(d)
write.table(d,file=paste(event_type,"JCEC.all_contrasts.tsv",sep="."),sep = "\t",quote = FALSE,row.names = FALSE,col.names = TRUE)
rm(d)
}

read2df=function(f){
d=read.csv(f,header=TRUE,
sep="\t",
check.names=FALSE,
strip.white = TRUE,
comment.char = "#")
return(as.data.frame(d))
}
list_of_datasets<-list("Summary"=read2df("summarys.tsv"),
"RI"=read2df("RI.JCEC.all_contrasts.tsv"),
"SE"=read2df("SE.JCEC.all_contrasts.tsv"),
"A3SS"=read2df("A3SS.JCEC.all_contrasts.tsv"),
"A5SS"=read2df("A5SS.JCEC.all_contrasts.tsv"),
"MXE"=read2df("MXE.JCEC.all_contrasts.tsv"))


write.xlsx(list_of_datasets, file = "rmats.results.xlsx", overwrite = TRUE)

0 comments on commit 6544a9c

Please sign in to comment.