generated from CCBR/CCBR_SnakemakeTemplate
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gather isa and rmat stats... scripts added
- Loading branch information
Showing
5 changed files
with
261 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#!/usr/bin/env Rscript | ||
# This script is used to concatenate (row bind) all results from IsoformSwitchAnalyzer. | ||
# ISA generated 4 types of files: | ||
# 1. top_switched_genes.tsv which are sorted by gene_switch_q_value | ||
# 2. top_switched_isoforms.tsv which are sorted by isoform_switch_q_value | ||
# 3. splicingEnrichment.tsv which are sorted by propUpQval | ||
# 4. genomeWideSplicing.tsv which are sorted by wilcoxQval | ||
# | ||
# Eg: | ||
# | ||
# Rscript gather_isa_stats.R \ | ||
# -d /data/Ziegelbauer_lab/ccbr1140/samples_1/results \ | ||
# -p top_switched_genes.tsv \ | ||
# -s gene_switch_q_value \ | ||
# -o top_switched_genes.all_samples.tsv | ||
|
||
# | ||
suppressPackageStartupMessages(library("argparse")) | ||
|
||
# create parser object | ||
parser <- ArgumentParser() | ||
|
||
# specify our desired options | ||
# by default ArgumentParser will add an help option | ||
|
||
parser$add_argument("-d", "--dirname", | ||
type="character", | ||
help="results dir of the pipeline or dir where to search recursively for files with given pattern", | ||
required=TRUE) | ||
|
||
|
||
args <- parser$parse_args() | ||
|
||
suppressPackageStartupMessages(library("tidyverse")) | ||
suppressPackageStartupMessages(library("openxlsx")) | ||
|
||
wd=args$dirname | ||
setwd(wd) | ||
|
||
globpatterns=c("top_switched_genes.tsv","top_switched_isoforms.tsv","splicingEnrichment.tsv","genomeWideSplicing.tsv") | ||
sortcolumns=c("gene_switch_q_value","isoform_switch_q_value","propUpQval","wilcoxQval") | ||
outfiles=c("top_switched_genes.all_samples.tsv","top_switched_isoforms.all_samples.tsv","splicingEnrichment.all_samples.tsv","genomeWideSplicing.all_samples.tsv") | ||
|
||
for (i in 1:length(globpatterns)){ | ||
glob_pattern=globpatterns[i] | ||
sort_column=sortcolumns[i] | ||
outfile=outfiles[i] | ||
|
||
files=list.files(pattern = glob_pattern,recursive = TRUE) | ||
files | ||
|
||
read_input_file<-function(fp){ | ||
df=read.csv(fp,header=TRUE,sep="\t", | ||
check.names = FALSE, | ||
comment.char = "#", | ||
strip.white = TRUE) | ||
return(df) | ||
} | ||
|
||
for (i in 1:length(files)){ | ||
x=read_input_file(files[i]) | ||
if (i==1){ | ||
d=x | ||
} else { | ||
d=rbind(d,x) | ||
} | ||
rm(x) | ||
} | ||
rm(i) | ||
|
||
d=d[order(d[,c(sort_column)]),] | ||
write.table(d, | ||
file=outfile, | ||
row.names = FALSE, | ||
col.names = TRUE, | ||
quote = FALSE, | ||
sep="\t") | ||
} | ||
|
||
read2df=function(f){ | ||
d=read.csv(f,header=TRUE, | ||
sep="\t", | ||
check.names=FALSE, | ||
strip.white = TRUE, | ||
comment.char = "#") | ||
return(as.data.frame(d)) | ||
} | ||
|
||
list_of_datasets<-list("top_switched_genes"=read2df("top_switched_genes.all_samples.tsv"), | ||
"top_switched_isoforms"=read2df("top_switched_isoforms.all_samples.tsv"), | ||
"splicingEnrichment"=read2df("splicingEnrichment.all_samples.tsv"), | ||
"genomeWideSplicing"=read2df("genomeWideSplicing.all_samples.tsv")) | ||
|
||
|
||
write.xlsx(list_of_datasets, file = "isa.results.xlsx", overwrite = TRUE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
suppressPackageStartupMessages(library("argparse")) | ||
|
||
# create parser object | ||
parser <- ArgumentParser() | ||
|
||
# specify our desired options | ||
# by default ArgumentParser will add an help option | ||
|
||
parser$add_argument("-d", "--dirname", | ||
type="character", | ||
help="results dir of the pipeline or dir where to search recursively for files with given pattern", | ||
required=TRUE) | ||
|
||
|
||
args <- parser$parse_args() | ||
|
||
|
||
suppressPackageStartupMessages(library("tidyverse")) | ||
suppressPackageStartupMessages(library("openxlsx")) | ||
|
||
wd=args$dirname | ||
setwd(wd) | ||
|
||
# summary files only | ||
|
||
glob_pattern="summary.txt" | ||
sort_column="SignificantEventsJCEC" | ||
|
||
|
||
files=list.files(pattern = glob_pattern,recursive = TRUE) | ||
files | ||
# functions | ||
get_contrast<-function(fp){ | ||
return(unlist(strsplit(fp,split="/"))[1]) | ||
} | ||
read_input_file<-function(fp,contrast_name){ | ||
df=read.csv(fp,header=TRUE,sep="\t", | ||
check.names = FALSE, | ||
comment.char = "#", | ||
strip.white = TRUE) | ||
df$contrast_name=contrast_name | ||
return(df) | ||
} | ||
|
||
|
||
contrasts=unlist(lapply(files, get_contrast)) | ||
|
||
for (i in 1:length(contrasts)){ | ||
x=read_input_file(files[i],contrasts[i]) | ||
if (i==1){ | ||
d=x | ||
} else { | ||
d=rbind(d,x) | ||
} | ||
rm(x) | ||
} | ||
rm(i) | ||
|
||
d <- arrange(d,desc(SignificantEventsJCEC)) | ||
write.table(d,file="summarys.tsv",sep = "\t",quote = FALSE,row.names = FALSE,col.names = TRUE) | ||
|
||
|
||
# loop througth ASE events | ||
|
||
event_types=c("RI","A3SS","A5SS","MXE","SE") | ||
|
||
for (event_type in event_types){ | ||
glob_pattern=paste(event_type,"MATS.JCEC.txt",sep=".") | ||
# sort_column="SignificantEventsJCEC" | ||
|
||
|
||
files=list.files(pattern = glob_pattern,recursive = TRUE) | ||
files | ||
# functions | ||
get_contrast<-function(fp){ | ||
return(unlist(strsplit(fp,split="/"))[1]) | ||
} | ||
read_input_file<-function(fp,contrast_name){ | ||
print("Reading file") | ||
print(fp) | ||
df=read.csv(fp,header=TRUE,sep="\t", | ||
check.names = FALSE, | ||
comment.char = "#", | ||
strip.white = TRUE) | ||
if (nrow(df)!=0){ | ||
df$contrast_name=contrast_name | ||
} | ||
return(df) | ||
} | ||
|
||
|
||
|
||
contrasts=unlist(lapply(files, get_contrast)) | ||
|
||
for (i in 1:length(contrasts)){ | ||
x=read_input_file(files[i],contrasts[i]) | ||
if (i==1){ | ||
d=x | ||
} else { | ||
if (nrow(d)!=0){ | ||
d=rbind(d,x) | ||
} | ||
} | ||
rm(x) | ||
} | ||
rm(i) | ||
|
||
d<-d[order(d$FDR),] | ||
head(d) | ||
write.table(d,file=paste(event_type,"JCEC.all_contrasts.tsv",sep="."),sep = "\t",quote = FALSE,row.names = FALSE,col.names = TRUE) | ||
rm(d) | ||
} | ||
|
||
read2df=function(f){ | ||
d=read.csv(f,header=TRUE, | ||
sep="\t", | ||
check.names=FALSE, | ||
strip.white = TRUE, | ||
comment.char = "#") | ||
return(as.data.frame(d)) | ||
} | ||
list_of_datasets<-list("Summary"=read2df("summarys.tsv"), | ||
"RI"=read2df("RI.JCEC.all_contrasts.tsv"), | ||
"SE"=read2df("SE.JCEC.all_contrasts.tsv"), | ||
"A3SS"=read2df("A3SS.JCEC.all_contrasts.tsv"), | ||
"A5SS"=read2df("A5SS.JCEC.all_contrasts.tsv"), | ||
"MXE"=read2df("MXE.JCEC.all_contrasts.tsv")) | ||
|
||
|
||
write.xlsx(list_of_datasets, file = "rmats.results.xlsx", overwrite = TRUE) |