-
Notifications
You must be signed in to change notification settings - Fork 0
/
Manuscript_Code.R
104 lines (73 loc) · 4.11 KB
/
Manuscript_Code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Preprocessing h5 Data ~ required for downstream steps
SO <- source("Preprocessing.R")
# garbage collection
rm(list = setdiff(ls(), "SO"))
gc()
# Classification by ModuleScores ~ Geneset averages, CNV for Malignant / Cholangiocyte
SO <- SO$value
source("ModuleScore.R")
library(stringr)
library(tidyverse)
#Basic Parameters:
[email protected]$pooled_ident <- str_sub([email protected]$orig.ident, 1, nchar([email protected]$orig.ident)-1)
# Append information from infercnv analysis
cnv_meta <- read.csv("~/files_for_analysis/CCBR_1190_epi_meta_w_cnv.csv")
cnv_meta$Barcode2 <- paste(cnv_meta$new_sample_name,sub(".*_([^_]+)$", "\\1", cnv_meta$Barcode),sep="_")
[email protected]$cnv_score <- cnv_meta$cnv_scores[match([email protected]$Barcode, cnv_meta$Barcode2)]
[email protected]$epi_ident <- cnv_meta$Likely_CellType[match([email protected]$Barcode, cnv_meta$Barcode2)]
meta <- [email protected]
meta <- meta %>% mutate(adj_Likely_CellType = case_when(
Likely_CellType == "Epithelial_cells" ~ epi_ident,
TRUE ~ Likely_CellType
))
[email protected]$adj_Likely_CellType <- meta$adj_Likely_CellType[match([email protected]$Barcode,meta$Barcode)]
SO.sub <- subset(SO, cells = SO$Barcode[!is.na(SO$adj_Likely_CellType)])
# Figure 5A ~ Dimension Reduction Plot with Celltype Counts
source("Plot_Metadata.R")
# Figure 5B ~ Barplot
cbPalette <- c("green","#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#e6beff","#9A6324","#800000","#aaffc3","#808000","yellow","grey")
g <- ggplot([email protected], aes(x=factor(pooled_ident), fill=factor(adj_Likely_CellType))) +
geom_bar(position="fill") + scale_fill_manual(values=cbPalette) + theme_classic() + xlab("Sample") + ylab("Proportion") + scale_x_discrete(labels=c("Vehicle","VP")) + theme(axis.text=element_text(size=12), legend.title= element_blank())
print(g)
# Figure 6A and Supplementary ~ Pathway Analysis
# Run DEG first before GSEA
#source("Differential_Gene_Analysis.R")
# file size restriction ~ msigDB v6.2 rbind with spike data available upon request
#source("Geneset_Enrichment.R")
# upregulated pathways
df_tot <- read.csv("~/files_for_analysis/Gsea_malign.csv")
df <- head(df_tot[order(df_tot$padj),],30)
df <- df[order(df$fraction_leadingEdge),]
df$pathway <- factor(df$pathway, levels = df$pathway)
g <- ggplot(df, aes(x = fraction_leadingEdge, y = pathway)) +
geom_point(aes(color = padj, size = size_leadingEdge), alpha = 0.5) +
scale_color_gradientn(colours = rainbow(5)) + labs(x='fraction leadingEdge', y=NULL, color='padj',size='size leadingEdge') + theme(axis.title = element_text(face='bold'), axis.text = element_text(face='bold')) + theme(legend.position="right") + theme_classic()
print(g)
# downregulated pathways to plot
down_path <- c("KEGG_MATURITY_ONSET_DIABETES_OF_THE_YOUNG",
"KEGG_STEROID_BIOSYNTHESIS",
"REACTOME_APOPTOTIC_CLEAVAGE_OF_CELL_ADHESION_PROTEINS",
"KEGG_PANTOTHENATE_AND_COA_BIOSYNTHESIS",
"REACTOME_HS_GAG_DEGRADATION",
"REACTOME_HS_GAG_BIOSYNTHESIS",
"REACTOME_CHOLESTEROL_BIOSYNTHESIS",
"REACTOME_REGULATION_OF_BETA_CELL_DEVELOPMENT",
"REACTOME_LIPOPROTEIN_METABOLISM",
"REACTOME_CELL_CELL_JUNCTION_ORGANIZATION",
"KEGG_ECM_RECEPTOR_INTERACTION")
df_down <- df_tot[df_tot$pathway %in% down_path,]
g <- ggplot(df_down, aes(x = fraction_leadingEdge, y = pathway)) +
geom_point(aes(color = padj, size = size_leadingEdge), alpha = 0.5) +
scale_color_gradientn(colours = rainbow(5)) + labs(x='fraction leadingEdge', y=NULL, color='padj',size='size leadingEdge') + theme(axis.title = element_text(face='bold'), axis.text = element_text(face='bold')) + theme(legend.position="right") + theme_bw()
print(g)
# Figure 6D and # Figure S5B ~ Dimension Reduction Plot by Gene Expression
source("Color_by_Gene.R")
# Prom1 as Cd133
colorByGene(object = SO.sub,
gene = c("Aldh1a1","Cd24a","Prom1","Epcam","Sox9"))
colorByGene(object = SO.sub,
gene = c("Cd3d","Ms4a1","Cd7","Cd4","Clec9a","Cd8a","Cd163","Foxp3"))
# Figure S5A ~ Violin
source("Violin.R")
# Figure S8 ~ GSEA Enrichment Plot
source("GSEA_plot_path.R")