-
Notifications
You must be signed in to change notification settings - Fork 2
/
.Rhistory
387 lines (387 loc) · 12.4 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# --- Console log: try to pull the MGI disease-portal grid over HTTP ---
library(jsonlite)
library(httr)
# Everything after '#' in this URL is a fragment.
# NOTE(review): fragments are normally not sent to the server, so the response
# is probably the page shell rather than the filtered grid -- confirm.
hm <- GET("http://www.informatics.jax.org/diseasePortal/summary#hdp=phenotypes%3DAlzheimer%20disease%26results%3D250%26startIndex%3D0%26sort%3D%26dir%3Dasc%26tab%3Dgridtab")
# This html_table() call is entered before library(rvest) on the next line.
html_table(hm)
library(rvest)
# Three further attempts: on the response itself, on html(hm), then with
# fill=TRUE.
html_table(hm)
html_table(html(hm))
html_table(html(hm), fill=TRUE)
# library(rdom) is tried before the install.packages("rdom") that follows it.
library(rdom)
install.packages("rdom")
# Repeat the request and look at the body via content().
library(httr)
hm <- GET("http://www.informatics.jax.org/diseasePortal/summary#hdp=phenotypes%3DAlzheimer%20disease%26results%3D250%26startIndex%3D0%26sort%3D%26dir%3Dasc%26tab%3Dgridtab")
content(hm)
# --- Console log: load the MGI Alzheimer allele export and keep targeted
# alleles ---
library(readr)
mgi_allele <- read_table("datasource/MGI_alzheimer.txt")
View(mgi_allele)
# NOTE(review): sep= and header= are read.table() arguments; readr's
# read_table() presumably rejects them, hence the switch to read.table() below.
mgi_allele <- read_table("datasource/MGI_alzheimer.txt", sep="\t", header=TRUE)
mgi_allele <- read.table("datasource/MGI_alzheimer.txt", sep="\t", header=TRUE)
mgi_allele <- read.table("datasource/MGI_alzheimer.txt", sep="\t", header=TRUE)
# Final load uses the .csv variant of the file via read_csv().
mgi_allele <- read_csv("datasource/MGI_alzheimer.csv")
View(mgi_allele)
# type() is not a base R function; class() on the next line is the working call.
type(mgi_allele)
class(mgi_allele)
View(mgi_allele)
# Three attempts to filter on the "Allele Type" column. Inside `[`, `=` names
# an argument rather than comparing, and the second form (unquoted name with a
# space) is not valid R syntax.
mgi_allele[`Allele Type` = "Targeted"]
mgi_allele[Allele Type = "Targeted"]
mgi_allele['Allele Type' = "Targeted"]
names(mgi_allele)
# Make the column names syntactic so the column can be written Allele.Type.
names(mgi_allele) <- make.names(names(mgi_allele), unique=TRUE)
# Allele.Type is a bare symbol here, so it is looked up in the calling
# environment, not among the data frame's columns.
mgi_target <- mgi_allele[Allele.Type = "Targeted"]
mgi_target <- mgi_allele[Allele.Type == "Targeted"]
# %>% is used one line before library(magrittr) is attached.
# NOTE(review): no dplyr is attached anywhere in this log, so filter() may
# resolve to stats::filter -- confirm.
mgi_allele %>% filter(Allele.Type=="Targeted")
library(magrittr)
mgi_allele %>% filter(Allele.Type=="Targeted")
names(mgi_allele)
# subset() evaluates the condition against the columns; this is the form whose
# result is kept.
subset(mgi_allele, Allele.Type == "Targeted")
mgi_target <- subset(mgi_allele, Allele.Type == "Targeted")
View(mgi_target)
class(mgi_target)
# `2:` with no upper bound is not valid R syntax.
mgi_target[2:]
mgi_target[,2]
# Keep the text before the first "<" of every value in column 2, then
# de-duplicate; the result is stored as mgi_gene.
sapply(strsplit(mgi_target[,2], "<"), "[", 1)
unique(sapply(strsplit(mgi_target[,2], "<"), "[", 1))
mgi_gene <- unique(sapply(strsplit(mgi_target[,2], "<"), "[", 1))
# --- Console log: map mouse gene symbols to human genes with biomaRt ---
library(biomaRt)
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
# Trial run on two symbols; `genes` is then overwritten by the getLDS() result.
genes = c("Zfp286", "Tmx2")
genes = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = genes ,mart = mouse, attributesL = c("hgnc_symbol","chromosome_name", "start_position"), martL = human, uniqueRows=T)
View(genes)
# Same query for the full mgi_gene vector built earlier in the session.
hs_gene <- getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = mgi_gene ,mart = mouse, attributesL = c("hgnc_symbol","chromosome_name", "start_position"), martL = human, uniqueRows=T)
View(hs_gene)
# The mart handles are created a second time below.
library(biomaRt)
# set Biomart human gene set
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
# set Biomart mouse gene set
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
# --- Console log: first load of HMD_HumanPhenotype.rpt and apply() probes ---
library(readr)
hmd_hp <- read_table("datasource/HMD_HumanPhenotype.rpt")
View(hmd_hp)
# NOTE(review): sep= is a read.table() argument; the read_table() attempt is
# followed by the read.table() call whose result is kept.
hmd_hp <- read_table("datasource/HMD_HumanPhenotype.rpt", sep="\t")
hmd_hp <- read.table("datasource/HMD_HumanPhenotype.rpt", sep="\t")
View(hmd_hp)
class(hmd_hp)
# MARGIN = 2 passes one column at a time as a plain vector, so the
# two-dimensional indexing in the first three calls cannot apply to x.
apply(hmd_hp, 2, function(x) x[1:5,])
apply(hmd_hp, 2, function(x) x[,1:5])
apply(hmd_hp, 2, function(x) x[,1])
apply(hmd_hp, 2, function(x) x)
# Placeholder for sepColumn(): takes v and i, ignores both, and returns NULL.
sepColumn <- function(v, i) NULL
# Skeleton of sepColumn(): tokenises the sixth field of a record and walks
# over the tokens. It builds no result yet and always returns NULL invisibly.
#
# v: one record (atomic vector, list, or one-row data frame); its sixth field
#    is treated as space-separated tokens.
# i: accepted but not used.
sepColumn <- function(v, i) {
  if (length(v) < 6L) {
    return(invisible(NULL))
  }
  pheno_field <- as.character(v[[6L]])
  # `if (v[6])` raised "argument is not interpretable as logical" for ordinary
  # text, so test explicitly for one usable, non-empty string.
  has_tokens <- length(pheno_field) == 1L &&
    !is.na(pheno_field) &&
    nzchar(pheno_field)
  if (has_tokens) {
    # strsplit() returns a list; [[1L]] makes the loop visit each token
    # instead of visiting the whole token vector once.
    for (mp in strsplit(pheno_field, " ")[[1L]]) {
    }
  }
  invisible(NULL)
}
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by spaces.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so factor columns give labels, not integer codes.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # `if (v[6])` raised an error for ordinary text, and the strsplit() list was
  # looped over without [[1L]]; split unconditionally and drop empty tokens.
  phenos <- strsplit(fields[6L], " ")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# --- Console log: attempts to run sepColumn() over hmd_hp ---
# sepColumn(x) and sepColumn() are evaluated on the spot and their value is
# passed as FUN, rather than the function itself; MARGIN = 2 also walks
# columns, not rows.
apply(hmd_hp, 2, sepColumn(x))
apply(hmd_hp, 2, sepColumn())
?mean()
# Looping over a data frame yields its columns one at a time; the return
# value of each sepColumn() call is discarded.
for (r in hmd_hp) {
sepColumn(r)
}
# hmd_hp[,1] is the first column, not the first record.
sepColumn(hmd_hp[,1])
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by spaces.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so factor columns give labels, not integer codes.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # `if (v[6])` raised an error for ordinary text; split unconditionally and
  # let an empty token list stand for "no phenotypes".
  phenos <- strsplit(fields[6L], " ")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# Re-run on hmd_hp[,1], which is the first column rather than the first record.
sepColumn(hmd_hp[,1])
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by whitespace.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so factor columns give labels, not integer codes.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # The backslash must be doubled inside an R string: "\s+" is an invalid
  # escape and stops the whole definition from parsing.
  phenos <- strsplit(fields[6L], "\\s+")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by whitespace.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so factor columns give labels, not integer codes.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # `if (v[6])` raised an error for ordinary text; split unconditionally and
  # let an empty token list stand for "no phenotypes".
  phenos <- strsplit(fields[6L], "\\s+")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# hmd_hp[,1] is the first column; hmd_hp[1,] is the first record. The log
# prints both and then calls sepColumn() on the record.
sepColumn(hmd_hp[,1])
hmd_hp[,1]
hmd_hp[1,]
sepColumn(hmd_hp[1,])
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by spaces.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so a one-row data frame (hmd_hp[1, ]) and factor
  # columns both reduce to a plain character vector.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # `if (v[6])` raised an error for ordinary text; split unconditionally and
  # let an empty token list stand for "no phenotypes".
  phenos <- strsplit(fields[6L], " ")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# Try the current sepColumn() on the first record of hmd_hp.
sepColumn(hmd_hp[1,])
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by spaces.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so a one-row data frame (hmd_hp[1, ]) and factor
  # columns both reduce to a plain character vector.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # len() is not an R function, so the old guard failed before any work was
  # done; split unconditionally and count the tokens that come back instead.
  phenos <- strsplit(fields[6L], " ")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# Try the current sepColumn() on the first record of hmd_hp.
sepColumn(hmd_hp[1,])
# Expand one gene record into one row per phenotype ID.
#
# v: a single record given as an atomic vector, a list, or a one-row data
#    frame. Fields 1-5 are copied to every output row; field 6 holds the
#    phenotype IDs separated by spaces.
# Returns a data frame with columns human_symbol, human_id, mouse_id,
# mouse_symbol, mgi and human_pheno. A record whose sixth field is missing,
# NA, or blank yields one row with human_pheno = NA.
sepColumn <- function(v) {
  # Convert field by field so a one-row data frame (hmd_hp[1, ]) and factor
  # columns both reduce to a plain character vector.
  fields <- unname(vapply(v, as.character, character(1)))
  stopifnot(length(fields) >= 5L)

  # length(v[6]) is 1 for any single field, so the old `> 1` guard never
  # split anything; split first and count the resulting tokens instead.
  phenos <- strsplit(fields[6L], " ")[[1L]]
  phenos <- phenos[!is.na(phenos) & nzchar(phenos)]
  if (length(phenos) == 0L) {
    phenos <- NA_character_
  }

  # A single data.frame() call keeps the declared column names and types;
  # rbind() onto an empty frame does not reliably preserve them.
  data.frame(
    human_symbol = fields[1L],
    human_id = as.integer(fields[2L]),
    mouse_id = as.integer(fields[3L]),
    mouse_symbol = fields[4L],
    mgi = fields[5L],
    human_pheno = phenos,
    stringsAsFactors = FALSE
  )
}
# --- Console log: test sepColumn() on single records, then try a vectorised
# split instead ---
sepColumn(hmd_hp[1,])
sepColumn(hmd_hp[12,])
sepColumn(hmd_hp[2,])
hmd_hp[2,]
# The first strsplit() refers to df$V2, but df is only assigned two lines
# further down in this log.
s <- strsplit(df$V2, split = " ")
s <- strsplit(hmd_hp$V6, split = " ")
# Toy example: build a small "id|a,b,c" table, split V2 on commas, and repeat
# each V1 once per piece so the result has one row per piece.
df <- read.table(textConnection("1|a,b,c\n2|a,c\n3|b,d\n4|e,f"), header = F, sep = "|", stringsAsFactors = F)
s <- strsplit(df$V2, split = ",")
data.frame(V1 = rep(df$V1, sapply(s, length)), V2 = unlist(s))
View(hmd_hp)
# i.na is a typo for is.na, corrected on the next line.
hmd_hp[i.na(hmd_hp$V6),6]
hmd_hp[is.na(hmd_hp$V6),6]
str(hmd_hp)
# Reload with stringsAsFactors = FALSE so the columns stay character.
hmd_hp <- read.table("datasource/HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE)
str(hmd_hp)
hmd_hp[is.na(hmd_hp$V6),6]
hmd_hp[is.na(hmd_hp$V6),6]
# After the is.na() checks the log tests V6 for empty strings instead.
hmd_hp[hmd_hp$V6=="",6]
head(hmd_hp[hmd_hp$V6=="",])
# NOTE(review): assigning through head() presumably fails; the direct
# assignment on the following line is the one that blanks V6 to NA.
head(hmd_hp[hmd_hp$V6=="",])<-NA
hmd_hp[hmd_hp$V6=="",6]<-NA
head(hmd_hp)
# --- Console log: row-wise expansion with strsplit() + expand.grid() ---
# apply(..., 1, ...) hands each record to the function as a vector; strsplit()
# splits every field and expand.grid() builds all combinations of the pieces,
# which are then stacked with rbind. The first try splits on " */ *", the
# later ones on a single space.
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " */ *"))
})))
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
# Same expression restricted to the first ten records.
as.data.frame(do.call(rbind, apply(hmd_hp[1:10,], 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
# Reload with strip.white=TRUE and repeat the expansion.
hmd_hp <- read.table("datasource/HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
as.data.frame(do.call(rbind, apply(hmd_hp[1:10,], 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
hmd_hp <- read.table("datasource/HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
View(hmd_hp)
# NOTE: the next entry (five lines) never closes the parentheses opened by
# as.data.frame(, do.call( and apply(, so it is not a complete expression.
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
do.call(expand.grid, strsplit(x, " "))
}
{}
hmd_mhp <- as.data.frame(do.call(rbind, apply(hmd_hp, 1, function(x) {
do.call(expand.grid, strsplit(x, " "))
})))
# --- Console log: split V6 into long form with cSplit() and export ---
library(devtools)
# NOTE(review): source_gist() executes code fetched from a gist; cSplit() is
# presumably defined by that gist -- confirm and pin a trusted copy.
source_gist(11380733
)
hmd_mhp <- cSplit(hmd_hp, "V6", sep = " ", direction = "long")
View(hmd_mhp)
# Blank V6 values are set to NA and the split is repeated.
hmd_hp[hmd_hp$V6=="",6] <- NA
hmd_mhp <- cSplit(hmd_hp, "V6", sep = " ", direction = "long")
View(hmd_mhp)
# The first export writes hmd_hp (the unsplit table); the next two write
# hmd_mhp to the same file, dropping quotes and then column names.
write.table(hmd_hp, "normHMD_HumanPhenotype.txt", sep = "\t")
write.table(hmd_mhp, "normHMD_HumanPhenotype.txt", sep = "\t", quote=FALSE)
write.table(hmd_mhp, "normHMD_HumanPhenotype.txt", sep = "\t", quote=FALSE, col.names=FALSE)
# From here on the input path no longer has the "datasource/" prefix.
hmd_hp <- read.table("HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
hmd_hp[hmd_hp$V6=="",6] <- NA
library(devtools)
source_gist(11380733)
hmd_mhp <- cSplit(hmd_hp, "V6", sep = " ", direction = "long")
write.table(hmd_mhp, "normHMD_HumanPhenotype.txt", sep = "\t", quote=FALSE, col.names=FALSE)
# --- Console log: load saved workspaces from a user-specific absolute path
# and inspect them ---
load("~/Documents/WorkDir/jhudash/biogrid2.Rdata")
View(biogrid2)
# NOTE(review): A2 is presumably the object restored from
# GeneUniquePair.Rdata -- confirm.
load("~/Documents/WorkDir/jhudash/GeneUniquePair.Rdata")
View(A2)
load("~/Documents/WorkDir/jhudash/biogrid2.Rdata")
View(biogrid2)
# --- Console log: same normalisation, with splitstackshape attached instead
# of sourcing a gist ---
hmd_hp <- read.table("HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
# Blank V6 values become NA before the split.
hmd_hp[hmd_hp$V6=="",6] <- NA
library(splitstackshape)
# One output row per space-separated value of V6.
hmd_mhp <- cSplit(hmd_hp, "V6", sep = " ", direction = "long")
View(hmd_mhp)
# This export also suppresses row names.
write.table(hmd_mhp, "normHMD_HumanPhenotype.txt", sep = "\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
View(hmd_hp)
# Drop the seventh column, then write the file again.
hmd_mhp[,7] <- NULL
View(hmd_mhp)
write.table(hmd_mhp, "normHMD_HumanPhenotype.txt", sep = "\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
unique(hmd_mhp[,6])
# --- Console log: load MGI_PhenoGenoMP.rpt and join it to the HMD table ---
# Three load attempts; the last one disables both quote and comment handling.
mgi_mp <- read.table("MGI_PhenoGenoMP.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
mgi_mp <- read.table("MGI_PhenoGenoMP.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "")
mgi_mp <- read.table("MGI_PhenoGenoMP.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "", comment.char = "")
View(mgi_mp)
mgi_mp[1,4]
# Distinct values of column 4 of mgi_mp.
mp <- unique(mgi_mp[,4])
length(mp)
View(hmd_mhp)
class(hmd_mhp)
class(mp)
class(mgi_mp)
library(data.table)
tmgi_mp <- data.table(mgi_mp)
# Keyed-join attempt: hmd_mhp keyed on V5, tmgi_mp keyed on V6.
# NOTE(review): setkey() on hmd_mhp assumes it is a data.table -- confirm.
setkey(hmd_mhp, V5)
View(tmgi_mp)
setkey(tmgi_mp, V6)
mgi_hmd <- hmd_mhp[tmgi_mp, nomatch=0]
# merge() attempts. by.x = V5 / by.y = V6 pass bare symbols; the quoted form
# on the following line names the columns as strings.
mgi_hmd <- merge(hmd_mhp,tmgi_mp)
mgi_hmd <- merge(hmd_mhp,tmgi_mp, all=FALSE)
mgi_hmd <- merge(hmd_mhp,tmgi_mp, by.x = V5, by.y = V6)
mgi_hmd <- merge(hmd_mhp,tmgi_mp, by.x = "V5", by.y = "V6")
View(hmd_hp)
# The merge whose result is kept uses hmd_hp (unsplit) rather than hmd_mhp.
mgi_hmd <- merge(hmd_hp,tmgi_mp, by.x = "V5", by.y = "V6")
View(mgi_hmd)
mgi_hmd[1,2]
mgi_hmd[3,2]
# Keep columns 2-5 and 11 of the merged table.
mgi_phenotype <- mgi_hmd[,c(2:5,11)]
View(mgi_phenotype)
View(hmd_hp)
# NOTE(review): save() takes its output path through file=; written this way
# the string is treated as another object name -- confirm and use
# file = "mgi_phenotype.Rdata".
save(mgi_phenotype, "mgi_phenotype.Rdata")
write.table(mgi_phenotype, "mgi_Phenotype.txt", sep = "\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
length(unique(mgi_phenotype[,5]))
length(unique(hmd_mhp[,6]))
str(hmd_mhp)
str(mgi_phenotype)
# --- Console log: normalise column types of hmd_mhp and count unique values ---
# Flag the factor columns and rebuild each of them with factor().
hcat <- sapply(hmd_mhp, is.factor)
hmd_mhp[hcat] <- lapply(hmd_mhp[hcat], factor)
length(unique(hmd_mhp[,6]))
hmd_mhp[,6]
str(hmd_mhp)
# The doubled `<- <-` is not valid R syntax; the corrected line follows.
hmd_mhp[] <- <- lapply(hmd_mhp, as.character)
# Convert every column to character while keeping the table's structure.
hmd_mhp[] <- lapply(hmd_mhp, as.character)
str(hmd_mhp)
length(unique(hmd_mhp[,6]))
# hmd_mhp[6,] selects row 6, not column 6; the $V6 form below targets the
# column by name.
length(unique(hmd_mhp[6,]))
length(unique(hmd_mhp$V6))
unique(hmd_mhp$V6)
View(biogrid2)
View(mgi_phenotype)
length(unique(mgi_phenotype$V2.x))
# --- Console log: load MGI_OMIM.rpt, rebuild the phenotype table, export ---
# Four MGI_OMIM.rpt load attempts that vary header=, quote= and comment.char=.
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "", comment.char = "")
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "", comment.char = "")
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE)
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "", comment.char = "")
View(mgi_omim)
# Reload both inputs and join hmd_hp$V5 to mgi_mp$V6.
hmd_hp <- read.table("HMD_HumanPhenotype.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE)
mgi_mp <- read.table("MGI_PhenoGenoMP.rpt", sep="\t", header=FALSE, stringsAsFactors = FALSE, strip.white=TRUE, quote = "", comment.char = "")
View(hmd_hp)
View(mgi_mp)
mgi_hmd <- merge(hmd_hp, mgi_mp, by.x = "V5", by.y = "V6")
View(mgi_mp)
View(mgi_hmd)
# Keep merged columns 2-5, 11 and 1 (the join key goes last), then give them
# descriptive names and export with a header row.
mgi_phenotype <- mgi_hmd[,c(2:5,11,1)]
View(mgi_phenotype)
colnames(mgi_phenotype) <- c("Human_GeneSymbol","Human_GeneID","Mouse_GeneID","Mouse_GeneSymbol","Mammal_PhenotypeID","MGI_IndexID")
write.table(mgi_phenotype, "mgi_Phenotype.txt", sep = "\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
# Further MGI_OMIM.rpt load attempts: the same call three times, then without
# strip.white, and finally read.delim().
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE)
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE)
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE, strip.white=TRUE)
?read.table()
mgi_omim <- read.table("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE)
mgi_omim <- read.delim("MGI_OMIM.rpt", sep="\t", header=TRUE, stringsAsFactors = FALSE)
View(mgi_omim)