# analysis.R
# Return the sentence at a given position when it matches any search string.
#
# Arguments:
#   document          1 to build the corpus directly from `textual.content`;
#                     any other value is used as the file-name pattern inside
#                     the directory `textual.content` and as the corpus index.
#   paragraph.number  paragraph position, forwarded to getSentence().
#   sentence.number   sentence position, forwarded to getSentence().
#   match             character vector of search strings; one hit is enough.
#   option            "fixed" for literal matching, "case sensitive" for a
#                     case-sensitive regular expression, anything else for a
#                     case-insensitive regular expression.
#
# Returns the sentence when at least one string matches, otherwise NA.
# An empty `match` never matches, so NA is returned in that case.
#
# Reads `textual.content` and `idiom` and calls getSentence(), all of which
# are defined outside this function.
lookContext <- function(document, paragraph.number, sentence.number, match, option)
{
  # Verify if it is not a real file
  if (document == 1) {
    text.source <- VectorSource(textual.content)
  } else {
    text.source <- DirSource(directory = textual.content, pattern = document)
  }
  my.corpus <- VCorpus(text.source,
                       readerControl = list(language = idiom, id = "id1"))

  # Find the right sentence between everyone
  text.sentences <- as.character(my.corpus[[document]])
  sentence <- getSentence(c(paragraph.number, sentence.number),
                          text.sentences)

  # Translate the matching mode into the extra grepl() arguments once,
  # instead of repeating the whole search for every mode.
  if (option == "fixed") {
    grepl.options <- list(fixed = TRUE)
  } else if (option == "case sensitive") {
    grepl.options <- list()
  } else {
    grepl.options <- list(ignore.case = TRUE)
  }

  # seq_along() keeps an empty `match` from being searched as an NA pattern.
  hits <- vapply(seq_along(match), function(x) {
    found <- do.call(grepl, c(list(match[x], sentence), grepl.options))
    any(found)
  }, logical(1))

  # na.rm = TRUE: an NA search string counts as "no match" rather than
  # breaking the condition below.
  if (any(hits, na.rm = TRUE))
    sentence
  else
    NA
}
# Explore sentences from texts.
#
# Arguments:
#   sentence.positions  table with the columns "File", "Paragraph" and
#                       "Sentence"; duplicated positions are visited once.
#   certain.string      character vector of search strings handed to
#                       lookContext().
#   exact.string        TRUE to match the strings literally.
#   case.sensitive      only used when exact.string is FALSE: TRUE for a
#                       case-sensitive search, FALSE for a case-insensitive one.
#
# Returns a data frame with the character columns `position` and `sentence`,
# one row per position whose sentence matched. It has zero rows when nothing
# matched or when there were no positions to visit.
#
# Every matching sentence is also passed to sentenceLog(), which is defined
# outside this function.
inspectContexts <- function(sentence.positions, certain.string = c(""), exact.string = TRUE,
                            case.sensitive = FALSE)
{
  explored.contexts <- data.frame(position = character(),
                                  sentence = character(),
                                  stringsAsFactors = FALSE)

  # Set sentence locations to explore
  contexts <- unique(sentence.positions[, c("File", "Paragraph", "Sentence")])
  contexts[1] <- lapply(contexts[1], as.character)

  # Specify pattern
  if (exact.string) {
    string.condition <- "fixed"
  } else if (case.sensitive) {
    string.condition <- "case sensitive"
  } else {
    string.condition <- ""
  }

  # A single file named "?" means the text is not a real file; lookContext()
  # is then asked for document 1 instead of a file name.
  single.document <- length(unique(contexts$File)) == 1 &&
    isTRUE(contexts[1, "File"] == "?")

  # seq_len() visits nothing when there are no positions (1:0 would not).
  found <- lapply(seq_len(nrow(contexts)), function(s) {
    file <- contexts[s, 1]
    paragraph <- contexts[s, 2]
    sentence <- contexts[s, 3]

    if (single.document) {
      document <- 1
      log.position <- c(paragraph, sentence)
      # Label with paragraph and sentence; the placeholder file name "?"
      # carries no information.
      from <- paste0("(", paste(paragraph, sentence, sep = ", "), ")")
    } else {
      document <- file
      log.position <- c(paragraph, sentence, file)
      from <- paste0("(", paste(file, paragraph, sentence, sep = ", "), ")")
    }

    context <- lookContext(document, paragraph, sentence,
                           certain.string, string.condition)
    if (is.na(context))
      return(NULL)

    sentenceLog(log.position, context)
    c(from, context)
  })

  # Fill the sentence info, keeping only the positions that matched
  found <- Filter(Negate(is.null), found)
  if (length(found) > 0) {
    inspection <- do.call("rbind", found)
    explored.contexts[seq_len(nrow(inspection)), ] <- inspection
  }
  explored.contexts
}
# Build one "first - second" label per row of `pairs`.
#
# Arguments:
#   pairs  two-dimensional table (data frame or matrix); column 1 and
#          column 2 hold the two members of each pair.
#
# Prints "Created pair: <label>" for every row and returns the labels as a
# one-column character matrix with one row per input row. An input without
# rows prints nothing and returns a 0 x 1 character matrix.
createPairs <- function(pairs)
{
  # Handled separately: paste() turns zero-length input into the label " - ".
  if (nrow(pairs) == 0)
    return(matrix(character(0), ncol = 1))

  pair <- paste(pairs[, 1], "-", pairs[, 2])
  cat(paste("Created pair:", pair, "\n"), sep = "")
  matrix(pair, ncol = 1)
}
# Label the named entity pairs of a relation table and plot how often each
# label occurs.
#
# Arguments:
#   relation.pairs  two-dimensional table (data frame or matrix) whose first
#                   two columns hold the members of each pair.
#
# Draws a bar plot of the label counts, most frequent first, and returns the
# labels produced by createPairs(). Nothing is plotted when there are no
# labels.
lookPairs <- function(relation.pairs)
{
  # Select only named entity pairs; drop = FALSE keeps a single-row matrix
  # two-dimensional so that createPairs() can still count its rows.
  pairs <- relation.pairs[, c(1, 2), drop = FALSE]
  freq <- createPairs(pairs)

  if (length(freq) > 0)
    barplot(sort(table(freq[, 1]), decreasing = TRUE), xlab = "pairs",
            ylab = "number of occurrences")
  freq
}