-
Notifications
You must be signed in to change notification settings - Fork 0
/
Convert2.py
73 lines (63 loc) · 1.17 KB
/
Convert2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
'''
f1 = open("pos.txt","r")
#f2 = open("tag.train","w")
content = f1.readlines()
d = {}
for x in content:
temp = x.split()
#print temp
#d = input()
if len(temp)==0:
continue
if temp[1] not in d:
d[temp[1]]=1
else:
d[temp[1]]+=1
#f2.write('\n')
f1.close()
#f2.close()
for i in d:
print i,d[i]
'''
# Tally the part-of-speech tags found in "pos.txt" and write the totals to
# "Tags_Tweet_Full", one "<tag> <count>" pair per line.
#
# Only the last whitespace-separated field of every non-blank input line is
# looked at; that field is treated as the line's tag.
with open("pos.txt", 'r') as f:
    cont = f.readlines()

# Tags that are all counted together under the single "delimiters" key.
delimiters = [",", "\'\'", "``", "#", "$", "(", ")", ".", ":"]

# These four keys are seeded so they are always written out, even at zero.
tags = {}
tags["delimiters"] = 0
tags["PRP"] = 0
tags["WP"] = 0
tags["RB"] = 0

for i in cont:
    s = i.split()
    if not s:
        continue  # blank line: nothing to count
    raw = s[-1]
    if raw in delimiters:
        key = "delimiters"
    elif raw == "PRP$":
        key = "PRP"
    elif raw == "WP$":
        key = "WP"
    elif raw in ("RBR", "RBS", "RB", "WRB"):
        key = "RB"
    elif raw == "TD":
        # The original incremented tags[""] here without ever creating that
        # key, so the first "TD" tag raised KeyError. The count is still kept
        # under the empty-string key, but the key is now created on demand.
        # NOTE(review): "TD" may be a typo for the determiner tag "DT", and ""
        # looks like a placeholder -- confirm the intended key.
        key = ""
    else:
        # A tag containing '|' is counted under the part after the last '|';
        # a tag without '|' is counted under itself. The original tested
        # `split('|') > 1` (a list compared with an int), which is always
        # true on Python 2 and a TypeError on Python 3; taking the last piece
        # unconditionally yields the same counts without that comparison.
        key = raw.split('|')[-1]
    tags[key] = tags.get(key, 0) + 1

with open("Tags_Tweet_Full", "w") as o:
    for x in tags:
        o.write(" ".join([x, str(tags[x])]))
        o.write('\n')
#Reference https://cs.nyu.edu/grishman/jet/guide/PennPOS.html