-
Notifications
You must be signed in to change notification settings - Fork 0
/
commentStats.py
executable file
·114 lines (90 loc) · 2.82 KB
/
commentStats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from __future__ import division
from collections import Counter
import scipy
import os
import myFunctions as m
from scipy import stats
from scipy.stats import mode
def _find_post_count(post_lines, prefix):
    """Return the text after the last ':' of the first line containing *prefix*.

    Returns None when no line of *post_lines* contains *prefix*.  Matching is a
    plain substring test, so a short prefix can match a longer name.
    """
    for entry in post_lines:
        if prefix in entry:
            return entry.rsplit(":", 1)[1]
    return None


def _parse_comment_rows(rows):
    """Parse '|'-separated data rows of one comments file.

    Returns a tuple ``(total_rows, commentator_ids, likes)`` where
    *total_rows* counts every row (malformed ones included),
    *commentator_ids* holds the stripped field 3 of every row that has one, and
    *likes* is a list of ``(field 6, int(field 5))`` pairs for the rows in
    which both fields are present and field 5 is an integer.
    """
    commentator_ids = []
    likes = []
    for row in rows:
        columns = row.split("|")
        try:
            # The id is recorded before the like count is parsed, so a row with
            # a bad like count still contributes its commentator.
            commentator_ids.append(columns[3].strip())
            like_count = int(columns[5])
            text = columns[6]
        except (IndexError, ValueError):
            # Short or non-numeric row: best-effort, skip the rest of it.
            continue
        likes.append((text, like_count))
    return len(rows), commentator_ids, likes


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def stats():
    """Build per-university comment reports from the files under ``USA/``.

    Inputs (relative to the current working directory):
      * ``lenPosts.dat`` -- lines whose text after the last ':' is a post count.
      * ``USA/*comments*`` -- '|'-separated files with one header line; files
        with fewer than three lines are skipped.

    Outputs:
      * ``USACommentStats/<name>ByLikes.dat`` -- one ``"text"|likes`` line per
        parsed row, highest like count first (ties keep file order).
      * ``zeroLikeComments.dat`` -- ``prefix|number of rows with 0 likes``.
      * ``topCommentators.dat`` -- ``prefix`` followed by every distinct
        commentator id, '|'-separated (ids are not ranked).
      * ``commentStats.csv`` -- one row per university that has an entry in
        ``lenPosts.dat`` plus a final ``total`` row.  Universities without an
        entry are printed to stdout instead, but still count toward the totals.

    NOTE(review): this function's name rebinds the module-level ``stats``
    imported from scipy.
    """
    source_dir = "USA"
    output_dir = "USACommentStats"

    with open("lenPosts.dat") as posts_file:
        posts = posts_file.readlines()
    # Sorted so that report rows come out in a reproducible order.
    names = sorted(os.listdir(source_dir))
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    grand_posts = 0
    grand_comments = 0
    grand_commentators = 0

    with open("zeroLikeComments.dat", "w") as zero_out, \
            open("commentStats.csv", "w") as stats_out, \
            open("topCommentators.dat", "w") as top_out:
        stats_out.write("university,numPosts,numComments,numCommentators,commentsPerCommentator\n")

        for name in names:
            if "comments" not in name:
                continue
            with open(os.path.join(source_dir, name)) as source:
                lines = source.readlines()
            # Header plus at least two data rows are required.
            if len(lines) < 3:
                continue

            prefix = name.split("comments.csv")[0]
            raw_posts = _find_post_count(posts, prefix)
            total_comments, commentator_ids, likes = _parse_comment_rows(lines[1:])

            commentators = Counter(commentator_ids)
            total_commentators = len(commentators)
            comments_per_commentator = _safe_ratio(total_comments, total_commentators)
            zero_like_count = sum(1 for _, like_count in likes if like_count == 0)

            grand_posts += int(raw_posts) if raw_posts is not None else 0
            grand_comments += total_comments
            grand_commentators += total_commentators

            # The output file is opened only once the input is known to be usable.
            likes.sort(key=lambda pair: pair[1], reverse=True)
            out_path = os.path.join(output_dir, name.replace(".csv", "ByLikes.dat"))
            with open(out_path, "w") as by_likes:
                for text, like_count in likes:
                    by_likes.write('"' + text.replace('"', '').strip() + '"' + "|" + str(like_count) + "\n")

            zero_out.write(prefix + "|" + str(zero_like_count) + "\n")
            top_out.write(prefix + "".join("|" + str(cid) for cid in commentators) + "\n")

            if raw_posts is None:
                # No post count known for this university: report it, write no row.
                print(prefix)
            else:
                stats_out.write(",".join([
                    prefix,
                    raw_posts.strip(),
                    str(total_comments),
                    str(total_commentators),
                    str(comments_per_commentator),
                ]) + "\n")

        stats_out.write(",".join([
            "total",
            str(grand_posts),
            str(grand_comments),
            str(grand_commentators),
            str(_safe_ratio(grand_comments, grand_commentators)),
        ]) + "\n")
# Run the report immediately; there is no __main__ guard, so this also fires on import.
stats()