-
Notifications
You must be signed in to change notification settings - Fork 0
/
bigTable.py
executable file
·89 lines (82 loc) · 2.68 KB
/
bigTable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from collections import Counter
from string import ascii_uppercase as letters
import os
import csv
def gen(L):
    """Yield the elements of L with repeated elements made unique by a suffix.

    An element that occurs exactly once in L is yielded unchanged.  An element
    that occurs ``n > 1`` times is yielded ``n`` times, each time with a
    different uppercase suffix appended: ``A`` .. ``Z`` first, then ``AA``,
    ``AB``, ... so that more than 26 repeats are no longer silently dropped
    (slicing the 26-letter alphabet could never produce more than 26 names).

    Elements must be hashable and support ``+`` with a string.  Elements are
    grouped by value; the order of the groups is whatever order the Counter
    iterates in.
    """
    # Function-scope import so the block does not depend on extra file-level
    # imports.
    from itertools import count as _widths, islice, product

    def _suffixes():
        # A, B, ..., Z, AA, AB, ... (all combinations of each width in turn).
        for width in _widths(1):
            for combo in product(letters, repeat=width):
                yield "".join(combo)

    for elt, occurrences in Counter(L).items():
        if occurrences == 1:
            yield elt
        else:
            for suffix in islice(_suffixes(), occurrences):
                yield elt + suffix
# Names of the files in FCBPosts.  Later code splits each name on ":" and uses
# the part before it as the lookup key into `dictionary`.
lst = os.listdir("FCBPosts")

# Read the tab-separated college list, closing the file as soon as it is read.
with open("FCBCollegeList.txt") as college_file:
    lines = college_file.readlines()
if len(lines) == 1:
    # The whole file came back as a single line: split it on bare "\r"
    # line endings instead.
    lines = lines[0].split("\r")

# Maps the value of column 3 -> list of the column-0 values of every row that
# carries it (one key can therefore map to several column-0 values).
dictionary = {}
lines = lines[1:]  # skip the first line (presumably a header row -- confirm)
for line in lines:
    line = line.split("\t")
    if len(line) < 4:
        # Blank or truncated row (e.g. the empty tail left by splitting on
        # "\r"); there is no column 3 to index, so skip it instead of crashing.
        continue
    key = line[3]
    if "Rutgers" in key:
        # Only keys containing "Rutgers" have their double quotes stripped.
        key = key.replace('"', '')
    dictionary.setdefault(key, []).append(line[0])

csvwriter = csvreader = None
# Toggle used by the merge pass to alternate between the first and second
# value when a key maps to more than one.
count = 0
# Disabled code: the posts-merging pass, kept inside a bare string literal so
# it is never executed.  It has the same shape as the comments pass that
# follows it, but reads the files in FCBPosts and writes allPosts.csv.
'''
with open("FCBPosts/Auburn University:149244371900507posts.csv", 'rb') as f,open("allPosts.csv", 'wb') as outf:
csvreader = csv.DictReader(f,delimiter='|')
fieldnames = ['school_id'] + csvreader.fieldnames # add column name to beginning
csvwriter = csv.DictWriter(outf, fieldnames, delimiter="|")
csvwriter.writeheader()
for fileName in lst:
if "DS_Store" not in fileName:
ID = str(dictionary[fileName.split(":")[0]]).replace("[","").replace("]","")
if "," in ID and count == 0:
count = 1
ID = ID.split(",")[0]
elif "," in ID and count == 1:
count = 0
ID = ID.split(",")[1]
ID = ID.replace("'","")
ID = str(ID)
with open("FCBPosts/"+fileName, 'rb') as inf:
csvreader = csv.DictReader(inf,delimiter='|')
for row in csvreader:
csvwriter.writerow(dict(row,school_id=ID))
'''
# Merge the per-file comments CSVs ("|"-delimited) into allComments.csv,
# adding a leading school_id column to every row.
# NOTE(review): the 'rb'/'wb' modes are the Python 2 convention for the csv
# module; under Python 3 the files would have to be opened in text mode with
# newline='' -- confirm the target interpreter before changing them.
with open("FCBComments/Auburn University:149244371900507comments.csv", 'rb') as f, \
        open("allComments.csv", 'wb') as outf:
    # This first file is opened only to learn the column names for the output.
    csvreader = csv.DictReader(f, delimiter='|')
    fieldnames = ['school_id'] + csvreader.fieldnames  # add column name to beginning
    csvwriter = csv.DictWriter(outf, fieldnames, delimiter="|")
    csvwriter.writeheader()

    for fileName in lst:
        if "DS_Store" in fileName:
            continue

        # Look the IDs up by the part of the file name before ":".  A name
        # that is not in `dictionary` raises KeyError, as it always did.
        ids = dictionary[fileName.split(":")[0]]
        if len(ids) > 1:
            # Several IDs share this name: successive files alternate between
            # the first and the second one, with `count` (0 or 1) as the
            # toggle.  Indexing the list directly avoids the stray leading
            # space (" 2") and repr-escaping that the old
            # str(list).replace(...).split(",") round trip produced.
            # NOTE(review): the toggle is global, so this relies on the two
            # files of one school being visited back to back -- confirm.
            ID = ids[count]
            count = 1 - count
        else:
            ID = ids[0]

        # `lst` holds the FCBPosts file names; derive the matching comments
        # file name (every occurrence of "posts" in the name is replaced).
        fileName = fileName.replace("posts", "comments")
        with open("FCBComments/" + fileName, 'rb') as inf:
            csvreader = csv.DictReader(inf, delimiter='|')
            for row in csvreader:
                csvwriter.writerow(dict(row, school_id=ID))
# Disabled code: a debugging snippet kept inside a bare string literal so it is
# never executed.  For FCBPosts and then FCBComments it collects the part of
# each file name before ":", passes the list through gen(), and prints it.
'''
lst = os.listdir("FCBPosts")
fileList = []
for fileName in lst:
fileList += [fileName.split(":")[0]]
fileList = list(gen(fileList))
print fileList
lst = os.listdir("FCBComments")
fileList = []
for fileName in lst:
fileList += [fileName.split(":")[0]]
fileList = list(gen(fileList))
print fileList
'''