-
Notifications
You must be signed in to change notification settings - Fork 0
/
g2s_parser.py
119 lines (99 loc) · 3.02 KB
/
g2s_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import json
import re
import sys
def load_json(file):
    """Parse the JSON document stored at ``file`` and return the result.

    file: path to the .json file

    The file is read in binary mode on purpose: ``json.load`` then detects
    the encoding itself (UTF-8, UTF-16 or UTF-32, with or without a BOM)
    instead of decoding with whatever the platform locale happens to be.

    Raises whatever ``open`` raises for an unreadable path and
    ``ValueError`` (``json.JSONDecodeError``) for malformed JSON.
    """
    with open(file, "rb") as to_be_loaded:
        return json.load(to_be_loaded)
def get_context_value(data, name, get):
    """Return the list of values for the context field called ``name``.

    data: the output document being built (not read here; kept so the
          signature stays the same for callers)
    name: dotted context field name, e.g. "network.protocol"
    get:  parsed message dict; its headers are read from
          ``get['payload']['headers']`` as ``{'name': ..., 'value': ...}``
          pairs  # NOTE(review): looks like a Gmail API message - confirm

    Always returns a list.  ``[None]`` means "no value": it is returned for
    the fields that are never filled in, for an unknown ``name``, and when
    the header a field depends on is missing.
    """
    if name == "common.dataType":
        return ["DIM"]
    if name in ("common.authsender", "email.envelope.sender",
                "email.envelope.recipient"):
        return [None]

    if name == "network.protocol":
        for pair in get['payload']['headers']:
            if pair['name'] == 'Received':
                # The protocol is the word following the "with" clause.
                # Compare whole whitespace-separated tokens so that a host
                # such as "mail.withgoogle.com" is not mistaken for it.
                tokens = pair['value'].split()
                if "with" in tokens[:-1]:
                    return [tokens[tokens.index("with") + 1].lower()]
                # Only the first Received header is consulted.
                break
    elif name == "email.header.sender":
        for pair in get['payload']['headers']:
            if pair['name'] == 'From':
                # The From value may carry display names and other text;
                # keep only the addresses found in it.
                return get_emails(pair['value'])
    elif name == "email.header.recipient":
        for pair in get['payload']['headers']:
            if pair['name'] == 'Delivered-To':
                return [pair['value']]

    # Header not present (or unknown field): report "no value" as a list,
    # like every other branch, instead of falling through to None.
    return [None]
# modified from https://gist.github.com/dideler/5219706
# Besides plain "user@host.tld" the pattern also accepts the spelled-out
# "user at host dot tld" form.  It is written with lowercase classes only,
# so it is compiled case-insensitively; raw strings keep the backslashes
# intact without relying on invalid string escapes.
_EMAIL_REGEX = re.compile(
    r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`"
    r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|"
    r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
    re.IGNORECASE,
)


def get_emails(s):
    """Return the list of e-mail addresses found in the string ``s``.

    Addresses are returned in order of appearance with their original
    letter case.  Matches beginning with '//' are dropped: a URL such as
    'http://foo@bar.com' would otherwise be reported as '//foo@bar.com'.
    An empty list is returned when nothing matches.
    """
    # findall yields one tuple per match because the pattern has several
    # groups; group 0 of the tuple is the whole address.
    return [groups[0] for groups in _EMAIL_REGEX.findall(s)
            if not groups[0].startswith('//')]
def context(data, get):
    """Fill ``data['context']`` with one name/value entry per context field.

    data: output document; its 'context' key is replaced by a fresh list
    get:  parsed message dict, handed on to get_context_value

    Each entry is a dict ``{'name': <field name>, 'value': <result of
    get_context_value for that field>}``; entries follow the fixed field
    order listed below.  Nothing is returned.
    """
    field_names = (
        "common.dataType", "common.authsender", "network.protocol",
        "email.envelope.sender", "email.envelope.recipient",
        "email.header.sender", "email.header.recipient",
    )
    entries = []
    data['context'] = entries
    for field in field_names:
        # The entry is attached to the list before its value is looked up.
        entry = {'name': field}
        entries.append(entry)
        entry['value'] = get_context_value(data, field, get)
def body(data, get):
    """Fill ``data["body"]`` with the message id, MIME type and content.

    data: output document; its "body" key is replaced by a fresh list
    get:  parsed message dict; reads ``get["id"]`` and ``get["payload"]``

    ``data["body"]`` ends up as three single-key dicts, in this order:
    ``{"contentBlockId": get["id"]}``,
    ``{"mimeType": get["payload"]["mimeType"]}`` and ``{"data": content}``.

    ``content`` is ``get["payload"]["body"]["data"]`` when that exists.
    Otherwise the ``body.data`` strings of the message parts are joined in
    order: the children of the first part when it has nested "parts"
    (message with attachments), else the top-level parts themselves.
    Parts that carry no inline data are skipped.

    Raises KeyError when "id", "payload" or "mimeType" is missing, or when
    the payload has neither inline body data nor "parts".
    """
    blocks = []
    data["body"] = blocks
    blocks.append({"contentBlockId": get["id"]})

    payload = get["payload"]
    blocks.append({"mimeType": payload["mimeType"]})

    try:
        # for files without attachments
        content = payload["body"]["data"]
    except (KeyError, TypeError):
        # for files with attachments / multipart messages
        parts = payload["parts"]
        first_part = parts[0] if parts else {}
        # Fall back to the top-level parts when the first part is a leaf
        # rather than a container of nested parts.
        leaves = first_part.get("parts", parts)
        content = "".join(
            part["body"]["data"]
            for part in leaves
            if "data" in (part.get("body") or {})
        )
    blocks.append({"data": content})
def subject(data, get):
    """Fill ``data['subject']`` with the thread id, MIME type and subject.

    data: output document; its 'subject' key is replaced by a fresh list
    get:  parsed message dict; reads ``get["threadId"]``,
          ``get["payload"]["mimeType"]`` and ``get['payload']['headers']``

    ``data['subject']`` ends up as three dicts, in this order:
    ``{"contentBlockId": <thread id>}``, ``{"mimeType": <MIME type>}`` and
    ``{"data": <value of the first 'Subject' header>}``.  The third dict is
    left empty when the message has no 'Subject' header.  Nothing is
    returned.
    """
    blocks = []
    data['subject'] = blocks

    thread_block = {}
    blocks.append(thread_block)
    thread_block["contentBlockId"] = get["threadId"]

    mime_block = {}
    blocks.append(mime_block)
    mime_block["mimeType"] = get["payload"]["mimeType"]

    text_block = {}
    blocks.append(text_block)
    # Only the first matching header counts; later duplicates are ignored.
    subject_headers = (
        header for header in get['payload']['headers']
        if header['name'] == 'Subject'
    )
    first_match = next(subject_headers, None)
    if first_match is not None:
        text_block["data"] = first_match["value"]
def main(json_file):
    """Convert the message stored in ``json_file`` and print the result.

    json_file: path to the .json file holding the message

    The file is loaded, the 'context', 'body' and 'subject' sections are
    built into one document in that order, and the document is serialised
    with ``json.dumps``.  The JSON string is printed to stdout and also
    returned.
    """
    message = load_json(json_file)
    document = {}
    # Each builder adds its own top-level key to the shared document.
    for fill_section in (context, body, subject):
        fill_section(document, message)
    serialized = json.dumps(document)
    print(serialized)
    return serialized
if __name__ == '__main__':
    # cmd line: python g2s_parser.py file.json
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare
        # IndexError traceback; sys.exit prints the message to stderr
        # and ends the process with a non-zero status instead.
        sys.exit("usage: python g2s_parser.py file.json")
    main(sys.argv[1])