-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract_zhuhui.py
209 lines (199 loc) · 10.6 KB
/
extract_zhuhui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import os
from datetime import datetime
import html
from bs4 import BeautifulSoup
import shutil
import warnings
import sqlite3
import re
# Hide warnings whose text mentions "HTML parser"; BeautifulSoup is called
# later in this script without naming a parser explicitly.
warnings.filterwarnings("ignore", message=".*HTML parser.*")

# Backup directory to read from (presumably an iOS/iTunes backup, given the
# MobileSync path and Manifest.db layout) and the account hash used in paths.
input_root = 'C:/Users/zhaofeng/Apple/MobileSync/Backup/00008030-000C64C63446402E'
my_id = 'fc1faa899a951bf80f7755c8e40ca392'
micro_msg_key = 'Documents/{0}/DB/WCDB_Contact.sqlite'.format(my_id)

# Chat databases are named message_1.sqlite ... message_9.sqlite under the
# account's DB directory.  The pattern is derived from my_id (instead of a
# second hard-coded copy of the hash) and the dot is escaped so that it only
# matches a literal ".sqlite" suffix.
message_db_pattern = re.compile(
    r'^Documents/{0}/DB/message_([1-9])\.sqlite$'.format(re.escape(my_id)))

# (relativePath, fileID) pairs for every chat database, plus the fileID of the
# contact database ('' when the manifest does not list it).
message_sqlite_list = []
micro_msg_path = ''

conn = sqlite3.connect('{0}/Manifest.db'.format(input_root))
try:
    c = conn.cursor()
    c.execute("SELECT fileID,relativePath FROM Files WHERE domain='AppDomain-com.tencent.xin'")
    for file_id, relative_path in c.fetchall():
        found = message_db_pattern.match(relative_path)
        if found is not None:
            message_sqlite_list.append((found.group(), file_id))
        elif relative_path == micro_msg_key:
            micro_msg_path = file_id
finally:
    # The manifest is only needed for this lookup; release the file handle.
    conn.close()

# Process the chat databases in name order (message_1, message_2, ...).
message_sqlite_list.sort(key=lambda item: item[0])
# Open one cursor per chat database listed in message_sqlite_list.  Each entry
# is a (relativePath, fileID) pair; the file itself is stored in the backup at
# <input_root>/<first two characters of fileID>/<fileID>.
cursor_list = []
for relative_path, file_id in message_sqlite_list:
    print(relative_path, file_id)
    backup_file = "/".join((input_root, file_id[:2], file_id))
    conn = sqlite3.connect(backup_file)
    cursor_list.append(conn.cursor())
# Chat table to export and the query that reads it.  The table name is
# "Chat_" followed by the hash below.
table = 'a008711ad1fdc9f567f38778552cbbd3'
statement = 'SELECT CreateTime,Message,Des,Type,MesLocalID FROM Chat_{0};'.format(table)

# Title shown in the generated page.
displayname = '赵丰与朱慧'

# Page header.  The attribute values are written with real double quotes:
# a doubled quote ("") inside a Python string literal does not escape a quote,
# it ends one literal and starts the next, which silently drops every quote
# and truncates values containing spaces (e.g. the table's style attribute).
_html = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
         '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">')
_html += ('<html xmlns="http://www.w3.org/1999/xhtml"><head>'
          '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'
          '<title>' + displayname + ' - 微信聊天记录</title></head>')
_html += ('<body><table width="600" border="0" '
          'style="font-size:12px;border-collapse:separate;border-spacing:0px 20px;'
          'word-break:break-all;table-layout:fixed;word-wrap:break-word;" '
          'align="center">')

# Portrait image file names (referenced in the page under "Portrait/") and
# display names for the two participants.
my_portaint = 'Feishu20220613-201140.jpg'
target_portaint = 'Feishu20220613-201131.jpg'
myself_DisplayName = '赵丰'
friend_DisplayName = '朱慧'

# Identifier used to build the "<_id>_files/" media directory name, and the
# output directory that holds the media files and the generated page.
_id = 'zh2512382436'
root_dir = 'E:/wechat_out/fc1faa899a951bf80f7755c8e40ca392/'
def RemoveCdata(st):
    """Return *st* without an enclosing ``<![CDATA[ ... ]]>`` wrapper.

    Values pulled out of message XML with a regex may still carry a CDATA
    wrapper; written into an ``href`` or ``src`` attribute unchanged it would
    produce a broken link.  When *st* (ignoring surrounding whitespace) is
    exactly one CDATA section, its inner text is returned.  Any other string
    is returned unchanged.
    """
    prefix, suffix = "<![CDATA[", "]]>"
    trimmed = st.strip()
    if trimmed.startswith(prefix) and trimmed.endswith(suffix):
        return trimmed[len(prefix):-len(suffix)]
    return st
# ---------------------------------------------------------------------------
# Rendering helpers.  Each returns the HTML fragment placed in the message
# cell of one table row.  Media files are looked up on disk under
# root_dir + _id + "_files/" and referenced from the page relative to root_dir.
# ---------------------------------------------------------------------------

_THUMB_STYLE = 'style="max-width:100px;max-height:60px"'
_FLOAT_THUMB_STYLE = 'style="float:left;max-width:100px;max-height:60px"'
_QUOTE_RULE = "- - - - - - - - - - - - - - -"


def _attachment(msgid, suffix):
    """Return (relative_path, exists_on_disk) for a media file of *msgid*."""
    relative = _id + "_files/" + msgid + suffix
    return relative, os.path.exists(root_dir + relative)


def _render_audio(msgid):
    """Render a type-34 message: an audio player, or a placeholder if the mp3 is missing."""
    audio, has_audio = _attachment(msgid, ".mp3")
    if not has_audio:
        return "[语音]"
    return ('<audio controls><source src="' + audio + '" type="audio/mpeg">'
            '<a href="' + audio + '">播放</a></audio>')


def _render_emoji(message):
    """Render a type-47 message as an image taken from the Emoji_2/ directory.

    The file name is the second-to-last path segment of the <emoji> tag's
    ``cdnurl`` attribute.  A placeholder is returned when the tag, the
    attribute or that path segment is missing (the original code crashed or
    dropped into the debugger in those cases).
    """
    emoji_tag = BeautifulSoup(message).find("emoji")
    cdn_url = emoji_tag.get('cdnurl') if emoji_tag is not None else None
    if not cdn_url:
        return "[表情]"
    segments = RemoveCdata(cdn_url).split('/')
    name = segments[-2] if len(segments) >= 2 else ""
    if not name:
        return "[表情]"
    return '<img src="Emoji_2/' + name + '.gif" ' + _THUMB_STYLE + ' />'


def _render_video(msgid):
    """Render a type-62/43 message from whichever of thumbnail and mp4 exist."""
    thumb, has_thumb = _attachment(msgid, "_thum.jpg")
    video, has_video = _attachment(msgid, ".mp4")
    player_tail = ('<source src="' + video + '" type="video/mp4">'
                   '<a href="' + video + '">播放</a></video>')
    if has_thumb and has_video:
        return '<video controls poster="' + thumb + '">' + player_tail
    if has_thumb:
        return '<img src="' + thumb + '" /> (视频丢失)'
    if has_video:
        return '<video controls>' + player_tail
    return "[视频]"


def _render_image(msgid):
    """Render a type-3 message from whichever of thumbnail and full picture exist."""
    thumb, has_thumb = _attachment(msgid, "_thum.jpg")
    picture, has_picture = _attachment(msgid, ".jpg")
    if has_thumb:
        preview = '<img src="' + thumb + '" ' + _THUMB_STYLE + ' />'
        if has_picture:
            return '<a href="' + picture + '">' + preview + '</a>'
        return preview
    if has_picture:
        return '<img src="' + picture + '" ' + _THUMB_STYLE + ' />'
    return "[图片]"


def _render_location(message):
    """Render a type-48 message as "[位置 (y,x) label]".

    The patterns require the quoted attribute value.  Without the quotes the
    non-greedy group would capture a single character only.
    """
    x = re.search(r'x ?= ?"(.+?)"', message)
    y = re.search(r'y ?= ?"(.+?)"', message)
    label = re.search(r'label ?= ?"(.+?)"', message)
    if x and y and label:
        return ("[位置 (" + RemoveCdata(y.group(1)) + "," + RemoveCdata(x.group(1))
                + ") " + RemoveCdata(label.group(1)) + "]")
    return "[位置]"


def _render_file(message):
    """Render a type-49 sub-type-6 message: a link to the file named by <title>."""
    extension = re.search(r"<fileext>(.+?)</fileext>", message)
    title = re.search(r"<title>(.+?)</title>", message)
    if not (extension and title):
        return "[文件]"
    filename = title.group(1)
    relative = _id + "_files/" + filename
    if os.path.exists(root_dir + relative):
        return '<a href="' + relative + '">' + filename + '</a>'
    return filename + "(文件丢失)"


def _render_link_or_quote(message):
    """Render the remaining type-49 messages.

    With both a <title> and a <url> the message becomes a titled link, with an
    optional thumbnail and description.  Otherwise the last <content> element
    is treated as quoted material shown above the title.  If the expected
    elements are absent the placeholder "[链接]" is returned.
    """
    soup = BeautifulSoup(message)
    titles = soup.find_all("title")
    title = titles[0].text if titles else ""
    description = re.search(r"<des>(.*?)</des>", message)
    url = re.search(r"<url>(.+?)</url>", message)
    thumb_url = re.search(r"<thumburl>(.+?)</thumburl>", message)

    if title and url:
        pieces = []
        if thumb_url:
            pieces.append('<img src="' + RemoveCdata(thumb_url.group(1)) + '" '
                          + _FLOAT_THUMB_STYLE + ' />')
        pieces.append('<a href="' + RemoveCdata(url.group(1)) + '"><b>'
                      + RemoveCdata(title) + '</b></a>')
        if description:
            pieces.append("<br />" + RemoveCdata(description.group(1)))
        return "".join(pieces)

    contents = soup.find_all("content")
    if not contents:
        return "[链接]"
    sub_message = html.unescape(contents[-1].text)
    if 'xml' in sub_message or 'appmsg' in sub_message:
        # The quoted material is itself markup: summarise it.
        quoted = BeautifulSoup(sub_message)
        if quoted.find("img"):
            quote = "[图片]"
        elif quoted.find('videomsg'):
            quote = "[视频]"
        else:
            quoted_titles = quoted.find_all("title")
            if not quoted_titles:
                return "[链接]"
            quote = quoted_titles[0].text
    else:
        quote = sub_message
    return quote + "<br />------------<br />" + title + "<br/> "


def _render_app_message(message):
    """Render a type-49 message according to the <type> marker it contains."""
    if "<type>2001<" in message:
        return "[红包]"
    if "<type>2000<" in message:
        return "[转账]"
    if "<type>17<" in message:
        return "[实时位置共享]"
    if "<type>6<" in message:
        return _render_file(message)
    return _render_link_or_quote(message)


def _render_card(message):
    """Render a type-42 message: nickname plus optional small head image."""
    nickname = re.search(r'nickname ?= ?"(.+?)"', message)
    head_image = re.search(r'smallheadimgurl ?= ?"(.+?)"', message)
    if not nickname:
        return "[名片]"
    rendered = ""
    if head_image:
        rendered += ('<img src="' + RemoveCdata(head_image.group(1)) + '" '
                     + _FLOAT_THUMB_STYLE + ' />')
    return rendered + "[名片] " + RemoveCdata(nickname.group(1))


def _render_message(msg_type, message, msgid):
    """Return the HTML for the message cell, chosen by the row's Type value."""
    if msg_type == 34:
        return _render_audio(msgid)
    if msg_type == 47:
        return _render_emoji(message)
    if msg_type in (62, 43):
        return _render_video(msgid)
    if msg_type == 50:
        return "[视频/语音通话]"
    if msg_type == 3:
        return _render_image(msgid)
    if msg_type == 48:
        return _render_location(message)
    if msg_type == 49:
        return _render_app_message(message)
    if msg_type == 42:
        return _render_card(message)
    # Everything else is shown as plain text and must be escaped.
    return html.escape(message)


def _sender_cell(des):
    """Return the opening <tr> and the portrait/name cell; Des == 0 selects "my" side."""
    if des == 0:
        portrait, name = my_portaint, myself_DisplayName
    else:
        portrait, name = target_portaint, friend_DisplayName
    return ('<tr><td width="80" align="center"><img src="Portrait/' + portrait
            + '" width="50" height="50" /><br />' + name + "</td>")


def _fetch_chat_rows(cursor):
    """Run ``statement`` on *cursor* and return its rows, then close the connection.

    A database for which the query raises sqlite3.OperationalError (for
    example because it has no such table) contributes no rows.
    """
    try:
        try:
            cursor.execute(statement)
        except sqlite3.OperationalError:
            return []
        return cursor.fetchall()
    finally:
        cursor.connection.close()


# ---------------------------------------------------------------------------
# Build the page: one table row per message, across all chat databases.
# Fragments are collected in a list and joined once at the end.
# ---------------------------------------------------------------------------
html_parts = [_html]
for index, cursor in enumerate(cursor_list):
    print(index)
    for unixtime, message, des, msg_type, local_id in _fetch_chat_rows(cursor):
        if msg_type == 10002:
            # revoke message
            continue
        if message is None:
            message = ""
        if msg_type == 10000:
            html_parts.append('<tr><td width="80"> </td><td width="100"> </td><td>系统消息: '
                              + message + "</td></tr>")
            continue

        body = _render_message(msg_type, message, str(local_id))
        # Put the dashed separator on a line of its own.
        body = body.replace(_QUOTE_RULE, "<br/>" + _QUOTE_RULE + "<br/>")

        timestamp = datetime.fromtimestamp(unixtime).strftime("%Y/%m/%d %H:%M:%S")
        html_parts.append(_sender_cell(des))
        html_parts.append('<td width="100" align="center">'
                          + timestamp.replace(" ", "<br />") + "</td>")
        html_parts.append("<td>" + body + "</td></tr>")

# Close the table opened by the page header before closing the document.
html_parts.append("</table></body></html>")
_html = "".join(html_parts)
with open(os.path.join(root_dir, "test.html"), 'w', encoding='utf-8') as f:
    f.write(_html)