-
Notifications
You must be signed in to change notification settings - Fork 0
/
check.py
68 lines (59 loc) · 2.08 KB
/
check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import logging
from crawlers.settings import DATA_PATH
# Absolute path of the directory that contains this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
# The "output" directory next to this script; it is walked further down.
output_path = os.path.join(current_dir, "output")
# Value of every Chinese digit character understood by the parser.
# '两' is the colloquial form of "two" used in front of units ("两百" == 200).
digit = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '两': 2}


def _trans(s):
    """Convert a Chinese numeral without '万'/'亿' (i.e. below 10000) to an int.

    Only the first occurrence of each of '千', '百' and '十' is looked at; the
    character directly in front of it is its multiplier.  A trailing digit
    character is added as the ones place.

    Args:
        s: Numeral text such as "三千四百五十六".  May be empty.

    Returns:
        The integer value; 0 for an empty string.

    Raises:
        KeyError: if '千' or '百' is preceded by a character that is not a
            known digit.
    """
    num = 0
    if not s:
        return num

    for unit, weight in (('千', 1000), ('百', 100), ('十', 10)):
        idx = s.find(unit)
        if idx == -1:
            continue
        # Guard idx == 0 explicitly: s[idx - 1] would wrap around to the last
        # character of the string.
        prev = s[idx - 1] if idx > 0 else ''
        if prev in digit:
            num += digit[prev] * weight
        elif idx == 0 or unit == '十':
            # The "one" may be omitted in front of a leading unit ("百",
            # "十二") and in front of '十' after another unit ("二百十五").
            num += weight
        else:
            raise KeyError(prev)

    # Ones place: a digit at the very end cannot belong to any unit.
    if s[-1] in digit:
        num += digit[s[-1]]
    return num


def trans(chn):
    """Convert a Chinese numeral string to an int.

    '零' characters are dropped first.  The text is then split at the last
    '亿' (10**8) and at the last '万' (10**4) that follows it; the part in
    front of '亿' is converted recursively so it may itself contain '万'.

    Args:
        chn: Numeral text such as "一亿二千万" or "一百零五".

    Returns:
        The integer value; 0 for an empty string.

    Raises:
        KeyError: propagated from ``_trans`` for malformed input.
    """
    chn = chn.replace('零', '')
    idx_y, idx_w = chn.rfind('亿'), chn.rfind('万')
    # A '万' located before '亿' belongs to the 亿 multiplier, not to the
    # ten-thousands section of this number.
    if idx_w < idx_y:
        idx_w = -1
    num_y, num_w = 100000000, 10000

    if idx_y != -1:
        high = trans(chn[:idx_y]) * num_y
        if idx_w != -1:
            return high + _trans(chn[idx_y + 1:idx_w]) * num_w + _trans(chn[idx_w + 1:])
        return high + _trans(chn[idx_y + 1:])
    if idx_w != -1:
        return _trans(chn[:idx_w]) * num_w + _trans(chn[idx_w + 1:])
    return _trans(chn)
def _chapter_number(title):
    """Return the chapter number found between '第' and '章' in *title*.

    The text is first tried as an Arabic number and then as a Chinese
    numeral.  Returns None when the Chinese-numeral parser rejects it.
    """
    t = title[title.find("第") + 1:title.find("章")].strip()
    try:
        return int(t)
    except ValueError:
        pass
    try:
        return trans(t)
    except KeyError:
        # Not a numeral the parser understands; let the caller flag the file
        # instead of aborting the whole scan.
        return None


# Walk every file below output_path and verify that its title lines (lines
# that do not start with a space) are numbered 1, 2, 3, ... without gaps.
# Prints the first offending title, then "[True|False] <path>" for each file.
for root, _dirs, files in os.walk(output_path):
    for fname in files:
        path = os.path.join(root, fname)
        expected = 1
        flag = True
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                if line.startswith(" "):
                    continue
                if _chapter_number(line) == expected:
                    expected += 1
                else:
                    flag = False
                    print(line)
                    break
        print("[{}] {}".format(flag, path))