-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
266 lines (242 loc) · 10.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
"""
主程序,传参确定需要计算的数据文件路径以及指数。
尽量保持各子函数的结构不变。
"""
import os
import sys
import argparse
import importlib
import pandas as pd
from functools import reduce
# import zs222
# import zs232
# import zs321
# import zs322
import utils
# Resolve the directory containing this script and make it the process working
# directory, so the relative paths used in this file ("../source_data/...",
# "./models/...", "./output/...") resolve against the script location rather
# than the directory the interpreter was started from.
# NOTE(review): this runs at import time, before argument parsing, so relative
# `source_file_path` / `output_path` CLI arguments are presumably also resolved
# against this directory rather than the caller's cwd -- confirm that is intended.
p = os.path.dirname(os.path.abspath(__file__))
os.chdir(p)
# print(os.path.abspath(os.curdir))
# Per-index spreadsheet handed to each index module's `convert_to_new_dataframe`
# as its second argument (see `Processing.cal_x`), keyed by index code.
# File-name suffixes, translated: ZS222 disposal-efficiency index, ZS232
# service-efficiency index, ZS321 environmental-problem index, ZS322 waste index,
# ZS341 service-demand index, ZS342 conflict/dispute index.
# NOTE(review): "GT" presumably stands for "ground truth" (historical scores) -- confirm.
INDEX_GT_PATHS = {
"zs222": "../source_data/ZS222 - 处置效能指数.xlsx",
"zs232": "../source_data/ZS232 - 服务效能指数.xlsx",
"zs321": "../source_data/ZS321 - 环境问题指数.xlsx",
"zs322": "../source_data/ZS322 - 废弃物指数.xlsx",
"zs341": "../source_data/ZS341 - 服务需求指数.xlsx",
"zs342": "../source_data/ZS342 - 矛盾纠纷指数.xlsx",
}
# Saved model file per index code, passed to `utils.model_predict`.
# In the code visible in this file only the zs321 / zs322 entries are read.
MODEL_PATHS = {
"zs222": "./models/zs222.sav",
"zs232": "./models/zs232.sav",
"zs321": "./models/zs321_poly.sav",
"zs322": "./models/zs322_poly.sav",
"zs341": "./models/zs341.sav",
"zs342": "./models/zs342.sav",
}
# The bare string below is a design note (in Chinese). Translated:
#   zs222 / zs232 / zs341 / zs342: generate a new index, then scale it back to
#       the original range (scaling parameters computed from full-year data).
#   zs321 / zs322: run the model trained on full-year data, then scale to the
#       original range (scaling parameters computed from full-year data).
# NOTE(review): no rescaling step is visible in this file -- confirm whether it
# happens inside the per-index modules / saved models or is still to be done.
"""
zs222: 生成新指数,生成后缩放至原范围(缩放参数由全年数据计算得出)
zs232: 生成新指数,生成后缩放至原范围(缩放参数由全年数据计算得出)
zs321: 调用全年数据训练的模型进行计算,然后缩放至原范围(缩放参数由全年数据计算得出)
zs322: 调用全年数据训练的模型进行计算,然后缩放至原范围(缩放参数由全年数据计算得出)
zs341: 生成新指数,生成后缩放至原范围(缩放参数由全年数据计算得出)
zs342: 生成新指数,生成后缩放至原范围(缩放参数由全年数据计算得出)
"""
class Processing(object):
    """Compute index scores from one source file and merge them into one table.

    Each supported index code names a sibling module (imported dynamically)
    whose ``convert_to_new_dataframe`` builds the per-index feature table.
    ``cal_x`` then scores that table in one of three ways:

    * ``zs321`` / ``zs322``: prediction from a saved model via
      ``utils.model_predict``.
    * ``zs341`` / ``zs342``: ``100 + 10 * on_time - 5 * delayed``.
    * ``zs222`` / ``zs232``: weighted sum of case counts and durations,
      divided by 1000.
    """

    def __init__(self, source_file_path, write_daily_data_to_disk=True):
        """Store the run configuration.

        :param source_file_path: path of the source data file handed to every
            index module's ``convert_to_new_dataframe``.
        :param write_daily_data_to_disk: when true, ``cal_x`` passes
            ``'../tmp_<ic>'`` as ``write_path`` to the index module; otherwise
            it passes an empty string.
        """
        self.source_file_path = source_file_path
        self.write_daily_data_to_disk = write_daily_data_to_disk
        self.allowed_ics = ['zs222', 'zs232', 'zs321', 'zs322', 'zs341', 'zs342']

    @staticmethod
    def _score_with_model(df, model_path):
        """Score ``df`` with the saved model at ``model_path``.

        The date / street / original-score columns are removed before the
        remaining columns are handed to ``utils.model_predict``.
        """
        # DataFrame.drop returns a new frame, so the caller's ``df`` is untouched.
        try:
            features = df.drop(["日期", "街道", "原指标"], axis=1)
        except KeyError:
            # Some frames carry stringified MultiIndex column labels instead.
            features = df.drop(["('日期', '')", "('街道', '')", "('原指标', '')"], axis=1)
        # TODO: complete model file
        return utils.model_predict(features, model_path)

    @staticmethod
    def _score_completion(df):
        """Score zs341 / zs342: base 100, +10 per on-time unit, -5 per delayed unit.

        Uses the first column whose label contains '按时完成' (completed on
        time) and the first whose label contains '延期完成' (completed late).
        """
        w1 = 10
        w2 = -5
        on_time = df[[c for c in df if '按时完成' in c]].iloc[:, 0]
        delayed = df[[c for c in df if '延期完成' in c]].iloc[:, 0]
        return 100 + on_time * w1 + delayed * w2

    @staticmethod
    def _score_efficiency(df):
        """Score zs222 / zs232 from case counts and handling durations.

        Design notes carried over from the original author:

        * the in-plan and out-of-plan handling durations form the base score;
        * self-handled cases earn a bonus;
        * forced closures were meant to cost points, but are currently
          weighted 0;
        * the count of other cases is not scored (it is already reflected, to
          some extent, in the in-plan / out-of-plan durations);
        * the case-filing duration is not scored, because this index measures
          disposal efficiency and does not cover report-to-filing time.

        Zero-weighted columns are still read, so they must exist in ``df`` and
        their missing values still propagate into the result.
        """
        # TODO: give more weight to cases finished within the same day (e.g. a
        # self-handled case finished the same day misses the overnight
        # execution score that a case finished the next day receives).
        w1 = 60  # self-handled cases: w1 minutes = assumed average time to finish one
        w2 = 0   # other cases: not scored
        w3 = 0   # forced closures: not scored for now (a forced closure also spawns a new case)
        w4 = 0   # case-filing duration (minutes): not scored
        w5 = 1   # in-plan handling duration (minutes)
        w6 = -1  # out-of-plan handling duration (minutes)
        return (df["自行处理案件总数"] * w1 +
                df["其他案件总数"] * w2 +
                df["强制结案总数"] * w3 +
                df["立案耗时总长(分钟)"] * w4 +
                df["计划内耗时总长(分钟)"] * w5 +
                df["计划外耗时总长(分钟)"] * w6
                ) / 1000

    def cal_x(self, ic):
        """Compute the score for one index and write it to ``./output/<ic>.xlsx``.

        :param ic: index code; must be one of ``self.allowed_ics``.
        :return: ``(y, df)`` where ``y`` is the score (a ``pd.Series`` for the
            formula-based indices; whatever ``utils.model_predict`` returns for
            the model-based ones) and ``df`` is the index module's frame with
            the score added as a column named ``ic``.
        :raises ValueError: if ``ic`` is not an allowed index code.
        """
        print("该功能暂时只用于测试!!!")
        # Explicit raise instead of ``assert``: asserts are stripped under
        # ``python -O``, which would let arbitrary names reach import_module.
        if ic not in self.allowed_ics:
            raise ValueError("Mode must be in {}, not {}".format(self.allowed_ics, ic))

        mod = importlib.import_module(ic)
        daily_data_write_path = '../tmp_{}'.format(ic) if self.write_daily_data_to_disk else ''
        gt_file = INDEX_GT_PATHS[ic]
        summary_data_write_path = './output/{}.xlsx'.format(ic)

        # TODO: merge the two convert_to_new_dataframe implementations into one
        df = mod.convert_to_new_dataframe(self.source_file_path, gt_file, write_path=daily_data_write_path)

        if ic in ['zs321', 'zs322']:
            y = self._score_with_model(df, MODEL_PATHS[ic])
        elif ic in ['zs341', 'zs342']:
            y = self._score_completion(df)
        elif ic in ['zs222', 'zs232']:
            y = self._score_efficiency(df)
        else:
            # Only reachable if ``allowed_ics`` is extended without adding a
            # scoring rule for the new code.
            raise IndexError("不可能出现的错误。")

        df = df.assign(score=y)
        df = df.rename(columns={"score": ic})

        # Make sure the output directory exists before writing the summary.
        os.makedirs(os.path.dirname(summary_data_write_path), exist_ok=True)
        df.to_excel(summary_data_write_path)
        return y, df

    @staticmethod
    def merge_by_year_and_date(input_list):
        """Outer-join per-index tables on date and street so scores stay aligned.

        :param input_list: iterable of ``pd.DataFrame``, each containing the
            columns "日期" (date) and "街道" (street).
        :return: ``pd.DataFrame`` with one row per (date, street) pair found in
            any input; an empty frame holding only the key columns when
            ``input_list`` is empty.
        """
        # https://stackoverflow.com/questions/41815079/pandas-merge-join-two-data-frames-on-multiple-columns
        # https://stackoverflow.com/questions/23668427/pandas-three-way-joining-multiple-dataframes-on-columns/23671390
        keys = ["日期", "街道"]
        frames = list(input_list)
        if not frames:
            # reduce() without an initial value raises TypeError on empty input.
            return pd.DataFrame(columns=keys)
        return reduce(
            lambda left, right: pd.merge(left, right, how='outer', left_on=keys, right_on=keys),
            frames,
        )

    def cal_all(self, save_path):
        """Compute every allowed index, merge the results and save them.

        :param save_path: path of the Excel file the merged table is written to.
        :return: the merged ``pd.DataFrame``.
        """
        # calculate
        # NOTE(review): cal_x returns each module's full frame, not just the
        # score column; if frames share non-key column names, pd.merge will
        # suffix (or reject) them -- confirm whether only the key and score
        # columns should be merged.
        df_list = [self.cal_x(ic)[1] for ic in self.allowed_ics]
        # merge
        df_final = self.merge_by_year_and_date(df_list)
        # save to disk
        df_final.to_excel(save_path)
        return df_final
def main(args):
    """Run the computation selected by the parsed command-line arguments.

    :param args: namespace providing ``source_file_path``, ``output_path`` and
        ``mode``. With ``mode == 'all'`` every index is computed and the merged
        table is saved to ``output_path``; any other mode computes just that
        index, and ``output_path`` is not used.
    """
    processor = Processing(args.source_file_path, write_daily_data_to_disk=False)
    if args.mode != 'all':
        # Single-index run.
        processor.cal_x(args.mode)
        return
    processor.cal_all(args.output_path)
if __name__ == "__main__":
    # Command-line entry point: two required positional paths plus an optional
    # --mode selecting a single index (default: compute all of them).
    parser = argparse.ArgumentParser()
    for positional, description in (
        ('source_file_path', 'source file to be processed'),
        ('output_path', 'result file to be saved'),
    ):
        parser.add_argument(positional, type=str, help=description)
    parser.add_argument(
        '--mode',
        default='all',
        choices=['all', 'zs222', 'zs232', 'zs321', 'zs322', 'zs341', 'zs342'],
    )
    args = parser.parse_args()
    main(args)