-
Notifications
You must be signed in to change notification settings - Fork 0
/
bayes_optM.py
342 lines (267 loc) · 9.85 KB
/
bayes_optM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
import asyncio
import math
import os
# import opentuner
import re
import threading
import time
from bayes_opt import BayesianOptimization
from bayes_opt.util import UtilityFunction
import ClusterInfo
import RunHPL
# from colorama import Fore
try:
import json
import tornado.ioloop
import tornado.httpserver
from tornado.web import RequestHandler
import requests
except ImportError:
raise ImportError(
"In order to run this example you must have the libraries: " +
"`tornado` and `requests` installed."
)
# Tunable settings for the optimization run.
# Number of suggest/evaluate rounds performed by run_optimizer().
Number_of_iter = 30
# Directory holding HPL.dat (rewritten before each run) and bayes.txt
# (parsed for the result after each run).
file_path = '/work/ssc-laihb/haibin/hpl-2.3/testing'
# Seconds slept after RunHPL.hpl() before the `bjobs` polling starts.
WAITING_TIME = 320
# Seconds slept after a RunHPL compile call.
COMPILE_TIME = 60
# Seconds slept after ClusterInfo.cpu_info() before its output is read.
LSF_TIME = 20
# Settings handed to the bayes_opt UtilityFunction (kind="ucb") as
# `kappa` and `xi` respectively.
Kappa = 3
Xi = 1
def black_box_function(N_rate, NBs_rate, NBMIN, BCAST):
    """Run HPL once with the suggested parameters and return the parsed score.

    The function rewrites four lines of ``HPL.dat`` under ``file_path``,
    launches the benchmark through ``RunHPL.hpl()``, waits for the LSF
    queue (``bjobs``) to drain, and then parses ``bayes.txt`` under
    ``file_path`` for the result.

    Args:
        N_rate: Value used to scale the problem size ``N`` (see the formula
            below); the optimizer bounds it to 80..100.
        NBs_rate: Position, in percent, of the block size between
            ``NBs_min`` and ``NBs_max``.
        NBMIN: Value written to the ``NBMINs`` line after rounding.
        BCAST: Value written to the ``BCASTs`` line after rounding.

    Returns:
        The last value above the acceptance threshold found in
        ``bayes.txt`` when the file contains a ``PASSED`` line, otherwise 0.
        0 is also returned when ``PASSED`` is present but no such value was
        found (the original code raised ``IndexError`` in that case).

    Raises:
        OSError: If ``HPL_Parameter.txt``, ``HPL.dat`` or ``bayes.txt``
            cannot be opened.
        ValueError: If a value in ``HPL_Parameter.txt`` is not a number.
    """
    # Only parsed values strictly greater than this are accepted as a result.
    result_threshold = 20

    # Defaults; each may be overridden by a matching line in
    # HPL_Parameter.txt ("<name> <value>").
    limits = {"N_max": 180000, "NBs_min": 10, "NBs_max": 512}
    with open('HPL_Parameter.txt', 'r') as param_file:
        for raw_line in param_file:
            parts = raw_line.split()
            # Skip blank lines and lines without a value instead of
            # failing with IndexError on parts[1].
            if len(parts) < 2:
                continue
            param_name = parts[0]
            param_value = float(parts[1])
            if param_name in limits:
                limits[param_name] = param_value
    free_memory = limits["N_max"]
    NBs_min = limits["NBs_min"]
    NBs_max = limits["NBs_max"]

    # Derive the HPL parameters from the optimizer's suggestion.
    if 100 - N_rate < 2:
        print("May need bigger max_N!")
    # NOTE(review): presumably free_memory is a size in KB and 8 is the
    # byte size of one matrix element -- confirm against ClusterInfo.
    next_N = round(math.sqrt((free_memory * 1024 * N_rate / 8)) / 10)
    next_NBs = round(NBs_min + ((NBs_max - NBs_min) * NBs_rate) / 100)
    next_NBmin = round(NBMIN)
    next_BCAST = round(BCAST)
    print("next_N:", next_N)
    print("next_NBs:", next_NBs)
    print("next_NBmin:", next_NBmin)
    print("next_BCAST:", next_BCAST)

    # Rewrite the affected HPL.dat lines in place (0-based line indices).
    with open(file_path + '/HPL.dat', 'r+') as dat_file:
        lines = dat_file.readlines()
        lines[5] = str(next_N) + " Ns\n"
        lines[7] = str(next_NBs) + " NBs\n"
        lines[16] = str(next_NBmin) + " NBMINs (>= 1)\n"
        lines[22] = str(next_BCAST) + " BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)" + "\n"
        dat_file.seek(0)
        dat_file.writelines(lines)
        # Drop any leftover bytes if the new content is shorter.
        dat_file.truncate()

    # Launch the benchmark and give it time to run.
    RunHPL.hpl()
    time.sleep(WAITING_TIME)

    # Poll the LSF queue. This is not safe with several optimizers running
    # at once, because `bjobs` may list jobs that belong to another one.
    # NOTE(review): assumes only polls with empty `bjobs` output are
    # counted, and that 15 of them end the wait -- confirm.
    idle_polls = 0
    for _ in range(300):
        # Close the pipe after every poll so handles do not accumulate.
        with os.popen("bjobs") as pipe:
            job_lines = pipe.readlines()
        if job_lines:
            time.sleep(180)
        else:
            time.sleep(4)
            idle_polls += 1
            if idle_polls == 15:
                break

    # Parse the benchmark output.
    passed = False
    values = []
    float_pattern = re.compile(r"\d+\.\d+e[+-]\d+")
    with open(file_path + '/bayes.txt', 'r') as output_file:
        for line in output_file:
            if 'PASSED' in line:
                passed = True
                print(line.strip())
            # NOTE(review): every line is scanned, not only PASSED lines --
            # confirm this matches the layout of bayes.txt.
            matches = float_pattern.findall(line)
            if matches:
                # Only the first number on a line is considered.
                value = float(matches[0])
                if result_threshold < value:
                    values.append(value)

    if not passed:
        print("HPL DID NOT PASSED")
        return 0
    if not values:
        # PASSED was seen but nothing usable was parsed; report a zero
        # score rather than crashing on values[-1].
        print("HPL PASSED but no result value was found")
        return 0
    HPL_value = values[-1]
    print("last HPL value:", HPL_value)
    return HPL_value
class BayesianOptimizationHandler(RequestHandler):
    """HTTP handler that records an observation and replies with the next point.

    The optimizer and utility function are class attributes, so every
    request handled by this class works on the same optimizer state.
    """

    # Search bounds (low, high) for each parameter of black_box_function.
    HPL_para = {
        "N_rate": (80, 100),
        "NBs_rate": (0, 100),
        "NBMIN": (2, 15),
        "BCAST": (0, 5),
    }
    _bo = BayesianOptimization(f=black_box_function, pbounds=HPL_para)
    _uf = UtilityFunction(kind="ucb", kappa=Kappa, xi=Xi)

    def post(self):
        """Register the posted observation, if any, and write a suggestion.

        The JSON body may carry "params" and "target". A KeyError raised
        while reading them or while registering is ignored, so a body
        without those keys just requests a suggestion.
        """
        payload = tornado.escape.json_decode(self.request.body)
        optimizer = self._bo
        try:
            optimizer.register(
                params=payload["params"],
                target=payload["target"],
            )
            point_count = len(optimizer.space)
            print("BO has registered: {} points.".format(point_count), end="\n\n")
        except KeyError:
            pass
        finally:
            # A suggestion is computed whether or not registration happened.
            next_point = optimizer.suggest(self._uf)
        self.write(json.dumps(next_point))
def run_optimization_app():
    """Serve the optimization endpoint on port 9009 and run the IO loop.

    A fresh asyncio event loop is installed for the calling thread first.
    The final call starts the Tornado IO loop, so this function is meant
    to be the target of a dedicated thread.
    """
    asyncio.set_event_loop(asyncio.new_event_loop())
    routes = [
        (r"/bayesian_optimization", BayesianOptimizationHandler),
    ]
    application = tornado.web.Application(routes)
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(9009)
    tornado.ioloop.IOLoop.instance().start()
def run_optimizer():
    """Drive the optimization loop against the local optimization server.

    For ``Number_of_iter`` rounds: post the previous observation (empty on
    the first round) to the server, evaluate ``black_box_function`` at the
    point it returns, and remember the best target seen. Finally append
    ``(name, best_target)`` to the module-level ``results`` list.

    The ``results`` list is created if it does not exist yet; the original
    ``global results`` statement raised ``NameError`` in that case, after
    all rounds had already been run.
    """
    name = "HPL Optimizer"
    register_data = {}
    max_target = None
    for _ in range(Number_of_iter):
        status = name + " wants to register: {}.\n".format(register_data)
        # The response body is the next point to evaluate, as a dict of
        # keyword arguments for black_box_function.
        resp = requests.post(
            url="http://localhost:9009/bayesian_optimization",
            json=register_data,
        ).json()
        target = black_box_function(**resp)
        register_data = {
            "params": resp,
            "target": target,
        }
        if max_target is None or target > max_target:
            max_target = target
        status += name + " got {} as target.\n".format(target)
        status += name + " will to register next: {}.\n".format(register_data)
        print(status, end="\n")
    # Same list a `global results` statement would reach, but created on
    # demand so a missing module-level list cannot crash the thread.
    globals().setdefault("results", []).append((name, max_target))
    print(name + " is done!", end="\n\n")
def run_on_single_node(node_name):
    """Prepare HPL for one node and run the Bayesian optimization on it.

    Steps: clean previous output, query the node's CPU information,
    compile and retarget HPL for the node, write ``HPL_Parameter.txt``,
    set the ``Ps``/``Qs`` lines of ``HPL.dat``, then start the
    optimization server and the optimizer thread and print what they found.

    Args:
        node_name: Node identifier passed through to the ``ClusterInfo``
            and ``RunHPL`` helpers.

    Side effects:
        Rebinds the module-level ``results`` list. ``run_optimizer``
        appends to that list, so a function-local list (as in the original
        code) would always stay empty.
    """
    global results

    print("Try to run bayesian optimization on HPL at node {}\n".format(node_name))
    ClusterInfo.clean_output()

    # Find cpu info; wait before reading what the query produced.
    ClusterInfo.cpu_info(node_name)
    time.sleep(LSF_TIME)
    cpu_cores = ClusterInfo.read_cpu_info()

    # Compile
    RunHPL.compile_hpl_node(node_name, cpu_cores)
    time.sleep(COMPILE_TIME)

    # Change node
    RunHPL.change_hpl_node(node_name, cpu_cores)
    ClusterInfo.memory_size(node_name)
    free_mem = ClusterInfo.read_memory_size()

    # Write the limits that black_box_function reads back on every run.
    output_filename = 'HPL_Parameter.txt'
    with open(output_filename, 'w') as output_file:
        output_file.write(f"N_max {free_mem}\n")
        output_file.write("N_min 20000\n")
        output_file.write("NBs_min 128\n")
        output_file.write("NBs_max 512\n")

    P, Q = RunHPL.find_closest_factors(cpu_cores)

    # Rewrite the Ps/Qs lines of HPL.dat in place (0-based line indices).
    with open(file_path + '/HPL.dat', 'r+') as file:
        lines = file.readlines()
        lines[10] = str(P) + " Ps\n"
        lines[11] = str(Q) + " Qs\n"
        file.seek(0)
        file.writelines(lines)
        # Drop any leftover bytes if the new content is shorter.
        file.truncate()

    # Run bayesian. The shared result list must exist before any
    # optimizer thread can reach its final append.
    results = []
    ioloop = tornado.ioloop.IOLoop.instance()
    app_thread = threading.Thread(target=run_optimization_app)
    app_thread.daemon = True
    app_thread.start()

    targets = (
        run_optimizer,
    )
    optimizer_threads = []
    for target in targets:
        optimizer_threads.append(threading.Thread(target=target))
        optimizer_threads[-1].daemon = True
        optimizer_threads[-1].start()

    for optimizer_thread in optimizer_threads:
        optimizer_thread.join()

    for result in results:
        print(result[0], "found a maximum value of: {}".format(result[1]))

    ioloop.stop()
if __name__ == "__main__":
    # Script entry point: compile HPL, start the optimization server in a
    # daemon thread, run the optimizer thread(s) and print their results.
    print("welcome to bayesian_optimization on HPL")
    RunHPL.compile_hpl()
    time.sleep(COMPILE_TIME)

    # Created before any optimizer thread starts: run_optimizer appends
    # to this module-level list when it finishes.
    results = []

    ioloop = tornado.ioloop.IOLoop.instance()
    optimizers_config = [
        {"name": "HPL Optimizer"},
    ]

    app_thread = threading.Thread(target=run_optimization_app)
    app_thread.daemon = True
    app_thread.start()

    targets = (
        run_optimizer,
    )
    optimizer_threads = []
    for target in targets:
        optimizer_threads.append(threading.Thread(target=target))
        optimizer_threads[-1].daemon = True
        optimizer_threads[-1].start()

    for optimizer_thread in optimizer_threads:
        optimizer_thread.join()

    for result in results:
        print(result[0], "found a maximum value of: {}".format(result[1]))

    ioloop.stop()