From ac38c15d1a9be01ceaf52a018c02502ae00ac7a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20Stra=C3=9Fenburg?= Date: Tue, 27 Feb 2024 15:51:55 +0100 Subject: [PATCH] plot fixes + use JOSN instead of pickle (#23) * changed db type from bytes to string latency now displays #entries TTA looks more like TTA but would need to change epochs in x-axis for actual time * Fixed Memory representation of different timed runs * Changed datatype from Byte to JSON * Changed datatype from Byte to JSON * Added Timed TTA * Added JSON support * Added Timed TTA Tracker * Added timed TTA tracker --------- Co-authored-by: sjoze Co-authored-by: sjoze <49590442+sjoze@users.noreply.github.com> --- pipelines/github_example/sample_db_file.db | Bin 24576 -> 28672 bytes pipelines/sample_pipeline/main.py | 16 +++- pipelines/stock_market_pipeline/train.py | 1 - umlaut/umlaut/benchmark.py | 8 +- umlaut/umlaut/datamodel.py | 7 +- umlaut/umlaut/metrics/__init__.py | 3 +- umlaut/umlaut/metrics/supervised_metrics.py | 19 +++-- umlaut/umlaut/metrics/valued_metrics.py | 82 +++++++++++++++++--- umlaut/umlaut/visualization.py | 21 +++-- 9 files changed, 125 insertions(+), 32 deletions(-) diff --git a/pipelines/github_example/sample_db_file.db b/pipelines/github_example/sample_db_file.db index b13ee845172544f82c6e763d9046c551c7c4085e..15055d3046de3a3cfa69f3e76c79370a96dce77f 100644 GIT binary patch literal 28672 zcmeI4e{dA#8OL|`c5m-?Z}%wZp_W`WA|#iX%-!uJxkS{2aD+(0kB~$nKjP(blpAtM z%w4LH(SoA3N^PgaqSQaI)8E=3iq+PttwkBDlq#ZyQW>cNirQk+qNvsGZn$OnE-*pC znasTJB)RuK`#kTv?|1j!=Y8J0%YylhscceN*4v+sXBCwiN6|D@qbL+bxwKzi`^^WLQiuXX0ipm=fG9u|APNu#hyp|bq5x6g zcm-~1ckmM@Po~!lX5*dR$#gQF8R*v@da{-(Th|<`ZHXx@wbL78isgV(B)C&uO2eF% zSbeNnnKQRVnbX?Xc(Ee52L`kQZMDsHvud05=N7Myu6Q)vt#W_QK~DT zw>WcdbF86$j?RkoVFi@t*vwdSY))Nlfzp}mNi0vt`&V?Nli9d7vbZv0Bm`Dh(fq_v zh`uS$(AI8gC(Ln5+mGvlTa|@zW#b)PcIie`(T-)Z@&0UwP5I--Yz$Bc)G^M9SB7XR z)zg){E^|${mcMtz2eQ4!^A7uob%gB9q1|eFeosl-uS0%;^8Uhmx%NN`QGh5w6d(!^ z1&9Jf0ipm=fG9u|APNu##)JZC2iuycO;!cFDk8yPX{f6rUK$C8uE&v0j?v`RaH(okizYnd9V2zEwQZ3LF>24eun zS`1YOtI8^)mBEUzpj}mm_g2cg)B8*9ffS+uQGh5w6d(!^1&9Jf0ipm=fG9u|APNu# z&YA*6j#hd!dlnAEa1^@@OrfLIJ(_(2P8$>BJ*+cK>I@qQGh5g1{GNAXWUe4?UeJ^ zkDD->T|e9BfkKh8%BrAR9`5Y!)&37iI+^b69~}9+ioNSmeV0%xHw+c674+{YY2Q{# z_hp8bXwO}_Z;TCDs_Ta81pWYb{+G^mi`zaNx@D+&XgOyhsqF|NdCzwM3H0RW5i$4g zLd2@gh`7hzM#N9gAaXr79+5DA6(Vt$fJnM}B_jDv8IVYqE=I&W{2U^7+8jjOW}6j1 zdn#JFwtp9qaMdJ4;@)+Lq?FA{KJ+qJdFYCZ5HU}D9}ye701>zKc0_!O&C0cV6Iuz0 z5F+v5!-%AQCnEXCj{!;aW!n)kTjnBSo1Q?#y^N5=SiZe!yu9mc7T#Q= zzx3;C+F4Q=gCZmc%9>Ks*ZOEZK;%zYKhFC(0z|0tXn;uc4QM}~J2c?D=9b!SfxFW! z?^y;&pf9=y5%a(uh}a0ekOc1W$!NvTs7B;^{_lu{1sf5GyKN^SC6=R={B|E8kq%La znBP2th^^Uxhd-h+d9XmO#22EqLL6ex3Gu4IJ;3+C?gD1TxM=zWf8m5r^UtisjLKGOw3as_BX8xZX z#m9pn{|{HIYFRW~rIuHo;r!n>$+Pr^k&Dx_!JS3>9Q}+ebvQiCG5Q!kta0{k#uM^AU{?9Yee)D&4?T`euRj99+EMi-?`x1 zXvOcgwdL-F^@7hY9Q+|Vdiohe(vbl``P5~Q2>bHsEwFU<rr2U z$c^-P!bRS~b;iV~<^K~_=`~UrTO#E0e}|d>e~thVRzo8J;tb^fT4-2O0e9ylXJrfosQKK%e9XJZES{!e2G|Go`PuE`(8}2kX{pc8@5Nc!?@qmlj>3DeCE)XW2ET?@GL4rbpPIQ5tn%q? z2}F*z6^PjV|3l>L`5Pks@GeB|8`mQeon44LcSaG(=l&OvLM_5rr_irof>w?du-C1H zhGC4Git~EN#(j#5`2n`&zWqK#;`mjFJR81+NWO3epaSYEKSf0UD}%^!Ek@S@_LC3M z%6S{?FZl{wUX0WQ?)A0kDEdd%QYQ=zr^sg8+H=WgF3mPG)ni_{%>p|yjZaZAcOUHe zwn>h)Y JWxGx5u&NhrJ0o+nVj7>tN^V=k?9_f@=01yg$UL7@c@3Xr6eR!G7c`_0 z1;)ApYyHk+`Tr>X_lfNvnnYCX{r~c+^5~h&|2^X!=S&rU-)0Fa29oM$Ba*jSGke{t zkUqy6jSR%xYYo)~Vy9T&p)e5lb8BXAAikW39t&NMKZHn_u^ExL^$tW*OBo`0_bY&k z=nJjycIYQj#Qdxdt=O;^5%-vgh_AISZH%^De>#Fz!hGxM(6ADBJ%d(KJcUSpYdoL; zUG^(P%p*4;VlOK}#69yCB7QD~$o0zGh=is0BNF%9dX##sNwq%4fPDB|a4e=Tv2Gmb zR>jO_Yc60QcD8j*WFYRPJFzW(5njh)*XuSUtXP1K;_xCw(#l3eUWaveOh1Vdy1}{> zG!XOR1>ji1wogRFz4i$rK6M@<*LyD^5(b|&&d&yMvqOG2bF21Ur_|JQi=?#mV-H62^5J@L4A9f=M#MaWb5Ve8vSz({TLJDBH#+i5 ztr@go3~fuR+8eycH3j!7Ehk`nVdc zgf)LeK6sC=NNc~KznqEiS(ewZR1BmCk#{d8T delta 87 zcmZp8z}Rqrae_1}8v_Fa+e8I>AVbgo=Ejr-{2WaD_6+>%`0Y0fDj4%mwwFH&6j{%} oe}jMhW serialized[i+1]: + serialized[i+1] = serialized[i] self.benchmark.log(description, self.MEASURE_TYPE, serialized, unit='accuracy') def serialize(self, accuracies): @@ -146,7 +160,10 @@ def serialize(self, accuracies): pickle object Serialized data. """ - return pickle.dumps(accuracies) + #return pickle.dumps(accuracies) + accuracies = accuracies.tolist() + return accuracies + class LossTracker: MEASURE_TYPE = "loss" @@ -184,4 +201,51 @@ def serialize(self, loss_values): pickle object Serialized data. """ - return pickle.dumps(loss_values) + #return pickle.dumps(loss_values) + loss_values = loss_values.tolist() + return loss_values + + +class TimedTTATracker: + MEASURE_TYPE = "timed tta" + + def __init__(self, benchmark, target_acc): + """ + Parameters + ---------- + benchmark : Benchmark + Benchmark object to store the data in / retrieve it from. + """ + self.benchmark = benchmark + self.target_acc = target_acc + self.time = time.perf_counter() + self.logged = False + + def track(self, accuracy, description): + """ + Parameters + ---------- + accuracy : int + Current accuracy of the run. + description : str + Description of tracked timed TTA. + """ + if accuracy >= self.target_acc and not self.logged: + self.benchmark.log(description + " (Target: " + str(self.target_acc) + ")", self.MEASURE_TYPE, time.perf_counter() - self.time, unit='time for target accuracy') + self.logged = True + + def serialize(self, accuracies): + """ + Parameters + ---------- + accuracies : list of ints + List of tracked accuracies of the run. + + Returns + ------- + pickle object + Serialized data. + """ + #return pickle.dumps(accuracies) + accuracies = accuracies.tolist() + return accuracies diff --git a/umlaut/umlaut/visualization.py b/umlaut/umlaut/visualization.py index a632a6a5..fd4bfa4b 100644 --- a/umlaut/umlaut/visualization.py +++ b/umlaut/umlaut/visualization.py @@ -2,6 +2,7 @@ from math import floor import pickle import sys +import ast import matplotlib.pyplot as plt import matplotlib.ticker as ticker @@ -11,6 +12,7 @@ import plotly.express as px import plotly.graph_objects as go import seaborn as sns +import json class Visualizer: def __init__(self, df_from_cli, plotting_backend): @@ -62,8 +64,9 @@ def plot_with_matplotlib(self): figs = [] for _, row in self.df.iterrows(): # every row needs to be visualized individually since each row corresponds to one confusion matrix - conf_mat_np = row['measurement_data']['matrix'] - labels = row['measurement_data']['labels'] + measurement_dict = json.loads(row['measurement_data']) + conf_mat_np = json.loads(measurement_dict['matrix'].replace(" ", ",")) + labels = measurement_dict['labels'] conf_mat_df = pd.DataFrame(conf_mat_np, index=pd.Index(labels, name="predicted"), columns=pd.Index(labels, name="actual") @@ -115,10 +118,11 @@ def __init__(self, df_from_cli, plotting_backend): for _, row in self.df.iterrows(): measurement_dict = row['measurement_data'] + measurement_dict = json.loads(measurement_dict) timestamps = pd.to_datetime(measurement_dict.pop('timestamps')) self.timedelta_lists.append(timestamps - timestamps[0]) - if len(timestamps) > len(self.x_tick_vals): - x_tick_idx = np.floor(np.linspace(0, len(self.timedelta_lists[-1])-1, 5)).astype(int) + x_tick_idx = np.floor(np.linspace(0, len(self.timedelta_lists[-1])-1, 5)).astype(int) + if len(self.x_tick_vals) == 0 or self.timedelta_lists[-1][x_tick_idx][4] > self.x_tick_vals[4]: self.x_tick_vals = self.timedelta_lists[-1][x_tick_idx] self.x_tick_labels = self.x_tick_vals.map(self._strfdelta) self.measurements_lists.append(measurement_dict.pop('measurements')) @@ -233,6 +237,7 @@ def plot_with_matplotlib(self): plt.rcParams.update({'font.size': 18}) fig, ax = plt.subplots() plt.tight_layout() + self.df["measurement_data"] = float(self.df["measurement_data"]) self.df.plot.barh(x='x_labels', y='measurement_data', stacked=False, legend=False, ax=ax) plt.title(self.title) @@ -313,6 +318,11 @@ class CPUVisualizer(TimebasedMultiLineChartVisualizer): xaxis_label = "Time elapsed since start of pipeline run" yaxis_label = "CPU usage in %" +class TimedTTAVisualizer(BarVisualizer): + title = "Metric: Time to target accuracy" + xaxis_label = "Time elapsed since until target accuracy reached" + yaxis_label = "Time taken in seconds" + type_to_visualizer_class_mapper = { "throughput" : ThroughputVisualizer, "latency" : LatencyVisualizer, @@ -324,5 +334,6 @@ class CPUVisualizer(TimebasedMultiLineChartVisualizer): "tta" : TTAVisualizer, "confusion-matrix" : ConfusionMatrixVisualizer, "hyperparameters" : HyperparemeterVisualizer, - "cpu": CPUVisualizer + "cpu": CPUVisualizer, + "timed tta": TimedTTAVisualizer }