SP-4984 Add performance test

Signed-off-by: Maarit Härkönen <[email protected]>
tiiuae · Sep 24, 2024 · e09150c · e09150c
1 parent e6bd120
commit e09150c
Show file tree

Hide file tree

Showing 5 changed files with 395 additions and 2 deletions.
diff --git a/Robot-Framework/__framework__.resource b/Robot-Framework/__framework__.resource
@@ -0,0 +1,8 @@
+*** Settings ***
+Library  Collections
+Library  OperatingSystem
+Library  Process
+Library  String
+Library  Telnet
+Library  XML
+Library  DateTime
diff --git a/Robot-Framework/lib/PerformanceDataProcessing.py b/Robot-Framework/lib/PerformanceDataProcessing.py
@@ -3,11 +3,14 @@
 
 import csv
 import os
+import shutil
+import pandas
 import json
 import matplotlib.pyplot as plt
 import logging
 from robot.api.deco import keyword
 from performance_thresholds import *
+import parse_perfbench
 
 
 class PerformanceDataProcessing:
@@ -59,6 +62,12 @@ def write_cpu_to_csv(self, test_name, cpu_data):
                 self.device]
         self._write_to_csv(test_name, data)
 
+    @keyword("Parse and Copy Perfbench To Csv")
+    def parse_and_copy_perfbench_to_csv(self):
+        """Run the perfbench parser for this build and device.
+
+        Delegates to parse_perfbench.parse_perfbench_data() with the build
+        number, device name and data directory held by this instance.
+
+        Returns the two heading lists produced by the parser: the headings of
+        the general perfbench results and the headings of the find-bit results.
+        """
+        headings = parse_perfbench.parse_perfbench_data(
+            self.build_number, self.device, self.data_dir
+        )
+        general_heading, find_bit_heading = headings
+        return general_heading, find_bit_heading
+
     @keyword
     def write_mem_to_csv(self, test_name, mem_data):
         data = [self.build_number,
@@ -289,6 +298,149 @@ def read_cpu_csv_and_plot(self, test_name):
         plt.savefig(f'../test-suites/{self.device}_{test_name}.png')
         return statistics
 
+    def normalize_columns(self, csv_file_name, normalize_to):
+        """Scale every measurement column of a result csv to the range 0...normalize_to.
+
+        Reads ``<data_dir>/<device>_<csv_file_name>``, divides each numeric
+        measurement column by its own maximum, multiplies it by ``normalize_to``
+        and writes the result to
+        ``<data_dir>/<device>_normalized_<csv_file_name>`` (without index column).
+
+        Bringing all results to the same range makes it easier to notice a
+        significant change in any of them with one glimpse when the columns
+        are plotted together.
+
+        The first ``build_info_size`` columns hold build data and are copied
+        unchanged, as are non-numeric columns. A column whose maximum is 0
+        cannot be scaled and ends up with non-finite values.
+        """
+        build_info_size = 1  # Leading columns containing build data
+        file_path = os.path.join(self.data_dir, f"{self.device}_{csv_file_name}")
+        print("Normalizing results from file: ", file_path)
+        data = pandas.read_csv(file_path)
+
+        # Select the measurement columns by position, so that each column is always
+        # paired with its own maximum no matter whether the build column is numeric.
+        measurements = data.iloc[:, build_info_size:].select_dtypes(include="number")
+        for column in measurements.columns:
+            values = measurements[column]
+            # Whole-column arithmetic replaces the former cell-by-cell loop.
+            data[column] = values / values.max() * normalize_to
+
+        normalized_path = os.path.join(self.data_dir, f"{self.device}_normalized_{csv_file_name}")
+        data.to_csv(normalized_path, index=False)
+
+    def calc_statistics(self, csv_file_name):
+        """Append summary statistics of the measurement columns to a result csv.
+
+        The file ``<data_dir>/<csv_file_name>`` is first copied unchanged to
+        ``<data_dir>/raw_<csv_file_name>``. Then the following rows are
+        appended to the original file:
+
+        * an empty row
+        * "LRD": for every measurement of the last data row that lies more
+          than one standard deviation away from its column mean, the signed
+          distance from the mean in standard deviations, otherwise '-'
+        * "average" and "std" of every measurement column
+        * an empty row
+        * "max" and "min" of every measurement column
+
+        The first ``build_info_size`` columns hold build data and are left
+        out of the calculations. All remaining columns are expected to be
+        numeric; the statistics are written in column order.
+        """
+        build_info_size = 1  # Leading columns containing build data
+        file_path = os.path.join(self.data_dir, csv_file_name)
+        data = pandas.read_csv(file_path)
+
+        # Select the measurement columns by position, so that the statistics stay
+        # aligned with their columns no matter whether the build column is numeric.
+        measurements = data.iloc[:, build_info_size:]
+        column_avgs = measurements.mean(numeric_only=True)
+        avgs = column_avgs.tolist()
+        stds = measurements.std(numeric_only=True).tolist()
+        min_values = measurements.min(numeric_only=True).tolist()
+        max_values = measurements.max(numeric_only=True).tolist()
+
+        # Check if values of the last data row are 1 std away from their column mean.
+        last_row_deviations = ['-'] * (build_info_size + len(avgs))
+        last_row_deviations[build_info_size - 1] = "LRD"
+        if len(data) > 0:
+            last_row = measurements.iloc[-1]
+            for offset, (column, avg, std) in enumerate(zip(column_avgs.index, avgs, stds)):
+                difference = last_row[column] - avg
+                if abs(difference) > std:
+                    last_row_deviations[build_info_size + offset] = difference / std
+
+        # Keep an untouched copy, because the statistics rows are appended in place.
+        shutil.copyfile(file_path, os.path.join(self.data_dir, "raw_" + csv_file_name))
+
+        label_column = build_info_size - 1
+        # newline='' lets the csv module control the line endings itself.
+        with open(file_path, 'a', newline='') as f:
+            writer_object = csv.writer(f)
+            writer_object.writerow([])
+            writer_object.writerow(last_row_deviations)
+            writer_object.writerow(self.create_stats_row(label_column, "average", avgs))
+            writer_object.writerow(self.create_stats_row(label_column, "std", stds))
+            writer_object.writerow([])
+            writer_object.writerow(self.create_stats_row(label_column, "max", max_values))
+            writer_object.writerow(self.create_stats_row(label_column, "min", min_values))
+
+    def create_stats_row(self, shift, label, value_list):
+        """Build one csv row: ``shift`` '-' placeholders, the label, then the values.
+
+        ``value_list`` has to be a list, as it is concatenated to the row.
+        """
+        placeholders = ['-'] * shift
+        return placeholders + [label] + value_list
+
+
+    @keyword("Read Perfbench Csv And Plot")
+    def read_perfbench_csv_and_plot(self, test_name, file_name, headers):
+        """Normalize a perfbench result csv and plot all its measurements into one figure.
+
+        The file is first normalized to the range 0...100 with
+        normalize_columns(); the plot is drawn from the normalized copy
+        ``<data_dir>/<device>_normalized_<file_name>``.
+
+        ``headers`` names the csv columns in column order: the header at
+        position i is read from column i of every row. The header
+        "build_numbers" is special: it is always read from the first column
+        and repeated builds get a running "-N" suffix so that every x-axis
+        label is unique. Empty cells are read as NaN so that a missing
+        measurement does not abort plotting.
+
+        The figure is saved to
+        ``../test-suites/<device>_<test_name>_<file_name>.png`` and closed.
+        """
+        self.normalize_columns(file_name, 100)
+        file_path = os.path.join(self.data_dir, f"{self.device}_normalized_{file_name}")
+        with open(file_path, 'r', newline='') as csvfile:
+            lines = csv.reader(csvfile)
+            next(lines)  # Skip the heading row, columns are addressed through `headers`
+            logging.info("Reading data from csv file...")
+            logging.info(file_path)
+            data_lines = list(lines)
+
+        build_counter = {}  # To keep track of duplicate builds
+        data = {"build_numbers": []}
+        for index, header in enumerate(headers):
+            data[header] = []
+            for row in data_lines:
+                if header == "build_numbers":
+                    build = str(row[0])
+                    if build in build_counter:
+                        build_counter[build] += 1
+                        modified_build = f"{build}-{build_counter[build]}"
+                    else:
+                        build_counter[build] = 0
+                        modified_build = build
+                    data['build_numbers'].append(modified_build)
+                else:
+                    cell = row[index]
+                    data[header].append(float(cell) if cell else float("nan"))
+
+        plt.figure(figsize=(20, 10))
+        plt.set_loglevel('WARNING')
+        plt.subplot(1, 1, 1)
+        plt.ticklabel_format(axis='y', style='plain')
+
+        for key, value in data.items():
+            if key != 'build_numbers':
+                plt.plot(data['build_numbers'], value, marker='o', linestyle='-', label=key)
+        plt.legend(title="Perfbench measurements")
+
+        plt.yticks(fontsize=8)
+        plt.title(f'Perfbench results: {file_name}', loc='right', fontweight="bold", fontsize=16)
+        plt.grid(True)
+        plt.xticks(data['build_numbers'], rotation=45, fontsize=14)
+        plt.tight_layout()
+        plt.savefig(f'../test-suites/{self.device}_{test_name}_{file_name}.png')
+        # Release the figure, otherwise every call leaves one more figure open.
+        plt.close()
+
     @keyword
     def read_mem_csv_and_plot(self, test_name):
         data = {
@@ -720,6 +872,24 @@ def read_vms_data_csv_and_plot(self, test_name, vms_dict):
             plt.savefig(f'../test-suites/{self.device}_{test_name}_{test}.png')
             plt.close()
 
+    @keyword("Combine Normalized Data")
+    def combine_normalized_data(self, test_name, src):
+        """Append the data rows of a csv file to the combined result file.
+
+        All rows of ``src`` except its heading are appended to
+        ``<data_dir>/<device>_<test_name>.csv``. The heading of ``src`` is
+        written first when the combined file does not exist yet or is empty.
+        """
+        file_path = os.path.join(self.data_dir, f"{self.device}_{test_name}.csv")
+        # Decide before opening for append, as opening creates a missing file.
+        heading_needed = not os.path.exists(file_path) or os.path.getsize(file_path) == 0
+        with open(src, 'r', newline='') as src_f:
+            src_lines = csv.reader(src_f)
+            src_heading = next(src_lines)
+            with open(file_path, 'a', newline='') as dst_f:
+                writer_object = csv.writer(dst_f)
+                if heading_needed:
+                    writer_object.writerow(src_heading)
+                writer_object.writerows(src_lines)
+
     @keyword
     def save_cpu_data(self, test_name, cpu_data):
 

diff --git a/Robot-Framework/lib/output_parser.py b/Robot-Framework/lib/output_parser.py
@@ -4,7 +4,6 @@
 import re
 from datetime import datetime
 
-
 def get_systemctl_status(output):
     output = re.sub(r'\033\[.*?m', '', output)   # remove colors from serial console output
     match = re.search(r'State: (\w+)', output)

diff --git a/Robot-Framework/lib/parse_perfbench.py b/Robot-Framework/lib/parse_perfbench.py
@@ -0,0 +1,156 @@
+# SPDX-FileCopyrightText: 2022-2024 Technology Innovation Institute (TII)
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import csv
+import json
+from robot.api.deco import keyword
+
+# How many leading csv columns are reserved for information extracted from the file name.
+# NOTE(review): nothing else in this module reads this constant -- confirm it is still needed.
+build_info_size = 1
+
+def list_files(path):
+    """Return the perfbench result files found below ``path``, sorted by full path.
+
+    A file qualifies when its name contains "perf_results" but not "csv".
+    All subdirectories are searched.
+    """
+    matches = [
+        os.path.join(folder, filename)
+        for folder, _subdirs, filenames in os.walk(path)
+        for filename in filenames
+        if "perf_results" in filename and "csv" not in filename
+    ]
+
+    # The file creation time may differ from actual build date, so the files are not
+    # ordered by it. They are sorted simply in ascending order according to their name
+    # (perf_results_YYYY-MM-DD_BuildMachine-BuildID).
+    matches.sort()
+
+    return matches
+
+
+def extract_value(file, detect_str, offset, str1, str2):
+    """Extract one result value from a perfbench output file.
+
+    The row containing ``detect_str`` is located (the last one, when several
+    rows contain it). The value is then read from the row ``offset`` rows
+    further down, as the text between the first occurrence of ``str1`` and
+    the first occurrence of ``str2`` on that row.
+
+    Returns the value as float. Returns '' and prints an error when
+    ``detect_str`` is not found, when the result row lies outside the file or
+    when ``str1``/``str2`` are missing from it. When the text between the
+    markers is not a number, an error is printed and the text is returned
+    unconverted.
+    """
+    with open(file, 'r') as f:
+        lines = f.readlines()
+
+    match_index = -1
+    for row_index, row in enumerate(lines):
+        if detect_str in row:
+            match_index = row_index
+
+    result_index = match_index + offset
+    # Also guards against an offset pointing past the end of a truncated file.
+    if match_index < 0 or not 0 <= result_index < len(lines):
+        print("Error in extracting '{}': Result value not found.".format(detect_str))
+        return ''
+
+    line = lines[result_index]
+    res = ''
+    try:
+        start = line.index(str1) + len(str1)
+        res = line[start:line.index(str2)]
+        return float(res)
+    except ValueError:
+        # Raised both by index() for a missing marker and by float() for non-numeric text.
+        print("Error in extracting '{}': Result value not found.".format(detect_str))
+        return res
+
+
+
+def save_to_csv(build, path_to_data, file, config, csv_file_name):
+    """Append one row of results extracted from ``file`` to a csv file.
+
+    The row starts with ``build`` followed by one value per entry of
+    ``config``. Each entry is a ``(detect_str, offset, str1, str2)`` tuple
+    that is handed to extract_value(). The row is appended to
+    ``<path_to_data>/<csv_file_name>``.
+    """
+    results = [build]
+    for detect_str, offset, str1, str2 in config:
+        results.append(extract_value(file, detect_str, offset, str1, str2))
+
+    # newline='' lets the csv module control the line endings itself.
+    with open(os.path.join(path_to_data, csv_file_name), 'a', newline='') as f:
+        csv.writer(f).writerow(results)
+
+
+def create_csv_file(path_to_data, config, csv_file_name):
+    """Create (or overwrite) a csv file that contains only a heading row.
+
+    The heading consists of the first element of every ``config`` entry.
+    """
+    header = [entry[0] for entry in config]
+    target = path_to_data + "/" + csv_file_name
+
+    with open(target, 'w') as f:
+        csv.writer(f, delimiter=',', lineterminator='\n').writerow(header)
+
+
+def parse_perfbench_data(build, device, path_to_data):
+    """Extract the perfbench results of all result files into two csv files.
+
+    The result files are searched below the current working directory (see
+    list_files()). For every file one row, starting with ``build``, is
+    appended to each of
+
+    * ``<path_to_data>/<device>_perf_results.csv`` (general tests)
+    * ``<path_to_data>/<device>_perf_find_bit_results.csv`` ('mem/find_bit' test)
+
+    Returns the heading lists of these two files, both starting with
+    "build_numbers".
+    """
+    # List defining the locations where to extract each result value:
+    # (detect_str, offset, str1, str2) as expected by extract_value().
+    parse_config = [
+        ('sched/pipe', 5, ' ', 'usecs/op'),
+        ('syscall/basic', 4, ' ', 'usecs/op'),
+        ('mem/memcpy', 4, ' ', 'MB/sec'),
+        ('mem/memset', 4, ' ', 'MB/sec'),
+        ('numa-mem', 8, ' ', ' GB/sec/thread'),
+        ('futex/hash', 8, 'Averaged', ' operations/sec'),
+        ('futex/wake ', 13, 'threads in ', ' ms '),
+        ('futex/wake-parallel', 13, '(waking 1/4 threads) in ', ' ms '),
+        ('futex/requeue', 13, 'threads in ', ' ms '),
+        ('futex/lock-pi', 8, 'Averaged ', ' operations/sec'),
+        ('epoll/wait', 7, 'Averaged ', ' operations/sec'),
+        ('ADD operations', 0, 'Averaged ', ' ADD operations'),
+        ('MOD operations', 0, 'Averaged ', ' MOD operations'),
+        ('DEL operations', 0, 'Averaged ', ' DEL operations'),
+        ('internals/synthesize', 5, 'time per event ', ' usec'),
+        ('internals/kallsyms-parse', 1, 'took: ', ' ms ')
+    ]
+
+    # Separate config for the test 'mem/find_bit' which has multiple output values:
+    # one entry for every power-of-two bit count 1...2048 combined with every
+    # power-of-two number of set bits up to that bit count.
+    find_bit_parse_config = [
+        ('{} bits set of {} bits'.format(2 ** set_exp, 2 ** exp), 1, 'Average for_each_set_bit took:', ' usec (+-')
+        for exp in range(12)
+        for set_exp in range(exp + 1)
+    ]
+
+    print(f"Extracting {len(find_bit_parse_config)} separate results from find bit tests.")
+    print(f"Extracting {len(parse_config)} separate results from other tests.")
+
+    file_list = list_files(os.getcwd())
+    print("Going to extract result values from these files: ")
+    print(file_list)
+    print()
+
+    perf_results = ["build_numbers"] + [entry[0] for entry in parse_config]
+    perf_bit_results = ["build_numbers"] + [entry[0] for entry in find_bit_parse_config]
+
+    general_csv = device + "_perf_results.csv"
+    find_bit_csv = device + "_perf_find_bit_results.csv"
+    for result_file in file_list:
+        save_to_csv(build, path_to_data, result_file, parse_config, general_csv)
+        save_to_csv(build, path_to_data, result_file, find_bit_parse_config, find_bit_csv)
+    return perf_results, perf_bit_results
+
+@keyword("Convert Output To Json")
+def convert_output_to_json(output):
+    """Parse the given JSON text and return the decoded Python object.
+
+    Raises json.JSONDecodeError when ``output`` is not valid JSON.
+    """
+    return json.loads(output)