Update prediction scripts
- Fix imports
- Fix requirement for joblib
- Updates for Python 3.10
- Add sm_80 option for A100
mkrack committed Jun 27, 2023
1 parent cceb3f9 commit 719db05
Showing 5 changed files with 143 additions and 99 deletions.
134 changes: 75 additions & 59 deletions src/acc/libsmm_acc/predict/predict_collect.py
@@ -17,11 +17,12 @@
import pandas as pd

sys.path.append("../")
from kernels.cusmm_predict import ( # noqa: E402

from kernels.smm_acc import ( # noqa: E402
to_string,
kernel_algorithm,
parameter_types,
) # noqa: E402
parameter_types
)


# ===============================================================================
@@ -32,6 +33,13 @@ def main(tunedir):
- dump them to CSV files for data analysis and training of a predictive model
"""
# ===============================================================================
# Check for old data files first
for algorithm in kernel_algorithm.keys():
training_data_file = os.path.join(tunedir, f"raw_training_data_{algorithm}.csv")
if os.path.exists(training_data_file):
print(f"WARNING: Found old data file {training_data_file}, re(move) it first ... exiting")
sys.exit(1)

# Find all the 'tune_MxNxK' folders
kernel_folder_pattern = re.compile(r"tune_(\d+)x(\d+)x(\d+)$")
kernel_folders = [
@@ -46,7 +54,7 @@
+ " in folder "
+ tunedir
)
print("Found {:,} kernel folders".format(n_kernels))
print(f"Found {n_kernels} kernel folders")

# Collect information and write to csv
collect_training_data(kernel_folders, kernel_folder_pattern)
@@ -79,12 +87,15 @@ def read_log_file(log_folder, m, n, k):
# Parse the log files and collect data
data = list()
for log_file in log_files:
print("Processing log file", log_file)

print(f"Processing log file {log_file}")
with open(os.path.join(log_folder, log_file), "r") as f:
log_file_content = f.read().splitlines()

for line in log_file_content:

if "OK" in line: # this line contains autotuning data

# Parse the line
match = autotuning_line.match(line)
assert match is not None, "Found null match: " + line
@@ -111,7 +122,7 @@ def read_log_file(log_folder, m, n, k):
}
)

print("Autotuning lines found: ", len(data))
print(f"{len(data)} autotuning lines found")

# Merge dictionaries into a pandas dataframe
dataframe = pd.DataFrame(data)
@@ -130,7 +141,8 @@ def collect_training_data(kernel_folders, kernel_folder_pattern):
# For each folder:
n_kernels = len(kernel_folders)
for i, kernel_folder in enumerate(kernel_folders):
print("\nProcess folder {} ({}/{:,})".format(kernel_folder, i + 1, n_kernels))

print(f"\nProcess folder {kernel_folder} ({i+1}/{n_kernels})")

# Find (m, n, k)
# Each folder contains data for just one (m, n, k) but potentially multiple algorithms
@@ -141,13 +153,20 @@ def collect_training_data(kernel_folders, kernel_folder_pattern):

# ===============================================================================
# Collect info from log files
data = read_log_file(kernel_folder, m, n, k)
log_files = [f for f in os.listdir(kernel_folder) if f[-4:] == ".log"]
if len(log_files) > 0:
data = read_log_file(kernel_folder, m, n, k)
else:
print(f"No log files found in folder {kernel_folder} ... skipping")
continue

# ===============================================================================
# Write parameters to CSV
for name_algo, kernel_algo in kernel_algorithm.items():

# if applicable to this mnk
if name_algo in data["algorithm"].values:

# Does collected csv file exist already?
raw_parameters_file_name = os.path.join(
kernel_folder,
@@ -159,18 +178,14 @@
)

if os.path.exists(raw_parameters_file_name):
print(
"\tFound csv file:", raw_parameters_file_name, ", skipping ..."
)

print(f"Found csv file {raw_parameters_file_name} ... skipping")
else:
# Get the data corresponding to this algorithm
data_algo = data[data["algorithm"] == name_algo]

# Write raw parameters
pars_to_get = kernel_algo.launch_parameters + ["perf (Gflop/s)"]
data_algo[pars_to_get].to_csv(raw_parameters_file_name, index=False)
print("\tWrote", raw_parameters_file_name)
print("Wrote", raw_parameters_file_name)


# ===============================================================================
@@ -179,57 +194,58 @@ def merge_data_files(tunedir):
Merge CSV files
"""
for algorithm in kernel_algorithm.keys():

training_data_file = os.path.join(
tunedir, "raw_training_data_{algorithm}.csv".format(algorithm=algorithm)
)

if os.path.exists(training_data_file):
print("\nFound {}, skipping ... ".format(training_data_file))
print(f"\nFound {training_data_file} ... skipping")
os.rename(training_data_file, f"{training_data_file}.bak")

print(f"\nMerging partial CSV files into {training_data_file} ... ")

filenames_pattern = os.path.join(
tunedir,
"tune_*/raw_training_data_*_{algorithm}.csv".format(
algorithm=algorithm
),
)
print("Merging all files with pattern:", filenames_pattern)
filenames = glob.glob(filenames_pattern)
if len(filenames) == 0:
print("Found no files matching this pattern ... skipping")

else:
print("\nMerging partial CSV files into {} ... ".format(training_data_file))

filenames_pattern = os.path.join(
tunedir,
"tune_*/raw_training_data_*_{algorithm}.csv".format(
algorithm=algorithm
),
)
print("Merging all files with pattern:", filenames_pattern)
filenames = glob.glob(filenames_pattern)
if len(filenames) == 0:
print("Found no files matching this pattern, skipping ...")

else:
print("Found {} files matching this pattern".format(len(filenames)))

with open(training_data_file, "w") as out:
# Write the first file, including its header
fn_1 = filenames.pop(0)
with open(fn_1) as f:
header_line_ref = next(f) # read header line
out.write(header_line_ref) # write header line
out.write(f.read()) # write the rest of the file
# Write the rest of the files, skipping the header line each time
for i, fn in enumerate(filenames):
print(
"writing from {} ({}/{})".format(fn, i + 1, len(filenames))
print(f"Found {len(filenames)} files matching this pattern")

with open(training_data_file, "w") as out:
# Write the first file, including its header
fn_1 = filenames.pop(0)
with open(fn_1) as f:
header_line_ref = next(f) # read header line
out.write(header_line_ref) # write header line
out.write(f.read()) # write the rest of the file
# Write the rest of the files, skipping the header line each time
for i, fn in enumerate(filenames):
print(
"writing from {} ({}/{})".format(fn, i + 1, len(filenames))
)
with open(fn) as f:
header_line = next(f) # skip header line
assert header_line == header_line_ref, (
'Cannot merge file "'
+ fn
+ '", because its header line:\n'
+ header_line
+ 'is different from the header line of file "'
+ fn_1
+ '":\n'
+ header_line_ref
)
with open(fn) as f:
header_line = next(f) # skip header line
assert header_line == header_line_ref, (
'Cannot merge file "'
+ fn
+ '", because its header line:\n'
+ header_line
+ 'is different from the header line of file "'
+ fn_1
+ '":\n'
+ header_line_ref
)
out.write(f.read())

print("Wrote to {}".format(training_data_file))
out.write(f.read())

print("Wrote to {}".format(training_data_file))


# ===============================================================================
@@ -257,8 +273,8 @@ def merge_data_files(tunedir):
"--arch",
metavar="ARCHITECTURE_NUMBER",
type=int,
default=60,
help="GPU architecture code. Options: sm_35, sm_37, sm_60, sm_70, gfx906",
default=80,
help="GPU architecture code. Options: sm_35, sm_37, sm_60, sm_70, sm_80, gfx906",
)

args = parser.parse_args()
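As a side note on merge_data_files above: the merge concatenates the per-kernel CSV files, writing the header of the first file once and asserting that every subsequent file carries an identical header line. A minimal, self-contained sketch of that pattern (the glob pattern and output name below are illustrative stand-ins, not the exact names produced by the tuning runs):

import glob

def merge_csv(pattern, out_path):
    filenames = sorted(glob.glob(pattern))
    if not filenames:
        return
    with open(out_path, "w") as out:
        with open(filenames[0]) as f:
            header_ref = next(f)  # keep the first header line
            out.write(header_ref)
            out.write(f.read())
        for fn in filenames[1:]:
            with open(fn) as f:
                header = next(f)  # verify, then skip, subsequent headers
                assert header == header_ref, f"Header mismatch in {fn}"
                out.write(f.read())

# e.g. merge_csv("tune_*/raw_training_data_*_medium.csv", "raw_training_data_medium.csv")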
35 changes: 23 additions & 12 deletions src/acc/libsmm_acc/predict/predict_genpars.py
@@ -18,17 +18,20 @@
import argparse
from joblib import Parallel, delayed
from predict_helpers import safe_pickle_load
from warnings import simplefilter
simplefilter(action='ignore', category=UserWarning)

sys.path.append("../")
from kernels.smm_acc_predict import ( # noqa: E402
from kernels.smm_acc import to_tuple, to_string # noqa: E402
from kernels.smm_acc_predict import (
gpu_architectures,
kernel_algorithm,
to_string,
to_tuple,
params_dict_to_kernel,
PredictiveParameters,
)

# The joblib backend spawns additional processes, which do not inherit the warning filters applied using warnings.filterwarnings
os.environ['PYTHONWARNINGS']='ignore::UserWarning'

# ===============================================================================
def main(params, njobs, baseline, paths_to_models, chunk_size):
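The comment above is worth illustrating: warning filters installed with warnings.simplefilter() apply to the parent interpreter only, whereas the PYTHONWARNINGS environment variable is inherited by the worker processes that joblib spawns. A minimal sketch of the same idea (not part of this commit):

import os
import warnings
from joblib import Parallel, delayed

warnings.simplefilter("ignore", UserWarning)  # silences warnings in this process only
os.environ["PYTHONWARNINGS"] = "ignore::UserWarning"  # inherited by the spawned workers

def noisy(i):
    warnings.warn("raised inside a worker", UserWarning)  # silenced in the workers too
    return i * i

if __name__ == "__main__":
    print(Parallel(n_jobs=2)(delayed(noisy)(i) for i in range(4)))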
@@ -137,6 +140,7 @@ def find_optimal_kernel(
optimal_kernels = dict()

else:

# Get predictor features from raw parameters
parameter_sets = PredictiveParameters(
parameter_space, gpu_properties, autotuning_properties, None
@@ -226,16 +230,18 @@ def get_optimal_kernels(
os.mkdir(ckpt_folder_name)
print("Caching intermediate results to:", ckpt_folder_name)

for i in range(0, num_mnks_by_algo + 1, chunk_size):
for i in range(0, num_mnks_by_algo, chunk_size):

# Chunk up tasks
start_chunk = i
end_chunk = int(min(start_chunk + chunk_size, num_mnks_by_algo + 1))
print("Completed {:,} tasks out of {:,}".format(i, num_mnks_by_algo))
end_chunk = int(min(start_chunk + chunk_size, num_mnks_by_algo))
print(f"Completed {i} tasks out of {num_mnks_by_algo}")

# Create checkpoint file or load checkpointed data from it
checkpoint_file_name = os.path.join(
ckpt_folder_name, "chunk_{}-{}.json".format(start_chunk, end_chunk)
)
ckpt_folder_name, f"chunk_{start_chunk}-{end_chunk - 1}.json"
)

if os.path.exists(checkpoint_file_name):
with open(checkpoint_file_name, "r") as f:
optimal_kernels_list__ = json.load(f)
@@ -247,14 +253,17 @@
optimal_kernels_list_[i][to_tuple(k)] = kernel_algorithm[algo](
**v
)
print("Read chunk {}-{}\n".format(start_chunk, end_chunk))
print(f"Read chunk {start_chunk}-{end_chunk - 1}\n")

else:

if njobs == 1:
j = i
# Ignore joblib and run serially:
for mnk, algo in mnks_by_algo:
j += 1
gc.collect()
print("Find optimal kernels for mnk=", mnk, ", algo=", algo)
print(f"{j:6d} of {num_mnks_by_algo}: Find optimal kernels for mnk = {mnk} algo = {algo}")
optimal_kernels_list_ = find_optimal_kernel(
mnk,
algo,
Expand All @@ -264,6 +273,7 @@ def get_optimal_kernels(
autotuning_properties,
)
else:

# Run prediction tasks in parallel with joblib
optimal_kernels_list_ = Parallel(n_jobs=njobs, verbose=2)(
delayed(find_optimal_kernel, check_pickle=True)(
@@ -313,6 +323,7 @@ def get_optimal_kernels(


def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):

print("Getting baseline kernels")
baseline_algorithm = "medium"
baseline_kernels = list()
@@ -340,7 +351,7 @@ def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):
"-p",
"--params",
metavar="parameters_GPU.json",
default="../parameters/parameters_P100.json",
default="../parameters/parameters_A100.json",
help="Parameter file to read and update with predictions",
)
parser.add_argument(
@@ -380,7 +391,7 @@ def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):
"-c",
"--chunk_size",
type=int,
default=2000,
default=5000,
help="Chunk size for dispatching joblib jobs. If memory errors are experienced, reduce this number",
)

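One detail of the get_optimal_kernels change above that is easy to miss: the task loop now stops at num_mnks_by_algo instead of num_mnks_by_algo + 1, and the checkpoint file name uses end_chunk - 1 so the range in the name is inclusive. A toy illustration with made-up numbers:

# Hypothetical sizes, only to show the chunk boundaries and checkpoint names:
num_mnks_by_algo, chunk_size = 12, 5
for start_chunk in range(0, num_mnks_by_algo, chunk_size):
    end_chunk = min(start_chunk + chunk_size, num_mnks_by_algo)
    print(f"chunk_{start_chunk}-{end_chunk - 1}.json")
# -> chunk_0-4.json, chunk_5-9.json, chunk_10-11.json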
8 changes: 7 additions & 1 deletion src/acc/libsmm_acc/predict/predict_helpers.py
@@ -16,7 +16,7 @@
import matplotlib.pyplot as plt

sys.path.append("../")
from kernels.smm_acc_predict import to_string # noqa: E402
from kernels.smm_acc import to_string # noqa: E402


# ===============================================================================
Expand Down Expand Up @@ -116,6 +116,7 @@ def plot_absolute_performance_gain(
pp.savefig()
else:
plt.show()
plt.close()


def plot_relative_performance_gain(
@@ -144,6 +145,7 @@ def plot_relative_performance_gain(
pp.savefig()
else:
plt.show()
plt.close()


def plot_performance_gains(
@@ -179,6 +181,7 @@ def plot_performance_gains(
pp.savefig()
else:
plt.show()
plt.close()


def plot_scaled_performance_gains(
@@ -214,6 +217,7 @@ def plot_scaled_performance_gains(
pp.savefig()
else:
plt.show()
plt.close()


def plot_choice_goodness(
@@ -228,6 +232,7 @@
pp,
scaled=True,
):

# Sort in ascending performances
data_mnk = pd.DataFrame()
if scaled:
@@ -294,3 +299,4 @@ def plot_choice_goodness(

plt.legend(loc="lower right")
pp.savefig()
plt.close()
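The plt.close() calls added throughout predict_helpers.py follow the usual save-then-close pattern, which stops matplotlib from accumulating open figures when these helpers are called in a loop. A minimal sketch of the pattern (the backend choice, file name, and data are placeholders):

import matplotlib
matplotlib.use("Agg")  # assumption: non-interactive backend for batch plotting
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

with PdfPages("plots.pdf") as pp:
    for i in range(3):
        plt.figure()
        plt.plot([0, 1], [0, i])
        pp.savefig()  # write the current figure to the PDF
        plt.close()   # then release it instead of keeping it open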