Update prediction scripts
- Fix imports
- Fix requirement for joblib
- Updates for Python 3.10
- Add sm_80 option for A100
mkrack committed Jun 27, 2023
1 parent cceb3f9 commit 719db05
Showing 5 changed files with 143 additions and 99 deletions.
134 changes: 75 additions & 59 deletions src/acc/libsmm_acc/predict/predict_collect.py
@@ -17,11 +17,12 @@
import pandas as pd

sys.path.append("../")
from kernels.cusmm_predict import ( # noqa: E402

from kernels.smm_acc import ( # noqa: E402
to_string,
kernel_algorithm,
parameter_types,
) # noqa: E402
parameter_types
)


# ===============================================================================
@@ -32,6 +33,13 @@ def main(tunedir):
- dump them to CSV files for data analysis and training of a predictive model
"""
# ===============================================================================
# Check for old data files first
for algorithm in kernel_algorithm.keys():
training_data_file = os.path.join(tunedir, f"raw_training_data_{algorithm}.csv")
if os.path.exists(training_data_file):
print(f"WARNING: Found old data file {training_data_file}, re(move) it first ... exiting")
sys.exit(1)

# Find all the 'tune_MxNxK' folders
kernel_folder_pattern = re.compile(r"tune_(\d+)x(\d+)x(\d+)$")
kernel_folders = [
@@ -46,7 +54,7 @@
+ " in folder "
+ tunedir
)
print("Found {:,} kernel folders".format(n_kernels))
print(f"Found {n_kernels} kernel folders")

# Collect information and write to csv
collect_training_data(kernel_folders, kernel_folder_pattern)
@@ -79,12 +87,15 @@ def read_log_file(log_folder, m, n, k):
# Parse the log files and collect data
data = list()
for log_file in log_files:
print("Processing log file", log_file)

print(f"Processing log file {log_file}")
with open(os.path.join(log_folder, log_file), "r") as f:
log_file_content = f.read().splitlines()

for line in log_file_content:

if "OK" in line: # this line contains autotuning data

# Parse the line
match = autotuning_line.match(line)
assert match is not None, "Found null match: " + line
@@ -111,7 +122,7 @@ def read_log_file(log_folder, m, n, k):
}
)

print("Autotuning lines found: ", len(data))
print(f"{len(data)} autotuning lines found")

# Merge dictionaries into a pandas dataframe
dataframe = pd.DataFrame(data)
@@ -130,7 +141,8 @@ def collect_training_data(kernel_folders, kernel_folder_pattern):
# For each folder:
n_kernels = len(kernel_folders)
for i, kernel_folder in enumerate(kernel_folders):
print("\nProcess folder {} ({}/{:,})".format(kernel_folder, i + 1, n_kernels))

print(f"\nProcess folder {kernel_folder} ({i+1}/{n_kernels})")

# Find (m, n, k)
# Each folder contains data for just one (m, n, k) but potentially multiple algorithms
@@ -141,13 +153,20 @@ def collect_training_data(kernel_folders, kernel_folder_pattern):

# ===============================================================================
# Collect info from log files
data = read_log_file(kernel_folder, m, n, k)
log_files = [f for f in os.listdir(kernel_folder) if f[-4:] == ".log"]
if len(log_files) > 0:
data = read_log_file(kernel_folder, m, n, k)
else:
print(f"No log files found in folder {kernel_folder} ... skipping")
continue

# ===============================================================================
# Write parameters to CSV
for name_algo, kernel_algo in kernel_algorithm.items():

# if applicable to this mnk
if name_algo in data["algorithm"].values:

# Does collected csv file exist already?
raw_parameters_file_name = os.path.join(
kernel_folder,
@@ -159,18 +178,14 @@
)

if os.path.exists(raw_parameters_file_name):
print(
"\tFound csv file:", raw_parameters_file_name, ", skipping ..."
)

print(f"Found csv file {raw_parameters_file_name} ... skipping")
else:
# Get the data corresponding to this algorithm
data_algo = data[data["algorithm"] == name_algo]

# Write raw parameters
pars_to_get = kernel_algo.launch_parameters + ["perf (Gflop/s)"]
data_algo[pars_to_get].to_csv(raw_parameters_file_name, index=False)
print("\tWrote", raw_parameters_file_name)
print("Wrote", raw_parameters_file_name)


# ===============================================================================
@@ -179,57 +194,58 @@ def merge_data_files(tunedir):
Merge CSV files
"""
for algorithm in kernel_algorithm.keys():

training_data_file = os.path.join(
tunedir, "raw_training_data_{algorithm}.csv".format(algorithm=algorithm)
)

if os.path.exists(training_data_file):
print("\nFound {}, skipping ... ".format(training_data_file))
print(f"\nFound {training_data_file} ... skipping")
os.rename(training_data_file, f"{training_data_file}.bak")

print(f"\nMerging partial CSV files into {training_data_file} ... ")

filenames_pattern = os.path.join(
tunedir,
"tune_*/raw_training_data_*_{algorithm}.csv".format(
algorithm=algorithm
),
)
print("Merging all files with pattern:", filenames_pattern)
filenames = glob.glob(filenames_pattern)
if len(filenames) == 0:
print("Found no files matching this pattern ... skipping")

else:
print("\nMerging partial CSV files into {} ... ".format(training_data_file))

filenames_pattern = os.path.join(
tunedir,
"tune_*/raw_training_data_*_{algorithm}.csv".format(
algorithm=algorithm
),
)
print("Merging all files with pattern:", filenames_pattern)
filenames = glob.glob(filenames_pattern)
if len(filenames) == 0:
print("Found no files matching this pattern, skipping ...")

else:
print("Found {} files matching this pattern".format(len(filenames)))

with open(training_data_file, "w") as out:
# Write the first file, including its header
fn_1 = filenames.pop(0)
with open(fn_1) as f:
header_line_ref = next(f) # read header line
out.write(header_line_ref) # write header line
out.write(f.read()) # write the rest of the file
# Write the rest of the files, skipping the header line each time
for i, fn in enumerate(filenames):
print(
"writing from {} ({}/{})".format(fn, i + 1, len(filenames))
print(f"Found {len(filenames)} files matching this pattern")

with open(training_data_file, "w") as out:
# Write the first file, including its header
fn_1 = filenames.pop(0)
with open(fn_1) as f:
header_line_ref = next(f) # read header line
out.write(header_line_ref) # write header line
out.write(f.read()) # write the rest of the file
# Write the rest of the files, skipping the header line each time
for i, fn in enumerate(filenames):
print(
"writing from {} ({}/{})".format(fn, i + 1, len(filenames))
)
with open(fn) as f:
header_line = next(f) # skip header line
assert header_line == header_line_ref, (
'Cannot merge file "'
+ fn
+ '", because its header line:\n'
+ header_line
+ 'is different from the header line of file "'
+ fn_1
+ '":\n'
+ header_line_ref
)
with open(fn) as f:
header_line = next(f) # skip header line
assert header_line == header_line_ref, (
'Cannot merge file "'
+ fn
+ '", because its header line:\n'
+ header_line
+ 'is different from the header line of file "'
+ fn_1
+ '":\n'
+ header_line_ref
)
out.write(f.read())

print("Wrote to {}".format(training_data_file))
out.write(f.read())

print("Wrote to {}".format(training_data_file))


# ===============================================================================
@@ -257,8 +273,8 @@ def merge_data_files(tunedir):
"--arch",
metavar="ARCHITECTURE_NUMBER",
type=int,
default=60,
help="GPU architecture code. Options: sm_35, sm_37, sm_60, sm_70, gfx906",
default=80,
help="GPU architecture code. Options: sm_35, sm_37, sm_60, sm_70, sm_80, gfx906",
)

args = parser.parse_args()
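As a side note on merge_data_files above: the merge concatenates the per-kernel CSV files, writing the header of the first file once and asserting that every subsequent file carries an identical header line. A minimal, self-contained sketch of that pattern (the glob pattern and output name below are illustrative stand-ins, not the exact names produced by the tuning runs):

import glob

def merge_csv(pattern, out_path):
    filenames = sorted(glob.glob(pattern))
    if not filenames:
        return
    with open(out_path, "w") as out:
        with open(filenames[0]) as f:
            header_ref = next(f)  # keep the first header line
            out.write(header_ref)
            out.write(f.read())
        for fn in filenames[1:]:
            with open(fn) as f:
                header = next(f)  # verify, then skip, subsequent headers
                assert header == header_ref, f"Header mismatch in {fn}"
                out.write(f.read())

# e.g. merge_csv("tune_*/raw_training_data_*_medium.csv", "raw_training_data_medium.csv")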
35 changes: 23 additions & 12 deletions src/acc/libsmm_acc/predict/predict_genpars.py
@@ -18,17 +18,20 @@
import argparse
from joblib import Parallel, delayed
from predict_helpers import safe_pickle_load
from warnings import simplefilter
simplefilter(action='ignore', category=UserWarning)

sys.path.append("../")
from kernels.smm_acc_predict import ( # noqa: E402
from kernels.smm_acc import to_tuple, to_string # noqa: E402
from kernels.smm_acc_predict import (
gpu_architectures,
kernel_algorithm,
to_string,
to_tuple,
params_dict_to_kernel,
PredictiveParameters,
)

# The joblib backend spawns additional processes, which do not inherit the warning filters applied using warnings.filterwarnings
os.environ['PYTHONWARNINGS']='ignore::UserWarning'

# ===============================================================================
def main(params, njobs, baseline, paths_to_models, chunk_size):
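The comment above is worth illustrating: warning filters installed with warnings.simplefilter() apply to the parent interpreter only, whereas the PYTHONWARNINGS environment variable is inherited by the worker processes that joblib spawns. A minimal sketch of the same idea (not part of this commit):

import os
import warnings
from joblib import Parallel, delayed

warnings.simplefilter("ignore", UserWarning)  # silences warnings in this process only
os.environ["PYTHONWARNINGS"] = "ignore::UserWarning"  # inherited by the spawned workers

def noisy(i):
    warnings.warn("raised inside a worker", UserWarning)  # silenced in the workers too
    return i * i

if __name__ == "__main__":
    print(Parallel(n_jobs=2)(delayed(noisy)(i) for i in range(4)))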
@@ -137,6 +140,7 @@ def find_optimal_kernel(
optimal_kernels = dict()

else:

# Get predictor features from raw parameters
parameter_sets = PredictiveParameters(
parameter_space, gpu_properties, autotuning_properties, None
@@ -226,16 +230,18 @@ def get_optimal_kernels(
os.mkdir(ckpt_folder_name)
print("Caching intermediate results to:", ckpt_folder_name)

for i in range(0, num_mnks_by_algo + 1, chunk_size):
for i in range(0, num_mnks_by_algo, chunk_size):

# Chunk up tasks
start_chunk = i
end_chunk = int(min(start_chunk + chunk_size, num_mnks_by_algo + 1))
print("Completed {:,} tasks out of {:,}".format(i, num_mnks_by_algo))
end_chunk = int(min(start_chunk + chunk_size, num_mnks_by_algo))
print(f"Completed {i} tasks out of {num_mnks_by_algo}")

# Create checkpoint file or load checkpointed data from it
checkpoint_file_name = os.path.join(
ckpt_folder_name, "chunk_{}-{}.json".format(start_chunk, end_chunk)
)
ckpt_folder_name, f"chunk_{start_chunk}-{end_chunk - 1}.json"
)

if os.path.exists(checkpoint_file_name):
with open(checkpoint_file_name, "r") as f:
optimal_kernels_list__ = json.load(f)
@@ -247,14 +253,17 @@
optimal_kernels_list_[i][to_tuple(k)] = kernel_algorithm[algo](
**v
)
print("Read chunk {}-{}\n".format(start_chunk, end_chunk))
print(f"Read chunk {start_chunk}-{end_chunk - 1}\n")

else:

if njobs == 1:
j = i
# Ignore joblib and run serially:
for mnk, algo in mnks_by_algo:
j += 1
gc.collect()
print("Find optimal kernels for mnk=", mnk, ", algo=", algo)
print(f"{j:6d} of {num_mnks_by_algo}: Find optimal kernels for mnk = {mnk} algo = {algo}")
optimal_kernels_list_ = find_optimal_kernel(
mnk,
algo,
Expand All @@ -264,6 +273,7 @@ def get_optimal_kernels(
autotuning_properties,
)
else:

# Run prediction tasks in parallel with joblib
optimal_kernels_list_ = Parallel(n_jobs=njobs, verbose=2)(
delayed(find_optimal_kernel, check_pickle=True)(
@@ -313,6 +323,7 @@ def get_optimal_kernels(


def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):

print("Getting baseline kernels")
baseline_algorithm = "medium"
baseline_kernels = list()
@@ -340,7 +351,7 @@ def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):
"-p",
"--params",
metavar="parameters_GPU.json",
default="../parameters/parameters_P100.json",
default="../parameters/parameters_A100.json",
help="Parameter file to read and update with predictions",
)
parser.add_argument(
@@ -380,7 +391,7 @@ def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties):
"-c",
"--chunk_size",
type=int,
default=2000,
default=5000,
help="Chunk size for dispatching joblib jobs. If memory errors are experienced, reduce this number",
)

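One detail of the get_optimal_kernels change above that is easy to miss: the task loop now stops at num_mnks_by_algo instead of num_mnks_by_algo + 1, and the checkpoint file name uses end_chunk - 1 so the range in the name is inclusive. A toy illustration with made-up numbers:

# Hypothetical sizes, only to show the chunk boundaries and checkpoint names:
num_mnks_by_algo, chunk_size = 12, 5
for start_chunk in range(0, num_mnks_by_algo, chunk_size):
    end_chunk = min(start_chunk + chunk_size, num_mnks_by_algo)
    print(f"chunk_{start_chunk}-{end_chunk - 1}.json")
# -> chunk_0-4.json, chunk_5-9.json, chunk_10-11.json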
8 changes: 7 additions & 1 deletion src/acc/libsmm_acc/predict/predict_helpers.py
@@ -16,7 +16,7 @@
import matplotlib.pyplot as plt

sys.path.append("../")
from kernels.smm_acc_predict import to_string # noqa: E402
from kernels.smm_acc import to_string # noqa: E402


# ===============================================================================
Expand Down Expand Up @@ -116,6 +116,7 @@ def plot_absolute_performance_gain(
pp.savefig()
else:
plt.show()
plt.close()


def plot_relative_performance_gain(
@@ -144,6 +145,7 @@ def plot_relative_performance_gain(
pp.savefig()
else:
plt.show()
plt.close()


def plot_performance_gains(
@@ -179,6 +181,7 @@ def plot_performance_gains(
pp.savefig()
else:
plt.show()
plt.close()


def plot_scaled_performance_gains(
@@ -214,6 +217,7 @@ def plot_scaled_performance_gains(
pp.savefig()
else:
plt.show()
plt.close()


def plot_choice_goodness(
@@ -228,6 +232,7 @@
pp,
scaled=True,
):

# Sort in ascending performances
data_mnk = pd.DataFrame()
if scaled:
@@ -294,3 +299,4 @@ def plot_choice_goodness(

plt.legend(loc="lower right")
pp.savefig()
plt.close()
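The plt.close() calls added throughout predict_helpers.py follow the usual save-then-close pattern, which stops matplotlib from accumulating open figures when these helpers are called in a loop. A minimal sketch of the pattern (the backend choice, file name, and data are placeholders):

import matplotlib
matplotlib.use("Agg")  # assumption: non-interactive backend for batch plotting
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

with PdfPages("plots.pdf") as pp:
    for i in range(3):
        plt.figure()
        plt.plot([0, 1], [0, i])
        pp.savefig()  # write the current figure to the PDF
        plt.close()   # then release it instead of keeping it open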