Updating rename benchmark script to make calls to vm extraction script (#2448)

* added check directory functions and their unit tests

* adding function to parse config file and generate dir in bucket

* exit_code set to 1

* correct format

* refactors

* test formatting

* function to avoid code repetition

* added check directory functions and their unit tests

* moving mount functions to utils

* compute metrics from time of operation

* uploading metrics to gsheet

* nits

* added logic to accumulate nested rename folder metrics

* getting VM metrics relevant to rename operation

* extracting vm metrics for rename operation

* correcting upload values for vm metrics

* added unit test and function description

* updating mount flag for flat bucket

* splitting upload function

* updating mount flags for flat bucket mounting
anushka567 authored Sep 9, 2024
1 parent 9fe2602 commit bd70467
Showing 2 changed files with 173 additions and 28 deletions.
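At a high level, this change makes each rename benchmark record the [start, end] timestamps of every sample, collapses them into a single per-folder window, waits for Cloud Monitoring data to land, and then fetches VM metrics for that window and uploads them to the new vm_metrics_* worksheets. Below is a minimal standalone sketch (not part of the commit) of the interval bookkeeping, matching the [time_interval[0][0], time_interval[-1][-1]] logic in the diff and the expectations in the updated unit tests.

def collapse_sample_windows(per_folder_samples):
  """Collapse per-sample [start, end] pairs into one [first_start, last_end]
  window per folder; this window is what gets handed to the VM-metrics
  extraction step."""
  windows = {}
  for folder, samples in per_folder_samples.items():
    # samples is a list like [[s1, e1], [s2, e2], ...] in sample order.
    windows[folder] = [samples[0][0], samples[-1][-1]]
  return windows

if __name__ == '__main__':
  # Hypothetical timings for two rename samples on one folder.
  samples = {'test_folder1': [[1.0, 2.0], [3.0, 4.0]]}
  print(collapse_sample_windows(samples))  # prints {'test_folder1': [1.0, 4.0]}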
111 changes: 98 additions & 13 deletions perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark.py
@@ -18,6 +18,7 @@
# where dir-config.json file contains the directory structure details for the test.

import os
import socket
import sys
import argparse
import logging
@@ -32,10 +33,15 @@
from utils.mount_unmount_util import mount_gcs_bucket, unmount_gcs_bucket
from utils.checks_util import check_dependencies
from gsheet import gsheet
from vm_metrics import vm_metrics

WORKSHEET_NAME_FLAT = 'rename_metrics_flat'
WORKSHEET_NAME_HNS = 'rename_metrics_hns'
WORKSHEET_VM_METRICS_FLAT = 'vm_metrics_flat'
WORKSHEET_VM_METRICS_HNS = 'vm_metrics_hns'
SPREADSHEET_ID = '1UVEvsf49eaDJdTGLQU1rlNTIAxg8PZoNQCy_GX6Nw-A'
INSTANCE=socket.gethostname()
PERIOD_SEC=120

logging.basicConfig(
level=logging.INFO,
@@ -244,6 +250,7 @@ def _record_time_for_folder_rename(parent_dir,folder,num_samples):
"""
folder_name= '{}/{}'.format(parent_dir,folder["name"])
folder_rename = folder_name+"_renamed"
time_intervals_list=[]
time_op = []
for iter in range(num_samples):
# For the even iterations, we rename from folder_name to folder_name_renamed.
@@ -257,6 +264,7 @@ def _record_time_for_folder_rename(parent_dir,folder,num_samples):
start_time_sec = time.time()
subprocess.call('mv ./{} ./{}'.format(rename_from, rename_to), shell=True)
end_time_sec = time.time()
time_intervals_list.append([start_time_sec,end_time_sec])
time_op.append(end_time_sec - start_time_sec)

# If the number of samples is odd, we need another unrecorded rename operation
@@ -266,7 +274,7 @@ def _record_time_for_folder_rename(parent_dir,folder,num_samples):
rename_to = folder_name
subprocess.call('mv ./{} ./{}'.format(rename_from, rename_to), shell=True)

return time_op
return time_op,time_intervals_list


def _record_time_of_operation(mount_point, dir, num_samples):
@@ -284,15 +292,18 @@
corresponding to each folder.
"""
results = dict()
time_interval_for_vm_metrics={}
# Collecting metrics for non-nested folders.
for folder in dir["folders"]["folder_structure"]:
results[folder["name"]] = _record_time_for_folder_rename(mount_point,folder,num_samples)
results[folder["name"]],time_interval = _record_time_for_folder_rename(mount_point,folder,num_samples)
time_interval_for_vm_metrics[folder["name"]]=[time_interval[0][0],time_interval[-1][-1]]

nested_folder={
"name": dir["nested_folders"]["folder_name"]
}
results[dir["nested_folders"]["folder_name"]] = _record_time_for_folder_rename(mount_point,nested_folder,num_samples)
return results
results[dir["nested_folders"]["folder_name"]],time_interval = _record_time_for_folder_rename(mount_point,nested_folder,num_samples)
time_interval_for_vm_metrics[dir["nested_folders"]["folder_name"]]=[time_interval[0][0],time_interval[-1][-1]]
return results,time_interval_for_vm_metrics


def _perform_testing(dir, test_type, num_samples):
@@ -333,18 +344,21 @@ def _perform_testing(dir, test_type, num_samples):
# Creating config file for mounting with hns enabled.
with open("/tmp/config.yml",'w') as mount_config:
mount_config.write("enable-hns: true")
mount_flags="--config-file=/tmp/config.yml"
mount_flags="--config-file=/tmp/config.yml --stackdriver-export-interval=30s"
else :
mount_flags = "--implicit-dirs --rename-dir-limit=1000000"
# Creating config file for mounting with hns disabled.
with open("/tmp/config.yml",'w') as mount_config:
mount_config.write("enable-hns: false")
mount_flags = "--config-file=/tmp/config.yml --implicit-dirs --rename-dir-limit=1000000 --stackdriver-export-interval=30s"

# Mounting the gcs bucket.
bucket_name = mount_gcs_bucket(dir["name"], mount_flags, log)
# Record time of operation and populate the results dict.
results = _record_time_of_operation(bucket_name, dir, num_samples)
results,time_intervals = _record_time_of_operation(bucket_name, dir, num_samples)
# Unmounting the bucket.
unmount_gcs_bucket(dir["name"], log)

return results
return results,time_intervals


def _parse_arguments(argv):
@@ -381,6 +395,54 @@ def _parse_arguments(argv):
return parser.parse_args(argv[1:])


def _extract_vm_metrics(time_intervals_list,folders_list):
"""
Function to extract the VM metrics given the timestamps from the rename operations.
Args:
time_intervals_list : Dictionary keyed by folder name; each value is
[start time of the first sample, end time of the last sample] for the
rename operations performed on that folder.
folders_list : List of names of folders on which the rename operation is done.
Returns:
vm_metrics_data : Dictionary of VM metrics. For example:
{
'folder_name': [CPU_UTI_PEAK, CPU_UTI_MEAN,REC_BYTES_PEAK, REC_BYTES_MEAN,
SENT_BYTES_PEAK,SENT_BYTES_MEAN,OPS_ERROR_COUNT,MEMORY_USAGE_PEAK,
MEMORY_USAGE_MEAN,LOAD_AVG_OS_THREADS_MEAN]
}
"""
vm_metrics_obj = vm_metrics.VmMetrics()
vm_metrics_data = {}

for folder in folders_list:
start_time = time_intervals_list[folder][0]
end_time = time_intervals_list[folder][1]
vm_metrics_data[folder] = vm_metrics_obj.fetch_metrics(start_time,
end_time,
INSTANCE,
PERIOD_SEC,
'rename')[0]

return vm_metrics_data


def _get_upload_value_for_vm_metrics(vm_metrics):
"""
Function that takes a dictionary of VM metrics and returns a list of rows
which can be uploaded to the Google Sheet.
Args:
vm_metrics: Dictionary of VM metrics corresponding to each renamed folder.
Returns:
upload_values: List of rows to upload. For example:
['folder_name', CPU_UTI_PEAK, CPU_UTI_MEAN, REC_BYTES_PEAK, ...]
"""
upload_values = []
for key, values in vm_metrics.items():
row = [key] + values
upload_values.append(row)
return upload_values


def _run_rename_benchmark(test_type,dir_config,num_samples,upload_gs):
with open(os.path.abspath(dir_config)) as file:
dir_str = json.load(file)
@@ -397,24 +459,47 @@ def _run_rename_benchmark(test_type,dir_config,num_samples,upload_gs):
python3 generate_folders_and_files.py {} ".format(dir_config))
sys.exit(1)

results=_perform_testing(dir_str, test_type, num_samples)
# Getting latency related metrics
results,time_intervals=_perform_testing(dir_str, test_type, num_samples)
parsed_metrics = _parse_results(dir_str, results, num_samples)
upload_values = _get_values_to_export(dir_str, parsed_metrics,
test_type)

print('Waiting for 360 seconds for metrics to be updated on VM...')
# Sampled data can take up to 240 seconds to become visible on the VM metrics
# graph, and custom metrics are intermittently still unavailable at that point,
# so wait 360 seconds to ensure the returned metrics are not empty.
time.sleep(360)

# Getting VM related metrics
folders_list=[]
for folder in dir_str["folders"]["folder_structure"]:
folders_list.append(folder["name"])
folders_list.append(dir_str["nested_folders"]["folder_name"])

vm_metrics_data= _extract_vm_metrics(time_intervals,folders_list)
upload_values_vm_metrics= _get_upload_value_for_vm_metrics(vm_metrics_data)

if upload_gs:
log.info('Uploading files to the Google Sheet\n')
if test_type == "flat":
worksheet= WORKSHEET_NAME_FLAT
vm_worksheet= WORKSHEET_VM_METRICS_FLAT
else:
worksheet= WORKSHEET_NAME_HNS
vm_worksheet= WORKSHEET_VM_METRICS_HNS

exit_code = _upload_to_gsheet(worksheet, upload_values, SPREADSHEET_ID)
if exit_code != 0 :
log.error("Upload to gsheet failed!")

exit_code = _upload_to_gsheet(worksheet, upload_values,
SPREADSHEET_ID)
if exit_code != 0:
exit_code = _upload_to_gsheet(vm_worksheet, upload_values_vm_metrics, SPREADSHEET_ID)
if exit_code != 0 :
log.error("Upload to gsheet failed!")
else:
print(upload_values)
print('Latency related metrics: {}'.format(upload_values))
print('VM metrics: {}'.format(upload_values_vm_metrics))


if __name__ == '__main__':
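For reference, the sketch below (again not part of the commit) shows the row shape the new VM-metrics worksheets receive, assuming the column order listed in the _extract_vm_metrics docstring; the flattening mirrors _get_upload_value_for_vm_metrics above and its unit test below.

# Column order assumed from the _extract_vm_metrics docstring; listed here
# only for illustration.
VM_METRIC_COLUMNS = [
    'folder_name', 'CPU_UTI_PEAK', 'CPU_UTI_MEAN', 'REC_BYTES_PEAK',
    'REC_BYTES_MEAN', 'SENT_BYTES_PEAK', 'SENT_BYTES_MEAN',
    'OPS_ERROR_COUNT', 'MEMORY_USAGE_PEAK', 'MEMORY_USAGE_MEAN',
    'LOAD_AVG_OS_THREADS_MEAN',
]

def to_upload_rows(vm_metrics_by_folder):
  """Flatten {folder: [metric, ...]} into [[folder, metric, ...], ...] rows,
  the shape passed to _upload_to_gsheet for the vm_metrics_* worksheets."""
  return [[folder] + values for folder, values in vm_metrics_by_folder.items()]

# Example with placeholder metric values for two folders.
rows = to_upload_rows({'test_folder1': [1, 2, 3], 'test_folder2': [1, 2, 3]})
assert rows == [['test_folder1', 1, 2, 3], ['test_folder2', 1, 2, 3]]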
Second changed file: unit tests for renaming_benchmark.py
@@ -58,12 +58,14 @@ def test_record_time_for_folder_rename(self,mock_time,mock_subprocess):
num_samples=2
mock_time.side_effect = [1.0, 2.0, 3.0, 4.0]
expected_time_of_operation=[1.0,1.0]
expected_time_intervals = [[1.0,2.0],[3.0,4.0]]
expected_subprocess_calls=[call("mv ./gcs_bucket/test_folder ./gcs_bucket/test_folder_renamed",shell=True),
call("mv ./gcs_bucket/test_folder_renamed ./gcs_bucket/test_folder",shell=True)]

time_op=renaming_benchmark._record_time_for_folder_rename(mount_point,folder,num_samples)
time_op,time_intervals=renaming_benchmark._record_time_for_folder_rename(mount_point,folder,num_samples)

self.assertEqual(time_op,expected_time_of_operation)
self.assertEqual(time_intervals,expected_time_intervals)
mock_subprocess.assert_has_calls(expected_subprocess_calls)

@patch('subprocess.call')
Expand Down Expand Up @@ -98,14 +100,16 @@ def test_record_time_of_operation(self,mock_time,mock_subprocess):
num_samples=2
mock_time.side_effect = [1.0, 2.0, 3.0, 4.0,1.0, 2.0, 3.0, 4.0]
expected_time_of_operation={'test_folder1':[1.0,1.0] ,'nested_folder':[1.0,1.0]}
expected_time_interval={'test_folder1':[1.0,4.0] ,'nested_folder':[1.0,4.0]}
expected_subprocess_calls=[call("mv ./gcs_bucket/test_folder1 ./gcs_bucket/test_folder1_renamed",shell=True),
call("mv ./gcs_bucket/test_folder1_renamed ./gcs_bucket/test_folder1",shell=True),
call("mv ./gcs_bucket/nested_folder ./gcs_bucket/nested_folder_renamed",shell=True),
call("mv ./gcs_bucket/nested_folder_renamed ./gcs_bucket/nested_folder",shell=True),]

time_op=renaming_benchmark._record_time_of_operation(mount_point,dir,num_samples)
time_op,time_interval=renaming_benchmark._record_time_of_operation(mount_point,dir,num_samples)

self.assertEqual(time_op,expected_time_of_operation)
self.assertEqual(time_interval,expected_time_interval)
mock_subprocess.assert_has_calls(expected_subprocess_calls)

@patch('renaming_benchmark.unmount_gcs_bucket')
@@ -129,14 +133,16 @@ def test_perform_testing_flat(self, mock_log, mock_record_time_of_operation,
test_type = "flat"
num_samples = 4
results = {}
mount_flags = "--implicit-dirs --rename-dir-limit=1000000"
mount_flags = "--config-file=/tmp/config.yml --implicit-dirs --rename-dir-limit=1000000 --stackdriver-export-interval=30s"
mock_mount_gcs_bucket.return_value="flat_bucket"
mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
mock_record_time_of_operation.return_value = [{"test_folder": [0.1, 0.2, 0.3, 0.4]},[[0.1,0.4]]]
expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
expected_time_intervals=[[0.1,0.4]]

results= renaming_benchmark._perform_testing(dir, test_type, num_samples)
results,time_intervals= renaming_benchmark._perform_testing(dir, test_type, num_samples)

self.assertEqual(results, expected_results)
self.assertEqual(time_intervals,expected_time_intervals)
# Verify calls to other functions.
mock_mount_gcs_bucket.assert_called_once_with(dir["name"], mount_flags, mock_log)
mock_record_time_of_operation.assert_called_once_with(mock_mount_gcs_bucket.return_value, dir, num_samples)
@@ -164,14 +170,16 @@ def test_perform_testing_hns(self, mock_log, mock_record_time_of_operation,
test_type = "hns"
num_samples = 4
results = {}
mount_flags = "--config-file=/tmp/config.yml"
mount_flags = "--config-file=/tmp/config.yml --stackdriver-export-interval=30s"
mock_mount_gcs_bucket.return_value="hns_bucket"
mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
mock_record_time_of_operation.return_value = [{"test_folder": [0.1, 0.2, 0.3, 0.4]},[[0.1,0.4]]]
expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]}
expected_time_intervals=[[0.1,0.4]]

results= renaming_benchmark._perform_testing(dir, test_type, num_samples)
results,time_intervals= renaming_benchmark._perform_testing(dir, test_type, num_samples)

self.assertEqual(results, expected_results)
self.assertEqual(time_intervals,expected_time_intervals)
# Verify calls to other functions.
mock_mount_gcs_bucket.assert_called_once_with(dir["name"], mount_flags, mock_log)
mock_record_time_of_operation.assert_called_once_with(mock_mount_gcs_bucket.return_value, dir, num_samples)
@@ -339,37 +347,89 @@ def test_run_rename_benchmark_error_dir_does_not_exist(self,mock_json,mock_check
mock_log.error.assert_called_once_with("Test data does not exist.To create test data, run : \
python3 generate_folders_and_files.py {} ".format(dir_config))

@patch('renaming_benchmark._extract_vm_metrics')
@patch('time.sleep')
@patch('renaming_benchmark.SPREADSHEET_ID','temp-gsheet-id')
@patch('renaming_benchmark.WORKSHEET_NAME_FLAT','flat-sheet')
@patch('renaming_benchmark.WORKSHEET_VM_METRICS_FLAT','vm-sheet')
@patch('builtins.open', new_callable=mock_open)
@patch('renaming_benchmark.log')
@patch('renaming_benchmark._check_for_config_file_inconsistency')
@patch('renaming_benchmark._check_if_dir_structure_exists')
@patch('renaming_benchmark._perform_testing')
@patch('renaming_benchmark._parse_results')
@patch('renaming_benchmark._get_values_to_export')
@patch('renaming_benchmark._upload_to_gsheet')
@patch('renaming_benchmark.json.load')
def test_run_rename_benchmark_upload_true(self,mock_json,mock_upload,mock_get_values,mock_parse_results,mock_perform_testing,mock_check_dir_exists,mock_inconsistency,mock_log,mock_open):
def test_run_rename_benchmark_upload_true(self,mock_json,mock_upload,
mock_get_values,mock_perform_testing,mock_check_dir_exists,
mock_inconsistency,mock_log,mock_open,mock_time_sleep,mock_extract_vm_metrics):
test_type="flat"
dir_config="test-config.json"
num_samples=10
num_samples=3
results={'flat':''}
upload_gs=True
worksheet= 'flat-sheet'
vm_worksheet= 'vm-sheet'
spreadsheet_id='temp-gsheet-id'
mock_inconsistency.return_value=0
mock_check_dir_exists.return_value=True
mock_parse_results.return_value={'key':'val'}
mock_perform_testing.return_value=[
{
'test_folder1_0':[1.0,1.0,1.0],
'nested_folder':[1.0,1.0,1.0]
},
{
'test_folder1_0':[0.1,0.4],
'nested_folder':[0.1,0.4]
}
]
mock_get_values.return_value=[['testdata','testdata2']]
mock_upload.return_value=0
mock_json.return_value={}

mock_json.return_value={
"name": "gcs_bucket",
"folders": {
"folder_structure": [
{
'name': "test_folder1_0",
"num_files": 1,
"file_name_prefix": "file",
"file_size": "1kb"
}
]
},
"nested_folders": {
"folder_name": "nested_folder",
"num_folders":1,
"folder_structure": [
{
'name': "test_nfolder1",
"num_files": 1,
"file_name_prefix": "file",
"file_size": "1kb"
}
]
}
}
mock_extract_vm_metrics.return_value={'test key':['some vm metrics']}
expected_upload_calls= [call(worksheet,[['testdata','testdata2']],spreadsheet_id),
call(vm_worksheet,[['test key','some vm metrics']],spreadsheet_id)]

renaming_benchmark._run_rename_benchmark(test_type,dir_config,num_samples,upload_gs)

mock_log.info.assert_called_with('Uploading files to the Google Sheet\n')
mock_upload.assert_called_with(worksheet,[['testdata','testdata2']],spreadsheet_id)
mock_upload.assert_has_calls(expected_upload_calls)


def test_get_upload_value_for_vm_metrics(self):
vm_metrics = {
'test_folder1': [1,2,3],
'test_folder2': [1,2,3]
}
expected_values= [['test_folder1',1,2,3],['test_folder2',1,2,3]]

upload_values = renaming_benchmark._get_upload_value_for_vm_metrics(vm_metrics)

self.assertEqual(upload_values,expected_values)


if __name__ == '__main__':
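A quick note on the mocking pattern the tests above rely on: patching time.time with a side_effect sequence makes every sample's duration and [start, end] interval deterministic. The minimal sketch below (not part of the commit) reproduces the arithmetic behind expected_time_of_operation and expected_time_intervals.

from unittest import mock
import time

with mock.patch('time.time', side_effect=[1.0, 2.0, 3.0, 4.0]):
  durations, intervals = [], []
  for _ in range(2):  # two samples, as in test_record_time_for_folder_rename
    start = time.time()
    # (the real benchmark shells out to `mv` between these two calls)
    end = time.time()
    durations.append(end - start)
    intervals.append([start, end])

assert durations == [1.0, 1.0]
assert intervals == [[1.0, 2.0], [3.0, 4.0]]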
