Commit

python file changes

Tulsishah committed Oct 18, 2023
1 parent ebb0d61 commit 59a376c
Showing 9 changed files with 414 additions and 179 deletions.
5 changes: 4 additions & 1 deletion perfmetrics/scripts/continuous_test/gcp_ubuntu/build.sh
@@ -30,6 +30,9 @@ chmod +x perfmetrics/scripts/build_and_install_gcsfuse.sh
# Mounting gcs bucket
cd "./perfmetrics/scripts/"

echo Installing requirements..
pip install --require-hashes -r bigquery/requirements.txt --user

# Upload data to the gsheet only when it runs through kokoro.
UPLOAD_FLAGS=""
if [ "${KOKORO_JOB_TYPE}" == "RELEASE" ] || [ "${KOKORO_JOB_TYPE}" == "CONTINUOUS_INTEGRATION" ] || [ "${KOKORO_JOB_TYPE}" == "PRESUBMIT_GITHUB" ];
@@ -46,7 +49,7 @@ chmod +x run_load_test_and_fetch_metrics.sh
./run_load_test_and_fetch_metrics.sh "$GCSFUSE_FIO_FLAGS" "$UPLOAD_FLAGS"

# ls_metrics test. This test does gcsfuse mount with the passed flags first and then does the testing.
LOG_FILE_LIST_TESTS=gcsfuse-list-logs.txt
LOG_FILE_LIST_TESTS=${KOKORO_ARTIFACTS_DIR}/gcsfuse-list-logs.txt
GCSFUSE_LIST_FLAGS="$GCSFUSE_FLAGS --log-file $LOG_FILE_LIST_TESTS"
cd "./ls_metrics"
chmod +x run_ls_benchmark.sh
57 changes: 54 additions & 3 deletions perfmetrics/scripts/fetch_metrics.py
@@ -1,3 +1,17 @@
# Copyright 2023 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Executes fio_metrics.py and vm_metrics.py by passing appropriate arguments.
"""
import socket
@@ -7,6 +21,8 @@
from fio import fio_metrics
from vm_metrics import vm_metrics
from gsheet import gsheet
from bigquery import constants
from bigquery import experiments_gcsfuse_bq

INSTANCE = socket.gethostname()
PERIOD_SEC = 120
@@ -40,6 +56,27 @@ def _parse_arguments(argv):
default=False,
required=False,
)
parser.add_argument(
'--upload_bq',
help='Upload the results to BigQuery.',
action='store_true',
default=False,
required=False,
)
parser.add_argument(
'--config_id',
help='Configuration ID of the experiment.',
action='store',
nargs=1,
required=False,
)
parser.add_argument(
'--start_time_build',
help='Start time of the build.',
action='store',
nargs=1,
required=False,
)
return parser.parse_args(argv[1:])
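
To illustrate the new flags (invented values): because --config_id and --start_time_build use nargs=1, each parses into a one-element list, which is why the calling code indexes args.config_id[0] and args.start_time_build[0] below. A minimal, runnable reproduction:

import argparse

# Minimal reproduction of the three new flags (values are invented):
parser = argparse.ArgumentParser()
parser.add_argument('--upload_bq', action='store_true', default=False)
parser.add_argument('--config_id', action='store', nargs=1)
parser.add_argument('--start_time_build', action='store', nargs=1)
args = parser.parse_args(['--upload_bq', '--config_id', 'exp-config-1',
                          '--start_time_build', '1697600000'])
print(args.upload_bq)         # True
print(args.config_id)         # ['exp-config-1']  (nargs=1 wraps the value in a list)
print(args.start_time_build)  # ['1697600000']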


@@ -51,15 +88,23 @@ def _parse_arguments(argv):

args = _parse_arguments(argv)

temp = fio_metrics_obj.get_metrics(args.fio_json_output_path)
metrics_data = fio_metrics_obj.get_values_to_upload(temp)

if args.upload_gs:
temp = fio_metrics_obj.get_metrics(args.fio_json_output_path, FIO_WORKSHEET_NAME)
else:
temp = fio_metrics_obj.get_metrics(args.fio_json_output_path)

if args.upload_bq:
if not args.config_id or not args.start_time_build:
raise Exception("Pass required arguments experiments configuration ID and start time of build for uploading to BigQuery")
bigquery_obj = experiments_gcsfuse_bq.ExperimentsGCSFuseBQ(constants.PROJECT_ID, constants.DATASET_ID)
fio_metrics_obj.upload_metrics_to_bigquery(metrics_data, args.config_id[0], args.start_time_build[0], constants.FIO_TABLE_ID)


print('Waiting for 360 seconds for metrics to be updated on VM...')
# It takes up to 240 seconds for sampled data to be visible on the VM metrics graph
# So, waiting for 360 seconds to ensure the returned metrics are not empty.
# Intermittenly custom metrics are not available after 240 seconds, hence
# Intermittently custom metrics are not available after 240 seconds, hence
# waiting for 360 secs instead of 240 secs
time.sleep(360)

@@ -96,3 +141,9 @@ def _parse_arguments(argv):

if args.upload_gs:
gsheet.write_to_google_sheet(VM_WORKSHEET_NAME, vm_metrics_data)

if args.upload_bq:
if not args.config_id or not args.start_time_build:
raise Exception("Pass required arguments experiments configuration ID and start time of build for uploading to BigQuery")
bigquery_obj = experiments_gcsfuse_bq.ExperimentsGCSFuseBQ(constants.PROJECT_ID, constants.DATASET_ID)
bigquery_obj.upload_metrics_to_table(constants.VM_TABLE_ID, args.config_id[0], args.start_time_build[0], vm_metrics_data)
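
The ExperimentsGCSFuseBQ wrapper used here lives under bigquery/ in the repository and is not shown in this diff. Purely as a hedged sketch (the function name, row shape, and error handling are assumptions, not the repository's implementation), a streaming insert with the google-cloud-bigquery client could look like:

from google.cloud import bigquery

def upload_rows(project_id, dataset_id, table_id, rows):
  # rows: list of dicts keyed by column name, matching the (assumed) table schema.
  client = bigquery.Client(project=project_id)
  table = f'{project_id}.{dataset_id}.{table_id}'
  errors = client.insert_rows_json(table, rows)  # streaming insert
  if errors:
    raise RuntimeError(f'BigQuery insert failed: {errors}')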
67 changes: 45 additions & 22 deletions perfmetrics/scripts/fio/fio_metrics.py
@@ -1,3 +1,17 @@
# Copyright 2023 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Extracts required metrics from fio output file and writes to google sheet.
Takes fio output json filepath as command-line input
@@ -25,7 +39,7 @@ class JobParam:
name: Can be any suitable value, it refers to the output dictionary key for
the parameter. To be used when creating parameter dict for each job.
json_name: Must match the FIO job specification key. Key for parameter inside
'global options'/'job options' dictionary
Ex: For output json = {"global options": {"filesize":"50M"}, "jobs": [
"job options": {"rw": "read"}]}
@@ -48,7 +62,7 @@ class JobParam:
class JobMetric:
"""Dataclass for a FIO job metric.
name: Can be any suitable value, it is used as key for the metric
when creating metric dict for each job
levels: Keys for the metric inside 'read'/'write' dictionary in each job.
Each value in the list must match the key in the FIO output JSON
@@ -241,7 +255,7 @@ def _get_start_end_times(self, out_json, job_params) -> List[Tuple[int]]:

# for multiple jobs, end time of one job = start time of next job
end_time_ms = next_end_time_ms if next_end_time_ms > 0 else out_json[
consts.TIMESTAMP_MS]
# job start time = job end time - job runtime - ramp time
start_time_ms = end_time_ms - job_rw[consts.RUNTIME] - ramptime_ms
next_end_time_ms = start_time_ms - startdelay_ms
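
A worked example of this back-calculation (all numbers invented, in milliseconds): for the last job in the file, next_end_time_ms starts at 0, so the overall timestamp is used as its end time.

# Suppose out_json[consts.TIMESTAMP_MS] = 10_000, job_rw[consts.RUNTIME] = 3_000,
# ramptime_ms = 500, and startdelay_ms = 200:
end_time_ms = 10_000                  # next_end_time_ms was 0, fall back to timestamp
start_time_ms = 10_000 - 3_000 - 500  # = 6_500
next_end_time_ms = 6_500 - 200        # = 6_300, becomes the previous job's end time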
@@ -412,12 +426,13 @@ def _extract_metrics(self, fio_out) -> List[Dict[str, Any]]:

return all_jobs

def _add_to_gsheet(self, jobs, worksheet_name):
"""Add the metric values to respective columns in a google sheet.
def get_values_to_upload(self, jobs):
"""Get the metrics values in a list to export to Google Spreadsheet and BigQuery.
Args:
jobs: list of dicts, contains required metrics for each job
worksheet_name: str, worksheet where job metrics should be written.
jobs: List of dicts, contains required metrics for each job
Returns:
list: A 2-d list consisting of metrics values for each job
"""

values = []
@@ -431,31 +446,40 @@ def _add_to_gsheet(self, jobs, worksheet_name):
for metric_val in job[consts.METRICS].values():
row.append(metric_val)
values.append(row)
return values

gsheet.write_to_google_sheet(worksheet_name, values)
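
For illustration, a hypothetical input and the row get_values_to_upload would flatten it to (the dictionary keys and the ordering of parameter values before metric values are assumptions; the real code reads keys via the consts module):

jobs = [{
    'params': {'rw': 'read', 'filesize': '50M'},
    'metrics': {'iops': 1500.0, 'lat_s_mean': 0.025},
}]
# get_values_to_upload(jobs) would return one row per job, e.g.:
# [['read', '50M', 1500.0, 0.025]]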

def get_metrics(self,
filepath,
worksheet_name=None) -> List[Dict[str, Any]]:
"""Returns job metrics obtained from given filepath and writes to gsheets.
def get_metrics(self, filepath) -> List[Dict[str, Any]]:
"""Returns job metrics obtained from given filepath.
Args:
filepath : str
Path of the json file to be parsed
worksheet_name: str, optional, default:None
Worksheet where job metrics should be written.
Pass '' or None to skip writing to Google sheets
filepath (str): Path of the json file to be parsed
Returns:
List of dicts, contains list of jobs and required metrics for each job
"""
fio_out = self._load_file_dict(filepath)
job_metrics = self._extract_metrics(fio_out)
if worksheet_name:
self._add_to_gsheet(job_metrics, worksheet_name)

return job_metrics

def upload_metrics_to_gsheet(self, metrics_data, worksheet_name):
"""Uploads metrics data for load tests to Google Spreadsheets
Args:
metrics_data (list): List of metric values for each job
worksheet_name (str): Name of Google sheet to which metrics data will be uploaded
"""
gsheet.write_to_google_sheet(worksheet_name, metrics_data)

def upload_metrics_to_bigquery(self, metrics_data, config_id, start_time_build, table_id_bq):
"""Uploads metrics data for load tests to Google Spreadsheets
Args:
metrics_data (list): List of metric values for each job
config_id (str): configuration ID of the experiment
start_time_build (int): Start time of the build
table_id_bq (str): ID of table in BigQuery to which metrics data will be uploaded
"""
bigquery_obj = experiments_gcsfuse_bq.ExperimentsGCSFuseBQ(constants.PROJECT_ID, constants.DATASET_ID)
bigquery_obj.upload_metrics_to_table(table_id_bq, config_id, start_time_build, metrics_data)
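
Taken together, a hypothetical caller of the refactored API (the file path, worksheet name, config ID, and timestamp are placeholder values):

fio_metrics_obj = FioMetrics()
job_metrics = fio_metrics_obj.get_metrics('fio-output.json')      # parse only, no upload
metrics_data = fio_metrics_obj.get_values_to_upload(job_metrics)  # flatten to a 2-d list
fio_metrics_obj.upload_metrics_to_gsheet(metrics_data, 'fio_metrics_expt')
fio_metrics_obj.upload_metrics_to_bigquery(
    metrics_data, 'exp-config-1', 1697600000, constants.FIO_TABLE_ID)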

if __name__ == '__main__':
argv = sys.argv
if len(argv) != 2:
@@ -466,4 +490,3 @@ def get_metrics(self,
fio_metrics_obj = FioMetrics()
temp = fio_metrics_obj.get_metrics(argv[1])
print(temp)
