From 0fae3073771bf79b94abfed4009c424204db6b13 Mon Sep 17 00:00:00 2001 From: Nitin Garg <113666283+gargnitingoogle@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:45:48 +0530 Subject: [PATCH] [testing-on-gke] Support per-workload gcsfuse-mount-options (#2348) This makes the following changes. * gcsfuse_mount_options is no longer accepted by the run-gke-tests.sh script; it is instead specified per workload in the ${workload_config} file. * Helm chart names and pod names have been shortened by using a hash of the workload instead of the long names that previously encoded workload parameters such as blockSize/numThreads/batchSize. The scenario and file-size are still kept in the pod/chart names so that they remain easy to spot. * Adds validation checks and unit tests for dlio_workload and fio_workload. List of intermediate squashed commits: * Support per-workload gcsfuse mount options * add default gcsfuseMountOptions in workloads config * add/update copyright headers * remove default gcsfuse_mount_options from run-script --- .../examples/dlio/dlio_workload.py | 73 +++++++++-- .../examples/dlio/dlio_workload_test.py | 101 +++++++++++++++- .../examples/dlio/parse_logs.py | 13 +- .../testing_on_gke/examples/dlio/run_tests.py | 54 +++------ .../templates/dlio-tester.yaml | 6 +- .../dlio/unet3d-loading-test/values.yaml | 2 + .../examples/fio/fio_workload.py | 82 +++++++++++-- .../examples/fio/fio_workload_test.py | 113 +++++++++++++++++- .../loading-test/templates/fio-tester.yaml | 7 +- .../examples/fio/loading-test/values.yaml | 2 + .../testing_on_gke/examples/fio/parse_logs.py | 18 +-- .../testing_on_gke/examples/fio/run_tests.py | 54 +++------ .../testing_on_gke/examples/run-gke-tests.sh | 15 +-- .../testing_on_gke/examples/utils/utils.py | 2 +- .../testing_on_gke/examples/workloads.json | 13 +- 15 files changed, 420 insertions(+), 135 deletions(-) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py index 68accd78af..25873d72c9 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py @@ -1,3 +1,18 @@ +# Copyright 2018 The Kubernetes Authors. +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This file defines a DlioWorkload (a DLIO Unet3d workload) and provides utility for parsing a json test-config file for a list of them.
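To make the per-workload mount-options format concrete before diving into the code, here is a small illustrative sketch that is not part of the patch: expand_mount_options and escape_commas_for_helm are hypothetical helpers showing how a compact gcsfuseMountOptions string (documented in the workload docstrings below) splits into cli flags and config-file settings, and how commas are escaped before being handed to helm --set, which is the job escapeCommasInString() performs in run_tests.py.

# Illustrative only -- not part of this patch. Shows how a compact
# gcsfuseMountOptions string decomposes into cli flags and config-file
# settings, and how commas are escaped before being passed to
# "helm install ... --set gcsfuse.mountOptions=...".
def expand_mount_options(compact: str) -> (list, list):
  """Splits a compact mount-options string into cli flags and config entries."""
  cli_flags, config_entries = [], []
  for option in compact.split(','):
    if ':' in option:
      # config-file flag, e.g. "metadata-cache:ttl-secs:-1"
      *config_path, value = option.split(':')
      config_entries.append((config_path, value))
    else:
      # cli flag, e.g. "implicit-dirs" or "file_mode=777"
      cli_flags.append(option)
  return cli_flags, config_entries


def escape_commas_for_helm(compact: str) -> str:
  # helm --set treats unescaped commas as value separators, so they must be
  # escaped with a backslash (mirroring escapeCommasInString() in run_tests.py).
  return compact.replace(',', '\\,')


if __name__ == '__main__':
  sample = 'implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1'
  print(expand_mount_options(sample))
  print(escape_commas_for_helm(sample))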
@@ -8,13 +23,23 @@ def validateDlioWorkload(workload: dict, name: str): """Validates the given json workload object.""" - if 'dlioWorkload' not in workload: - print(f"{name} does not have 'dlioWorkload' key in it.") - return False - - if 'bucket' not in workload: - print(f"{name} does not have 'bucket' key in it.") - return False + for requiredWorkloadAttribute, expectedType in { + 'bucket': str, + 'gcsfuseMountOptions': str, + 'dlioWorkload': dict, + }.items(): + if requiredWorkloadAttribute not in workload: + print(f"{name} does not have '{requiredWorkloadAttribute}' key in it.") + return False + if not type(workload[requiredWorkloadAttribute]) is expectedType: + print( + f"In {name}, the type of '{requiredWorkloadAttribute}' is" + f" '{type(workload[requiredWorkloadAttribute])}', not {expectedType}" + ) + return False + if expectedType == str and ' ' in workload[requiredWorkloadAttribute]: + print(f"{name} has a space in the value of '{requiredWorkloadAttribute}'") + return False if 'fioWorkload' in workload: print(f"{name} has 'fioWorkload' key in it, which is unexpected.") @@ -73,6 +98,14 @@ class DlioWorkload: 4. bucket (str): Name of a GCS bucket to read input files from. 5. batchSizes (set of ints): a set of ints representing multiple batchsize values to test. + 6. gcsfuseMountOptions (str): gcsfuse mount options as a single + string in compact stringified format, to be used for the + test scenario "gcsfuse-generic". The individual config/cli flag values should + be separated by commas. Each cli flag should be of the form "<name>[=<value>]", + while each config-file flag should be of the form + "<config>[:<sub-config>[:<sub-sub-config>[...]]]:<value>". For example, a legal + value would be: + "implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1". """ def __init__( @@ -82,12 +115,14 @@ def __init__( recordLength: int, bucket: str, batchSizes: list, + gcsfuseMountOptions: str, ): self.scenario = scenario self.numFilesTrain = numFilesTrain self.recordLength = recordLength self.bucket = bucket self.batchSizes = set(batchSizes) + self.gcsfuseMountOptions = gcsfuseMountOptions def ParseTestConfigForDlioWorkloads(testConfigFileName: str): @@ -119,6 +154,30 @@ def ParseTestConfigForDlioWorkloads(testConfigFileName: str): dlioWorkload['recordLength'], workload['bucket'], dlioWorkload['batchSizes'], + workload['gcsfuseMountOptions'], ) ) return dlioWorkloads + + +def DlioChartNamePodName( + dlioWorkload: DlioWorkload, instanceID: str, batchSize: int ) -> (str, str, str): + shortenScenario = { + 'local-ssd': 'ssd', + 'gcsfuse-generic': 'gcsfuse', + } + shortForScenario = ( + shortenScenario[dlioWorkload.scenario] + if dlioWorkload.scenario in shortenScenario + else 'other' + ) + + hashOfWorkload = str(hash((instanceID, batchSize, dlioWorkload))).replace( + '-', '' + ) + return ( + f'dlio-unet3d-{shortForScenario}-{dlioWorkload.recordLength}-{hashOfWorkload}', + f'dlio-tester-{shortForScenario}-{dlioWorkload.recordLength}-{hashOfWorkload}', + f'{instanceID}/{dlioWorkload.numFilesTrain}-{dlioWorkload.recordLength}-{batchSize}-{hashOfWorkload}/{dlioWorkload.scenario}', + ) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py index ba85a7b3bc..62780d12a4 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py @@ -1,3 +1,18 @@ +# Copyright 2018 The Kubernetes Authors.
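DlioChartNamePodName above (and FioChartNamePodName added later in fio_workload.py) shortens the helm chart and pod names by hashing the workload with Python's built-in hash(), which is stable only within a single interpreter run; that is sufficient here because the chart name, pod name and output-directory prefix are all generated by the same run_tests.py invocation, and for fio the pod name is additionally written into the output directory. If cross-run stability were ever wanted, a deterministic digest could be used instead. The sketch below is illustrative only and not part of the patch; workload_signature is a hypothetical helper.

# Illustrative sketch only -- not part of this patch. A deterministic
# alternative to the built-in hash() used in DlioChartNamePodName /
# FioChartNamePodName, in case cross-run stability is ever needed.
import hashlib


def workload_signature(*fields) -> str:
  """Returns a short hex digest that is stable across processes and runs."""
  joined = '|'.join(str(field) for field in fields)
  return hashlib.blake2b(joined.encode('utf-8'), digest_size=6).hexdigest()


# Example: signature over a workload's identifying parameters.
print(workload_signature('ldap-20240918-134548', 'gcsfuse-generic', 'read', '64k', '64K', 50, 20000))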
+# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This file defines unit tests for functionalities in dlio_workload.py""" import unittest @@ -9,9 +24,20 @@ class DlioWorkloadTest(unittest.TestCase): def test_validate_dlio_workload_empty(self): self.assertFalse(validateDlioWorkload(({}), "empty-dlio-workload")) - def test_validate_dlio_workload_invalid_no_bucket(self): + def test_validate_dlio_workload_invalid_missing_bucket(self): self.assertFalse( - validateDlioWorkload(({"dlioWorkload": {}}), "invalid-dlio-workload-1") + validateDlioWorkload( + ({"dlioWorkload": {}, "gcsfuseMountOptions": ""}), + "invalid-dlio-workload-missing-bucket", + ) + ) + + def test_validate_dlio_workload_invalid_bucket_contains_space(self): + self.assertFalse( + validateDlioWorkload( + ({"dlioWorkload": {}, "gcsfuseMountOptions": "", "bucket": " "}), + "invalid-dlio-workload-bucket-contains-space", + ) ) def test_validate_dlio_workload_invalid_no_dlioWorkloadSpecified(self): @@ -22,7 +48,11 @@ def test_validate_dlio_workload_invalid_no_dlioWorkloadSpecified(self): def test_validate_dlio_workload_invalid_commented_out_dlioWorkload(self): self.assertFalse( validateDlioWorkload( - ({"_dlioWorkload": {}, "bucket": "dummy-bucket"}), + ({ + "_dlioWorkload": {}, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", + }), "commented-out-dlio-workload", ) ) @@ -34,6 +64,7 @@ def test_validate_dlio_workload_invalid_mixed_dlioWorkload_fioWorkload(self): "dlioWorkload": {}, "fioWorkload": {}, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }), "mixed-dlio/fio-workload", ) @@ -46,6 +77,7 @@ def test_validate_dlio_workload_invalid_missing_numFilesTrain(self): "batchSizes": [100, 200], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -62,6 +94,7 @@ def test_validate_dlio_workload_invalid_unsupported_numFilesTrain(self): "batchSizes": [100, 200], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -77,6 +110,7 @@ def test_validate_dlio_workload_invalid_missing_recordLength(self): "batchSizes": [100, 200], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -93,6 +127,7 @@ def test_validate_dlio_workload_invalid_unsupported_recordLength(self): "batchSizes": [100, 200], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -101,6 +136,61 @@ def test_validate_dlio_workload_invalid_unsupported_recordLength(self): ) pass + def test_validate_dlio_workload_invalid_missing_gcsfuseMountOptions(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 100, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, 
"invalid-dlio-workload-missing-gcsfuseMountOptions" + ) + ) + pass + + def test_validate_dlio_workload_invalid_unsupported_gcsfuseMountOptions( + self, + ): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": 100, + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-unsupported-gcsfuseMountOptions1" + ) + ) + pass + + def test_validate_dlio_workload_invalid_gcsfuseMountOptions_contains_space( + self, + ): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": "abc def", + }) + self.assertFalse( + validateDlioWorkload( + workload, + "invalid-dlio-workload-unsupported-gcsfuseMountOptions-contains-space", + ) + ) + pass + def test_validate_dlio_workload_invalid_missing_batchSizes(self): workload = dict({ "dlioWorkload": { @@ -108,6 +198,7 @@ def test_validate_dlio_workload_invalid_missing_batchSizes(self): "recordLength": 10000, }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -124,6 +215,7 @@ def test_validate_dlio_workload_invalid_unsupported_batchSizes1(self): "batchSizes": ["100"], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -140,6 +232,7 @@ def test_validate_dlio_workload_invalid_unsupported_batchSizes2(self): "batchSizes": [0, -1], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateDlioWorkload( @@ -156,6 +249,7 @@ def test_validate_dlio_workload_valid_single_batchSize(self): "batchSizes": [100], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertTrue(validateDlioWorkload(workload, "valid-dlio-workload-2")) pass @@ -168,6 +262,7 @@ def test_validate_dlio_workload_valid_multiple_batchSizes(self): "batchSizes": [100, 200], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertTrue(validateDlioWorkload(workload, "valid-dlio-workload-2")) pass diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py index 936423c065..3ed9a66ca8 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright 2018 The Kubernetes Authors. -# Copyright 2022 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -168,15 +168,14 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str): continue for i in range(summary_data["epochs"]): - test_name = summary_data["hostname"] - part_list = test_name.split("-") - key = "-".join(part_list[2:5]) + key = root.split("/")[-2] + key_split = key.split("-") if key not in output: output[key] = { - "num_files_train": part_list[-3], - "mean_file_size": part_list[-2], - "batch_size": part_list[-1], + "num_files_train": key_split[-4], + "mean_file_size": key_split[-3], + "batch_size": key_split[-2], "records": { "local-ssd": [], "gcsfuse-generic": [], diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py index 01d5cbb265..780b35c3b2 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright 2018 The Kubernetes Authors. -# Copyright 2022 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,6 @@ import dlio_workload -# The default value of gcsfuse-mount-options to be used -# for "gcsfuse-generic" scenario. -# For description of how to specify the value for this, -# look at the description of the argparser argument for gcsfuse-mount-options. -_DEFAULT_GCSFUSE_MOUNT_OPTIONS = 'implicit-dirs' - - def run_command(command: str): """Runs the given string command as a subprocess.""" result = subprocess.run(command.split(' '), capture_output=True, text=True) @@ -49,20 +42,17 @@ def escapeCommasInString(unescapedStr: str) -> str: def createHelmInstallCommands( dlioWorkloads: set, instanceId: str, - gcsfuseMountOptions: str, machineType: str, ) -> list: """Creates helm install commands for the given dlioWorkload objects.""" helm_commands = [] - if not gcsfuseMountOptions: - gcsfuseMountOptions = _DEFAULT_GCSFUSE_MOUNT_OPTIONS for dlioWorkload in dlioWorkloads: for batchSize in dlioWorkload.batchSizes: + chartName, podName, outputDirPrefix = dlio_workload.DlioChartNamePodName( + dlioWorkload, instanceId, batchSize + ) commands = [ - ( - 'helm install' - f' dlio-unet3d-{dlioWorkload.scenario}-{dlioWorkload.numFilesTrain}-{dlioWorkload.recordLength}-{batchSize} unet3d-loading-test' - ), + f'helm install {chartName} unet3d-loading-test', f'--set bucketName={dlioWorkload.bucket}', f'--set scenario={dlioWorkload.scenario}', f'--set dlio.numFilesTrain={dlioWorkload.numFilesTrain}', @@ -71,9 +61,11 @@ def createHelmInstallCommands( f'--set instanceId={instanceId}', ( '--set' - f' gcsfuse.mountOptions={escapeCommasInString(gcsfuseMountOptions)}' + f' gcsfuse.mountOptions={escapeCommasInString(dlioWorkload.gcsfuseMountOptions)}' ), f'--set nodeType={machineType}', + f'--set podName={podName}', + f'--set outputDirPrefix={outputDirPrefix}', ] helm_command = ' '.join(commands) @@ -88,7 +80,6 @@ def main(args) -> None: helmInstallCommands = createHelmInstallCommands( dlioWorkloads, args.instance_id, - args.gcsfuse_mount_options, args.machine_type, ) for helmInstallCommand in helmInstallCommands: @@ -121,21 +112,6 @@ def main(args) -> None: ), required=True, ) - parser.add_argument( - '--gcsfuse-mount-options', - metavar='GCSFuse mount options', - help=( - 'GCSFuse mount-options, in a compact stringified' - ' format, to be set for the ' - ' scenario "gcsfuse-generic". 
The individual config/cli flag values' - ' should be separated by comma. Each cli flag should be of the form' - ' "[=]". Each config-file flag should be of form' - ' "[:[:[...]]]:". For' - ' example, a sample value would be:' - ' "implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1".' - ), - required=False, - ) parser.add_argument( '--machine-type', metavar='Machine-type of the GCE VM or GKE cluster node', @@ -153,19 +129,17 @@ def main(args) -> None: ) args = parser.parse_args() - for argument in ['instance_id', 'gcsfuse_mount_options', 'machine_type']: - value = getattr(args, argument) - if ' ' in value: - raise Exception( - f'Argument {argument} (value="{value}") contains space in it, which' - ' is not supported.' - ) - for argument in ['machine_type', 'instance_id']: + for argument in ['instance_id', 'machine_type']: value = getattr(args, argument) if len(value) == 0 or str.isspace(value): raise Exception( f'Argument {argument} (value="{value}") is empty or contains only' ' spaces.' ) + if ' ' in value: + raise Exception( + f'Argument {argument} (value="{value}") contains space in it, which' + ' is not supported.' + ) main(args) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/templates/dlio-tester.yaml b/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/templates/dlio-tester.yaml index 74d36bd820..75829fc93b 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/templates/dlio-tester.yaml +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/templates/dlio-tester.yaml @@ -16,7 +16,7 @@ apiVersion: v1 kind: Pod metadata: - name: dlio-tester-{{ .Values.scenario }}-{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }} + name: {{ .Values.podName }} {{- if ne .Values.scenario "local-ssd" }} annotations: gke-gcsfuse/volumes: "true" @@ -73,7 +73,7 @@ spec: sleep 300 {{ end }} - outputDir=/logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }} + outputDir=/logs/{{ .Values.outputDirPrefix }} echo "Testing {{ .Values.scenario }}" mpirun -np 8 dlio_benchmark workload=unet3d_a100 \ @@ -93,7 +93,7 @@ spec: echo "{{ .Values.gcsfuse.mountOptions }}" > ${outputDir}/gcsfuse_mount_options {{ end }} - gsutil -m cp -R /logs/{{ .Values.instanceId }} gs://{{ .Values.bucketName }}/logs/{{ .Values.instanceId }}/$(date +"%Y-%m-%d-%H-%M") + gsutil -m cp -R /logs/* gs://{{ .Values.bucketName }}/logs/ volumeMounts: - name: dshm mountPath: /dev/shm diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/values.yaml b/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/values.yaml index ef0b3a20ef..359185fcb9 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/values.yaml +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/values.yaml @@ -23,6 +23,8 @@ bucketName: gke-dlio-test-data scenario: local-ssd nodeType: n2-standard-96 instanceId: ldap-yyyymmdd-hhmmss +podName: +outputDirPrefix: resourceLimits: cpu: 0 diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py index 581df02142..84a5257487 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py @@ -1,3 +1,18 @@ +# 
Copyright 2018 The Kubernetes Authors. +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This file defines a FioWorkload and provides utility for parsing a json test-config file for a list of them. @@ -8,13 +23,23 @@ def validateFioWorkload(workload: dict, name: str): """Validates the given json workload object.""" - if 'fioWorkload' not in workload: - print(f"{name} does not have 'fioWorkload' key in it.") - return False - - if 'bucket' not in workload: - print(f"{name} does not have 'bucket' key in it.") - return False + for requiredWorkloadAttribute, expectedType in { + 'bucket': str, + 'gcsfuseMountOptions': str, + 'fioWorkload': dict, + }.items(): + if requiredWorkloadAttribute not in workload: + print(f"{name} does not have '{requiredWorkloadAttribute}' key in it.") + return False + if not type(workload[requiredWorkloadAttribute]) is expectedType: + print( + f"In {name}, the type of '{requiredWorkloadAttribute}' is" + f" '{type(workload[requiredWorkloadAttribute])}', not {expectedType}" + ) + return False + if expectedType == str and ' ' in workload[requiredWorkloadAttribute]: + print(f"{name} has a space in the value of '{requiredWorkloadAttribute}'") + return False if 'dlioWorkload' in workload: print(f"{name} has 'dlioWorkload' key in it, which is unexpected.") @@ -84,6 +109,14 @@ class FioWorkload: 6. bucket (string): Name of a GCS bucket to read input files from. 7. readTypes (set of strings): a set containing multiple values out of 'read', 'randread'. + 8. gcsfuseMountOptions (str): gcsfuse mount options as a single + string in compact stringified format, to be used for the + test scenario "gcsfuse-generic". The individual config/cli flag values should + be separated by commas. Each cli flag should be of the form "<name>[=<value>]", + while each config-file flag should be of the form + "<config>[:<sub-config>[:<sub-sub-config>[...]]]:<value>". For example, a legal + value would be: + "implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1".
""" def __init__( @@ -95,6 +128,7 @@ def __init__( numThreads: int, bucket: str, readTypes: list, + gcsfuseMountOptions: str, ): self.scenario = scenario self.fileSize = fileSize @@ -103,13 +137,15 @@ def __init__( self.numThreads = numThreads self.bucket = bucket self.readTypes = set(readTypes) + self.gcsfuseMountOptions = gcsfuseMountOptions def PPrint(self): print( f'scenario:{self.scenario}, fileSize:{self.fileSize},' f' blockSize:{self.blockSize}, filesPerThread:{self.filesPerThread},' f' numThreads:{self.numThreads}, bucket:{self.bucket},' - f' readTypes:{self.readTypes}' + f' readTypes:{self.readTypes}, gcsfuseMountOptions:' + f' {self.gcsfuseMountOptions}' ) @@ -148,6 +184,36 @@ def ParseTestConfigForFioWorkloads(fioTestConfigFile: str): if 'readTypes' in fioWorkload else ['read', 'randread'] ), + workload['gcsfuseMountOptions'], ) ) return fioWorkloads + + +def FioChartNamePodName( + fioWorkload: FioWorkload, instanceID: str, readType: str ) -> (str, str, str): + shortenScenario = { + 'local-ssd': 'ssd', + 'gcsfuse-generic': 'gcsfuse', + } + shortForScenario = ( + shortenScenario[fioWorkload.scenario] + if fioWorkload.scenario in shortenScenario + else 'other' + ) + readTypeToShortReadType = {'read': 'sr', 'randread': 'rr'} + shortForReadType = ( + readTypeToShortReadType[readType] + if readType in readTypeToShortReadType + else 'ur' + ) + + hashOfWorkload = str(hash((fioWorkload, instanceID, readType))).replace( + '-', '' + ) + return ( + f'fio-load-{shortForScenario}-{shortForReadType}-{fioWorkload.fileSize.lower()}-{hashOfWorkload}', + f'fio-tester-{shortForScenario}-{shortForReadType}-{fioWorkload.fileSize.lower()}-{hashOfWorkload}', + f'{instanceID}/{fioWorkload.fileSize}-{fioWorkload.blockSize}-{fioWorkload.numThreads}-{fioWorkload.filesPerThread}-{hashOfWorkload}/{fioWorkload.scenario}/{readType}', + ) diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py index c6b44ddcca..0766b7acbc 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py @@ -1,3 +1,18 @@ +# Copyright 2018 The Kubernetes Authors. +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ """This file defines unit tests for functionalities in fio_workload.py""" import unittest @@ -9,9 +24,20 @@ class FioWorkloadTest(unittest.TestCase): def test_validate_fio_workload_empty(self): self.assertFalse(validateFioWorkload(({}), "empty-fio-workload")) - def test_validate_fio_workload_invalid_no_bucket(self): + def test_validate_fio_workload_invalid_missing_bucket(self): self.assertFalse( - validateFioWorkload(({"fioWorkload": {}}), "invalid-fio-workload-1") + validateFioWorkload( + ({"fioWorkload": {}, "gcsfuseMountOptions": ""}), + "invalid-fio-workload-missing-bucket", + ) + ) + + def test_validate_fio_workload_invalid_bucket_contains_space(self): + self.assertFalse( + validateFioWorkload( + ({"fioWorkload": {}, "gcsfuseMountOptions": "", "bucket": " "}), + "invalid-fio-workload-bucket-contains-space", + ) ) def test_validate_fio_workload_invalid_no_fioWorkloadSpecified(self): @@ -22,7 +48,11 @@ def test_validate_fio_workload_invalid_no_fioWorkloadSpecified(self): def test_validate_fio_workload_invalid_commented_out_fioWorkload(self): self.assertFalse( validateFioWorkload( - ({"_fioWorkload": {}, "bucket": "dummy-bucket"}), + ({ + "_fioWorkload": {}, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", + }), "commented-out-fio-workload", ) ) @@ -30,7 +60,12 @@ def test_validate_fio_workload_invalid_commented_out_fioWorkload(self): def test_validate_fio_workload_invalid_mixed_fioWorkload_dlioWorkload(self): self.assertFalse( validateFioWorkload( - ({"fioWorkload": {}, "dlioWorkload": {}, "bucket": "dummy-bucket"}), + ({ + "fioWorkload": {}, + "dlioWorkload": {}, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", + }), "mixed-fio/dlio-workload", ) ) @@ -43,6 +78,7 @@ def test_validate_fio_workload_invalid_missing_fileSize(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload(workload, "invalid-fio-workload-missing-fileSize") @@ -58,6 +94,7 @@ def test_validate_fio_workload_invalid_unsupported_fileSize(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -74,6 +111,7 @@ def test_validate_fio_workload_invalid_missing_blockSize(self): "numThreads": 100, }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload(workload, "invalid-fio-workload-missing-blockSize") @@ -89,6 +127,7 @@ def test_validate_fio_workload_invalid_unsupported_blockSize(self): "numThreads": 100, }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -105,6 +144,7 @@ def test_validate_fio_workload_invalid_missing_filesPerThread(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -122,6 +162,7 @@ def test_validate_fio_workload_invalid_unsupported_filesPerThread(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -138,6 +179,7 @@ def test_validate_fio_workload_invalid_missing_numThreads(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload(workload, 
"invalid-fio-workload-missing-numThreads") @@ -153,6 +195,42 @@ def test_validate_fio_workload_invalid_unsupported_numThreads(self): "numThreads": "1k", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-numThreads" + ) + ) + pass + + def test_validate_fio_workload_invalid_missing_gcsfuseMountOptions(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": "1k", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-missing-gcsfuseMountOptions" + ) + ) + pass + + def test_validate_fio_workload_invalid_unsupported_gcsfuseMountOptions(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": "1k", + }, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": 100, }) self.assertFalse( validateFioWorkload( @@ -161,6 +239,27 @@ def test_validate_fio_workload_invalid_unsupported_numThreads(self): ) pass + def test_validate_fio_workload_invalid_gcsfuseMountOptions_contains_space( + self, + ): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": "1k", + }, + "bucket": "dummy-bucket", + "gcsfuseMountOptions": "abc def", + }) + self.assertFalse( + validateFioWorkload( + workload, + "invalid-fio-workload-unsupported-gcsfuseMountOptions-contains-space", + ) + ) + pass + def test_validate_fio_workload_invalid_unsupported_readTypes_1(self): workload = dict({ "fioWorkload": { @@ -171,6 +270,7 @@ def test_validate_fio_workload_invalid_unsupported_readTypes_1(self): "readTypes": True, }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -189,6 +289,7 @@ def test_validate_fio_workload_invalid_unsupported_readTypes_2(self): "readTypes": ["read", 1], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -207,6 +308,7 @@ def test_validate_fio_workload_invalid_unsupported_readTypes_3(self): "readTypes": ["read", "write"], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertFalse( validateFioWorkload( @@ -224,6 +326,7 @@ def test_validate_fio_workload_valid_without_readTypes(self): "blockSize": "1kb", }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-1")) pass @@ -238,6 +341,7 @@ def test_validate_fio_workload_valid_with_readTypes(self): "readTypes": ["read", "randread"], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-2")) pass @@ -252,6 +356,7 @@ def test_validate_fio_workload_valid_with_single_readType(self): "readTypes": ["randread"], }, "bucket": "dummy-bucket", + "gcsfuseMountOptions": "implicit-dirs,cache-max-size:-1", }) self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-2")) pass diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/templates/fio-tester.yaml b/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/templates/fio-tester.yaml index d9c79eb8d4..a8fdf95147 100644 --- 
a/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/templates/fio-tester.yaml +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/templates/fio-tester.yaml @@ -16,7 +16,7 @@ apiVersion: v1 kind: Pod metadata: - name: fio-tester-{{ .Values.instanceId }}-{{ .Values.scenario }}-{{ .Values.fio.readType }}-{{ lower .Values.fio.fileSize }}-{{ lower .Values.fio.blockSize }}-{{ .Values.fio.numThreads }}-{{ .Values.fio.filesPerThread }} + name: {{ .Values.podName }} {{- if ne .Values.scenario "local-ssd" }} annotations: gke-gcsfuse/volumes: "true" @@ -126,13 +126,14 @@ spec: time ls -R $workload_dir 1> /dev/null echo "Run fio tests..." - output_dir=/data/fio-output/{{ .Values.instanceId }}/${file_size}-{{ lower .Values.fio.blockSize}}-${num_of_threads}-${no_of_files_per_thread}/{{ .Values.scenario }}/$read_type + output_dir=/data/fio-output/{{ .Values.outputDirPrefix }} mkdir -p ${output_dir} # dump the gcsfuse-mount-configuration to a file in output-directory. {{ if eq .Values.scenario "gcsfuse-generic" }} echo "{{ .Values.gcsfuse.mountOptions }}" > ${output_dir}/gcsfuse_mount_options {{ end }} + echo "{{ .Values.podName }}" > ${output_dir}/pod_name for i in $(seq $epoch); do echo "[Epoch ${i}] start time:" `date +%s` @@ -155,7 +156,7 @@ spec: done {{ if eq .Values.scenario "local-ssd" }} - gsutil -m cp -R /data/fio-output/{{ .Values.instanceId }}/* gs://{{ .Values.bucketName }}/fio-output/{{ .Values.instanceId }}/ + gsutil -m cp -R /data/fio-output/* gs://{{ .Values.bucketName }}/fio-output/ {{ end }} echo "fio job completed!" diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/values.yaml b/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/values.yaml index efd0b6f4a1..6723168b88 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/values.yaml +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/loading-test/values.yaml @@ -23,6 +23,8 @@ bucketName: gke-dlio-test-data scenario: local-ssd nodeType: n2-standard-96 instanceId: ldap-yyyymmdd-hhmmss +podName: +outputDirPrefix: resourceLimits: cpu: 0 diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py index ea5e6391c0..c9196fa727 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright 2018 The Kubernetes Authors. -# Copyright 2022 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -149,6 +149,7 @@ def downloadFioOutputs(fioWorkloads: set, instanceId: str): for root, _, files in os.walk(_LOCAL_LOGS_LOCATION + "/" + args.instance_id): for file in files: + print(f"Parsing directory {root} ...") per_epoch_output = root + f"/{file}" if not per_epoch_output.endswith(".json"): print(f"ignoring file {per_epoch_output} as it's not a json file") @@ -159,6 +160,13 @@ def downloadFioOutputs(fioWorkloads: set, instanceId: str): if os.path.isfile(gcsfuse_mount_options_file): with open(gcsfuse_mount_options_file) as f: gcsfuse_mount_options = f.read().strip() + print(f"gcsfuse_mount_options={gcsfuse_mount_options}") + + pod_name = "" + pod_name_file = root + "/pod_name" + with open(pod_name_file) as f: + pod_name = f.read().strip() + print(f"pod_name={pod_name}") with open(per_epoch_output, "r") as f: try: @@ -194,9 +202,7 @@ def downloadFioOutputs(fioWorkloads: set, instanceId: str): numjobs = int(global_options["numjobs"]) bs = per_epoch_output_data["jobs"][0]["job options"]["bs"] - key = "-".join( - [read_type, mean_file_size, bs, str(numjobs), str(nrfiles)] - ) + key = root_split[-3] if key not in output: output[key] = { "mean_file_size": mean_file_size, @@ -211,9 +217,7 @@ def downloadFioOutputs(fioWorkloads: set, instanceId: str): r = record.copy() bs = per_epoch_output_data["jobs"][0]["job options"]["bs"] - r["pod_name"] = ( - f"fio-tester-{args.instance_id}-{scenario}-{read_type}-{mean_file_size.lower()}-{bs.lower()}-{numjobs}-{nrfiles}" - ) + r["pod_name"] = pod_name r["epoch"] = epoch r["scenario"] = scenario r["duration"] = int( diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py b/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py index 2e3cab0b66..6ef8031e89 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright 2018 The Kubernetes Authors. -# Copyright 2022 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,13 +26,6 @@ import fio_workload -# The default value of gcsfuse-mount-options to be used -# for "gcsfuse-generic" scenario. -# For description of how to specify the value for this, -# look at the description of the argparser argument for gcsfuse-mount-options. 
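The simplified key and pod_name handling above relies on the output-directory layout produced by FioChartNamePodName, i.e. outputDirPrefix = '{instanceID}/{fileSize}-{blockSize}-{numThreads}-{filesPerThread}-{hash}/{scenario}/{readType}'. Below is a short sketch of how a downloaded log directory decomposes into that key; the sample path and hash value are made up for illustration.

# Illustration only -- the sample path and hash value below are made up.
# Shows why parse_logs.py can now take the test key straight from the
# output-directory name instead of reconstructing it from the pod hostname.
sample_root = 'fio-logs/ldap-20240918-134548/200gb-1m-100-1-8123456789/gcsfuse-generic/read'
root_split = sample_root.split('/')
key = root_split[-3]        # '200gb-1m-100-1-8123456789', i.e. fileSize-blockSize-numThreads-filesPerThread-hash
scenario = root_split[-2]   # 'gcsfuse-generic'
read_type = root_split[-1]  # 'read'
print(key, scenario, read_type)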
-_DEFAULT_GCSFUSE_MOUNT_OPTIONS = 'implicit-dirs' - - def run_command(command: str): """Runs the given string command as a subprocess.""" result = subprocess.run(command.split(' '), capture_output=True, text=True) @@ -48,20 +41,17 @@ def escapeCommasInString(unescapedStr: str) -> str: def createHelmInstallCommands( fioWorkloads: set, instanceId: str, - gcsfuseMountOptions: str, machineType: str, ) -> list: """Creates helm install commands for the given fioWorkload objects.""" helm_commands = [] - if not gcsfuseMountOptions: - gcsfuseMountOptions = _DEFAULT_GCSFUSE_MOUNT_OPTIONS for fioWorkload in fioWorkloads: for readType in fioWorkload.readTypes: + chartName, podName, outputDirPrefix = fio_workload.FioChartNamePodName( + fioWorkload, instanceId, readType + ) commands = [ - ( - 'helm install' - f' fio-load-{fioWorkload.scenario}-{readType}-{fioWorkload.fileSize.lower()}-{fioWorkload.blockSize.lower()}-{fioWorkload.numThreads}-{fioWorkload.filesPerThread} loading-test' - ), + f'helm install {chartName} loading-test', f'--set bucketName={fioWorkload.bucket}', f'--set scenario={fioWorkload.scenario}', f'--set fio.readType={readType}', @@ -72,9 +62,11 @@ def createHelmInstallCommands( f'--set instanceId={instanceId}', ( '--set' - f' gcsfuse.mountOptions={escapeCommasInString(gcsfuseMountOptions)}' + f' gcsfuse.mountOptions={escapeCommasInString(fioWorkload.gcsfuseMountOptions)}' ), f'--set nodeType={machineType}', + f'--set podName={podName}', + f'--set outputDirPrefix={outputDirPrefix}', ] helm_command = ' '.join(commands) @@ -89,7 +81,6 @@ def main(args) -> None: helmInstallCommands = createHelmInstallCommands( fioWorkloads, args.instance_id, - args.gcsfuse_mount_options, args.machine_type, ) for helmInstallCommand in helmInstallCommands: @@ -122,21 +113,6 @@ def main(args) -> None: ), required=True, ) - parser.add_argument( - '--gcsfuse-mount-options', - metavar='GCSFuse mount options', - help=( - 'GCSFuse mount-options, in a compact stringified' - ' format, to be set for the ' - ' scenario "gcsfuse-generic". The individual config/cli flag values' - ' should be separated by comma. Each cli flag should be of the form' - ' "[=]". Each config-file flag should be of form' - ' "[:[:[...]]]:". For' - ' example, a sample value would be:' - ' "implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1".' - ), - required=False, - ) parser.add_argument( '--machine-type', metavar='Machine-type of the GCE VM or GKE cluster node', @@ -154,19 +130,17 @@ def main(args) -> None: ) args = parser.parse_args() - for argument in ['instance_id', 'gcsfuse_mount_options', 'machine_type']: - value = getattr(args, argument) - if ' ' in value: - raise Exception( - f'Argument {argument} (value="{value}") contains space in it, which' - ' is not supported.' - ) - for argument in ['machine_type', 'instance_id']: + for argument in ['instance_id', 'machine_type']: value = getattr(args, argument) if len(value) == 0 or str.isspace(value): raise Exception( f'Argument {argument} (value="{value}") is empty or contains only' ' spaces.' ) + if ' ' in value: + raise Exception( + f'Argument {argument} (value="{value}") contains space in it, which' + ' is not supported.' 
+ ) main(args) diff --git a/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh b/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh index bf0891732c..f814ba5ac2 100755 --- a/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh +++ b/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh @@ -57,8 +57,6 @@ readonly csi_driver_github_path=https://github.com/googlecloudplatform/gcs-fuse- readonly csi_driver_branch=main readonly gcsfuse_github_path=https://github.com/googlecloudplatform/gcsfuse readonly gcsfuse_branch=garnitin/add-gke-load-testing/v1 -# GCSFuse configuration related -readonly DEFAULT_GCSFUSE_MOUNT_OPTIONS="implicit-dirs" # Test runtime configuration readonly DEFAULT_INSTANCE_ID=${USER}-$(date +%Y%m%d-%H%M%S) # 5 minutes @@ -87,8 +85,6 @@ function printHelp() { echo "src_dir=<\"directory/to/clone/github/repos/if/needed\", default=\"${DEFAULT_SRC_DIR}\">" echo "gcsfuse_src_dir=<\"/path/of/gcsfuse/src/to/use/if/available\", default=\"${DEFAULT_SRC_DIR}/gcsfuse\">" echo "csi_src_dir=<\"/path/of/gcs-fuse-csi-driver/to/use/if/available\", default=\"${DEFAULT_SRC_DIR}\"/gcs-fuse-csi-driver>" - # GCSFuse configuration related - echo "gcsfuse_mount_options=<\"comma-separated-gcsfuse-mount-options\" e.g. \""${DEFAULT_GCSFUSE_MOUNT_OPTIONS}"\">" # Test runtime configuration echo "pod_wait_time_in_seconds=" echo "pod_timeout_in_seconds=" @@ -132,8 +128,8 @@ test -d "${src_dir}" || mkdir -pv "${src_dir}" (test -n "${gcsfuse_src_dir}" && gcsfuse_src_dir="$(realpath "${gcsfuse_src_dir}")") || export gcsfuse_src_dir="${src_dir}"/gcsfuse export gke_testing_dir="${gcsfuse_src_dir}"/perfmetrics/scripts/testing_on_gke (test -n "${csi_src_dir}" && csi_src_dir="$(realpath "${csi_src_dir}")") || export csi_src_dir="${src_dir}"/gcs-fuse-csi-driver -# GCSFuse configuration related -test -n "${gcsfuse_mount_options}" || export gcsfuse_mount_options="${DEFAULT_GCSFUSE_MOUNT_OPTIONS}" +# GCSFuse configuration related - deprecated. Will cause error. +test -z "${gcsfuse_mount_options}" || (echo "gcsfuse_mount_options set by user is a deprecated option. Please set gcsfuseMountOptions in workload objects in workload configuration file in its place." && exit 1) # Test runtime configuration test -n "${pod_wait_time_in_seconds}" || export pod_wait_time_in_seconds="${DEFAULT_POD_WAIT_TIME_IN_SECONDS}" test -n "${pod_timeout_in_seconds}" || export pod_timeout_in_seconds="${DEFAULT_POD_TIMEOUT_IN_SECONDS}" @@ -178,9 +174,6 @@ function printRunParameters() { echo "src_dir=\"${src_dir}\"" echo "gcsfuse_src_dir=\"${gcsfuse_src_dir}\"" echo "csi_src_dir=\"${csi_src_dir}\"" - # GCSFuse configuration related - echo "gcsfuse_mount_options=\"${gcsfuse_mount_options}\"" - echo "${gcsfuse_mount_options}" >gcsfuse_mount_options # Test runtime configuration echo "pod_wait_time_in_seconds=\"${pod_wait_time_in_seconds}\"" echo "pod_timeout_in_seconds=\"${pod_timeout_in_seconds}\"" @@ -474,12 +467,12 @@ function deleteAllPods() { function deployAllFioHelmCharts() { echo "Deploying all fio helm charts ..." - cd "${gke_testing_dir}"/examples/fio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --gcsfuse-mount-options="${gcsfuse_mount_options}" --machine-type="${machine_type}" && cd - + cd "${gke_testing_dir}"/examples/fio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" && cd - } function deployAllDlioHelmCharts() { echo "Deploying all dlio helm charts ..." 
- cd "${gke_testing_dir}"/examples/dlio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --gcsfuse-mount-options="${gcsfuse_mount_options}" --machine-type="${machine_type}" && cd - + cd "${gke_testing_dir}"/examples/dlio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" && cd - } function listAllHelmCharts() { diff --git a/perfmetrics/scripts/testing_on_gke/examples/utils/utils.py b/perfmetrics/scripts/testing_on_gke/examples/utils/utils.py index 766a5c8a38..6cf2428cbb 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/utils/utils.py +++ b/perfmetrics/scripts/testing_on_gke/examples/utils/utils.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright 2018 The Kubernetes Authors. -# Copyright 2022 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/perfmetrics/scripts/testing_on_gke/examples/workloads.json b/perfmetrics/scripts/testing_on_gke/examples/workloads.json index e2ab942b4d..ee23487a2c 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/workloads.json +++ b/perfmetrics/scripts/testing_on_gke/examples/workloads.json @@ -6,7 +6,7 @@ "runOnSSD": true, "workloads": [ { - "_description": "This is a dummy fio workload (missing the 'fioWorkload' field), purely standing as a header and does not execute any workload. For it to execute a fio workload, it must have a valid 'fioWorkload' object and a valid 'bucket' attribute.", + "_description": "This is a dummy fio workload (missing the 'fioWorkload' field), purely standing as a header and does not execute any workload. For it to execute a fio workload, it must have a valid 'fioWorkload', a valid 'bucket' attribute, and a valid 'gcsfuseMountOptions' attribute.", "_fioWorkload": { "_description": "Every fioWorkload must have fileSize, filesPerThread, numThreads, and blockSize fields. readTypes is an array of string values 'read' and 'randread'. If readTypes is missing, then it defaults to [\"read\",\"randread\"].", "fileSize": "64k", "filesPerThread": 20000, "numThreads": 50, "blockSize": "64K", "readTypes": ["read","randread"] }, + "gcsfuseMountOptions": "GCSFuse mount-options, in a compact stringified format, to be used for the test scenario gcsfuse-generic. The individual config/cli flag values should be separated by commas. Each cli flag should be of the form <name>[=<value>], while each config-file flag should be of the form <config>[:<sub-config>[:<sub-sub-config>[...]]]:<value>. For example, a legal value would be: implicit-dirs,file_mode=777,file-cache:enable-parallel-downloads:true,metadata-cache:ttl-secs:-1 .", "bucket":"The bucket must have objects with name Workload.{i}/{j} for every i,j where i:0-{numThreads}-1, j:0-{filesPerThread}-1, and each of these objects must be of size {fileSize}. The buckets gke-* are all in us-central1, are owned by GKE team and are in their GCP project(s)."
}, { @@ -25,6 +26,7 @@ "blockSize": "64K", "readTypes": ["read"] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"fio-64k-1m-us-west1", "_bucket_alt2":"fio-64k-1m-us-central1", "_bucket_alt3":"gke-fio-64k-1m" @@ -37,6 +39,7 @@ "blockSize": "128K", "readTypes": ["read"] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"fio-128k-1m-us-west1", "_bucket_alt2":"fio-128k-1m-us-central1", "_bucket_alt3":"gke-fio-128k-1m" @@ -49,6 +52,7 @@ "blockSize": "256K", "readTypes": ["read","randread"] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"fio-1mb-1m-us-west1", "_bucket_alt2":"fio-1mb-1m-us-central1", "_bucket_alt3":"gke-fio-1mb-1m" @@ -60,6 +64,7 @@ "numThreads": 50, "blockSize": "1M" }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"fio-100mb-50k-us-west1", "_bucket_alt2":"fio-100mb-50k-us-central1", "_bucket_alt3":"gke-fio-100mb-50k" @@ -72,6 +77,7 @@ "numThreads": 100, "blockSize": "1M" }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"fio-200gb-1-us-west1", "_bucket_alt2":"fio-200gb-1-us-central1", "_bucket_alt3":"gke-fio-200gb-1" @@ -84,6 +90,7 @@ "recordLength": 102400, "batchSizes": [800,128] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"The bucket must have objects with name 'train/', 'valid/', and train/img_{i}_of_{numFilesTrain}.npz for every i where i:0-{numFilesTrain}-1 and each train/img_{i}_of_{numFilesTrain}.npz must be of size {recordLength} bytes. The buckets gke-* are all in us-central1, are owned by GKE team and are in their GCP project(s)." }, { @@ -92,6 +99,7 @@ "recordLength": 102400, "batchSizes": [800,128] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"dlio-unet3d-100kb-500k-us-west1", "_bucket_alt2":"dlio-unet3d-100kb-500k-us-central1", "_bucket_alt3":"gke-dlio-unet3d-100kb-500k" @@ -102,6 +110,7 @@ "recordLength": 512000, "batchSizes": [800,128] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"dlio-unet3d-500kb-1m-us-west1", "_bucket_alt2":"dlio-unet3d-500kb-1m-us-central1", "_bucket_alt3":"gke-dlio-unet3d-500kb-1m" @@ -112,6 +121,7 @@ "recordLength": 3145728, "batchSizes": [200] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"dlio-unet3d-3mb-100k-us-west1", "_bucket_alt2":"dlio-unet3d-3mb-100k-us-central1", "_bucket_alt3":"gke-dlio-unet3d-3mb-100k" @@ -122,6 +132,7 @@ "recordLength": 157286400, "batchSizes": [4] }, + "gcsfuseMountOptions": "implicit-dirs", "bucket":"dlio-unet3d-150mb-5k-us-west1", "_bucket_alt2":"dlio-unet3d-150mb-5k-us-central1", "_bucket_alt3":"gke-dlio-unet3d-150mb-5k"
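For reference, a minimal workload entry that should satisfy the new validation looks like the sketch below, written as the Python dict that validateFioWorkload() receives after JSON parsing. The bucket name and mount options are placeholders, and the import assumes the snippet sits next to fio_workload.py in examples/fio.

# A minimal fio workload entry that should pass the new validation; the
# bucket name and mount options below are placeholders, and the import
# assumes this snippet lives next to fio_workload.py.
from fio_workload import validateFioWorkload

workload = {
    'fioWorkload': {
        'fileSize': '64K',
        'filesPerThread': 20000,
        'numThreads': 50,
        'blockSize': '64K',
        'readTypes': ['read', 'randread'],
    },
    'bucket': 'my-test-bucket',
    'gcsfuseMountOptions': 'implicit-dirs,metadata-cache:ttl-secs:-1',
}
assert validateFioWorkload(workload, 'sample-fio-workload')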