Skip to content

Commit

Permalink
Add PVC access modes to storage config
Browse files Browse the repository at this point in the history
Signed-off-by: Andrey Velichkevich <[email protected]>
  • Loading branch information
andreyvelich committed Mar 15, 2024
1 parent 3e2e1a7 commit 519c11a
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 13 deletions.
8 changes: 4 additions & 4 deletions sdk/python/kubeflow/training/api/training_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,10 @@ def train(
namespace: Optional[str] = None,
num_workers: int = 1,
num_procs_per_worker: int = 1,
storage_config: Dict[str, Optional[str]] = {
"size": "10Gi",
storage_config: Dict[str, Optional[Union[str, List[str]]]] = {
"size": constants.STORAGE_INITIALIZER_DEFAULT_SIZE,
"storage_class": None,
"access_modes": ["ReadWriteOnce", "ReadOnlyMany"],
},
model_provider_parameters=None,
dataset_provider_parameters=None,
Expand Down Expand Up @@ -147,7 +148,6 @@ def train(
pvc_name=constants.STORAGE_INITIALIZER,
namespace=namespace,
storage_config=storage_config,
num_workers=num_workers,
),
)
except Exception as e:
Expand All @@ -161,7 +161,7 @@ def train(
)
break
else:
raise RuntimeError("failed to create pvc")
raise RuntimeError(f"failed to create PVC. Error: {e}")

if isinstance(model_provider_parameters, HuggingFaceModelParams):
mp = "hf"
Expand Down
3 changes: 3 additions & 0 deletions sdk/python/kubeflow/training/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@

# Constants for Train API.
STORAGE_INITIALIZER = "storage-initializer"
# The default value for dataset and model storage.
STORAGE_INITIALIZER_DEFAULT_SIZE = "10Gi"

# TODO (andreyvelich): We should add image tag for Storage Initializer and Trainer.
STORAGE_INITIALIZER_IMAGE = "docker.io/kubeflow/storage-initializer"

Expand Down
16 changes: 7 additions & 9 deletions sdk/python/kubeflow/training/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,28 +377,26 @@ def get_pytorchjob_template(
def get_pvc_spec(
pvc_name: str,
namespace: str,
storage_config: Dict[str, Optional[str]],
num_workers: int,
storage_config: Dict[str, Optional[Union[str, List[str]]]],
):
if pvc_name is None or namespace is None or "size" not in storage_config:
raise ValueError("One of the arguments is None")
if pvc_name is None or namespace is None:
raise ValueError("One of the required storage config argument is None")

if "size" not in storage_config:
storage_config["size"] = constants.STORAGE_INITIALIZER_DEFAULT_SIZE

pvc_spec = models.V1PersistentVolumeClaim(
api_version="v1",
kind="PersistentVolumeClaim",
metadata={"name": pvc_name, "namepsace": namespace},
spec=models.V1PersistentVolumeClaimSpec(
access_modes=["ReadWriteOnce", "ReadOnlyMany"],
access_modes=storage_config,
resources=models.V1ResourceRequirements(
requests={"storage": storage_config["size"]}
),
),
)

# If PyTorchJob has 1 worker, ReadWriteOnce access mode is sufficient for PVC.
if num_workers == 1:
pvc_spec.spec.access_modes = ["ReadWriteOnce"]

if "storage_class" in storage_config:
pvc_spec.spec.storage_class_name = storage_config["storage_class"]

Expand Down

0 comments on commit 519c11a

Please sign in to comment.