Skip to content

Commit

Permalink
[SDK] Allow customizing base trainer and storage images in Train API
Browse files Browse the repository at this point in the history
Allow customizing the base storage_initializer and trainer images through
environment variables.

Signed-off-by: Varsha Prasad Narsing <[email protected]>
  • Loading branch information
varshaprasad96 committed Sep 18, 2024
1 parent 126110f commit 40c5c10
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
10 changes: 8 additions & 2 deletions sdk/python/kubeflow/training/api/training_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import json
import logging
import multiprocessing
import os
import queue
import time
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
Expand Down Expand Up @@ -242,7 +243,9 @@ def train(
# create init container spec
init_container_spec = utils.get_container_spec(
name=constants.STORAGE_INITIALIZER,
base_image=constants.STORAGE_INITIALIZER_IMAGE,
base_image=os.getenv(
"STORAGE_INITIALIZER_IMAGE", constants.STORAGE_INITIALIZER_IMAGE_DEFAULT
),
args=[
"--model_provider",
mp,
Expand All @@ -259,7 +262,10 @@ def train(
# create app container spec
container_spec = utils.get_container_spec(
name=constants.JOB_PARAMETERS[constants.PYTORCHJOB_KIND]["container"],
base_image=constants.TRAINER_TRANSFORMER_IMAGE,
base_image=os.getenv(
"TRAINER_TRANSFORMER_IMAGE_DEFAULT",
constants.TRAINER_TRANSFORMER_IMAGE_DEFAULT,
),
args=[
"--model_uri",
model_provider_parameters.model_uri,
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/kubeflow/training/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,14 @@


# TODO (andreyvelich): We should add image tag for Storage Initializer and Trainer.
# NOTE(review): the +/- diff markers are missing in this view; each *_IMAGE line
# is presumably the removed side and the *_IMAGE_DEFAULT line directly below it
# the added side of the same change -- confirm against the commit.
STORAGE_INITIALIZER_IMAGE = "docker.io/kubeflow/storage-initializer"
# Fallback storage-initializer image: train() passes this as the default value
# to os.getenv("STORAGE_INITIALIZER_IMAGE", ...).
STORAGE_INITIALIZER_IMAGE_DEFAULT = "docker.io/kubeflow/storage-initializer"

# Volume mount named after the storage initializer, mounted at
# INIT_CONTAINER_MOUNT_PATH.
STORAGE_INITIALIZER_VOLUME_MOUNT = models.V1VolumeMount(
name=STORAGE_INITIALIZER,
mount_path=INIT_CONTAINER_MOUNT_PATH,
)

TRAINER_TRANSFORMER_IMAGE = "docker.io/kubeflow/trainer-huggingface"
# Fallback trainer image: train() passes this as the default value to
# os.getenv("TRAINER_TRANSFORMER_IMAGE_DEFAULT", ...).
# NOTE(review): that env var name carries a _DEFAULT suffix, unlike
# STORAGE_INITIALIZER_IMAGE above -- confirm whether the asymmetry is intended.
TRAINER_TRANSFORMER_IMAGE_DEFAULT = "docker.io/kubeflow/trainer-huggingface"

# TFJob constants.
TFJOB_KIND = "TFJob"
Expand Down

0 comments on commit 40c5c10

Please sign in to comment.