Skip to content

Commit

Permalink
build and verify images of storage-initializer and trainer
Browse files Browse the repository at this point in the history
Signed-off-by: helenxie-bit <[email protected]>
  • Loading branch information
helenxie-bit committed Sep 21, 2024
1 parent c3f04c3 commit 9f42449
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 2 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/e2e-test-train-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ jobs:
cluster_name: training-operator-cluster
kubectl_version: ${{ matrix.kubernetes-version }}

- name: Build training-operator
- name: Build training-operator, storage-initializer, and trainer images
run: |
./scripts/gha/build-image.sh
env:
TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test
STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test
TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

- name: Deploy training operator
run: |
Expand All @@ -50,5 +52,12 @@ jobs:

- name: Run tests
run: |
kind load docker-image ${{ env.STORAGE_INITIALIZER_IMAGE }} --name ${{ env.KIND_CLUSTER }}
kind load docker-image ${{ env.TRAINER_TRANSFORMER_IMAGE_DEFAULT }} --name ${{ env.KIND_CLUSTER }}
pip install pytest
python3 -m pip install -e sdk/python[huggingface]; pytest -s sdk/python/test/e2e-train-api/test_e2e_train_api.py --log-cli-level=debug
python3 -m pip install -e sdk/python[huggingface]
pytest -s sdk/python/test/e2e-train-api/test_e2e_train_api.py --log-cli-level=debug
env:
KIND_CLUSTER: training-operator-cluster
STORAGE_INITIALIZER_IMAGE: kubeflowtraining/storage-initializer:test
TRAINER_TRANSFORMER_IMAGE_DEFAULT: kubeflowtraining/trainer:test
2 changes: 2 additions & 0 deletions scripts/gha/build-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ set -o nounset
set -o pipefail

docker build . -t ${TRAINING_CI_IMAGE} -f build/images/training-operator/Dockerfile
docker build . -t ${STORAGE_INITIALIZER_CI_IMAGE} -f sdk/python/kubeflow/storage_initializer/Dockerfile
docker build . -t ${TRAINER_CI_IMAGE} -f sdk/python/kubeflow/trainer/Dockerfile
9 changes: 9 additions & 0 deletions sdk/python/kubeflow/training/api/training_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,10 @@ def train(
],
volume_mounts=[constants.STORAGE_INITIALIZER_VOLUME_MOUNT],
)
base_image1=os.getenv(
"STORAGE_INITIALIZER_IMAGE", constants.STORAGE_INITIALIZER_IMAGE_DEFAULT
)
print("base_image1: " + base_image1)

# create app container spec
container_spec = utils.get_container_spec(
Expand Down Expand Up @@ -287,6 +291,11 @@ def train(
volume_mounts=[constants.STORAGE_INITIALIZER_VOLUME_MOUNT],
resources=resources_per_worker,
)
base_image2=os.getenv(
"TRAINER_TRANSFORMER_IMAGE_DEFAULT",
constants.TRAINER_TRANSFORMER_IMAGE_DEFAULT,
)
print("base_image2: " + base_image2)

storage_initializer_volume = models.V1Volume(
name=constants.STORAGE_INITIALIZER,
Expand Down

0 comments on commit 9f42449

Please sign in to comment.