diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 0e255e145a6..88f3333f9c3 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -29,7 +29,7 @@ make build REGISTRY= TAG= To use your custom images for the Katib components, modify [Kustomization file](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/installs/katib-standalone/kustomization.yaml) -and [Katib Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/katib-config/katib-config.yaml) +and [Katib Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/installs/katib-standalone/katib-config.yaml) You can deploy Katib v1beta1 manifests into a Kubernetes cluster as follows: diff --git a/docs/new-algorithm-service.md b/docs/new-algorithm-service.md index 07bef17a937..6955b53d1c1 100644 --- a/docs/new-algorithm-service.md +++ b/docs/new-algorithm-service.md @@ -90,7 +90,7 @@ Then build the Docker image. ### Use the algorithm in Katib. -Update the [Katib config](../manifests/v1beta1/components/katib-config/katib-config.yaml) with the new algorithm entity: +Update the [Katib config](../manifests/v1beta1/installs/katib-standalone/katib-config.yaml) with the new algorithm entity: ```diff runtime: diff --git a/manifests/v1beta1/components/katib-config/katib-config.yaml b/manifests/v1beta1/components/katib-config/katib-config.yaml deleted file mode 100644 index 684f113f31d..00000000000 --- a/manifests/v1beta1/components/katib-config/katib-config.yaml +++ /dev/null @@ -1,59 +0,0 @@ ---- -apiVersion: config.kubeflow.org/v1beta1 -kind: KatibConfig -init: - controller: - webhookPort: 8443 - trialResources: - - Job.v1.batch - - TFJob.v1.kubeflow.org - - PyTorchJob.v1.kubeflow.org - - MPIJob.v1.kubeflow.org - - XGBoostJob.v1.kubeflow.org - - MXJob.v1.kubeflow.org -runtime: - metricsCollectors: - - kind: StdOut - image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) - - kind: File - image: 
docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) - - kind: TensorFlowEvent - image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION) - resources: - limits: - memory: 1Gi - suggestions: - - algorithmName: random - image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) - - algorithmName: tpe - image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) - - algorithmName: grid - image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) - - algorithmName: hyperband - image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION) - - algorithmName: bayesianoptimization - image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION) - - algorithmName: cmaes - image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) - - algorithmName: sobol - image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) - - algorithmName: multivariate-tpe - image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) - - algorithmName: enas - image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION) - resources: - limits: - memory: 200Mi - - algorithmName: darts - image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION) - - algorithmName: pbt - image: docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION) - persistentVolumeClaimSpec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 5Gi - earlyStoppings: - - algorithmName: medianstop - image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION) diff --git a/manifests/v1beta1/components/katib-config/kustomization.yaml b/manifests/v1beta1/components/katib-config/kustomization.yaml deleted file mode 100644 index d49fcc753a1..00000000000 --- a/manifests/v1beta1/components/katib-config/kustomization.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -configMapGenerator: - - name: katib-config - files: - - katib-config.yaml - options: - 
disableNameSuffixHash: true - labels: - katib.kubeflow.org/version: latest -configurations: - - transformer-configs.yaml -vars: - - name: KATIB_VERSION - fieldref: - fieldpath: metadata.labels.katib\.kubeflow\.org/version - objref: - apiVersion: v1 - kind: ConfigMap - name: katib-config diff --git a/manifests/v1beta1/components/katib-config/transformer-configs.yaml b/manifests/v1beta1/components/katib-config/transformer-configs.yaml deleted file mode 100644 index a32a457af2f..00000000000 --- a/manifests/v1beta1/components/katib-config/transformer-configs.yaml +++ /dev/null @@ -1,4 +0,0 @@ ---- -varReference: - - kind: ConfigMap - path: data diff --git a/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml b/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + 
- algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml b/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml index d5358eddaa3..26d7bd65a00 100644 --- a/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml @@ -19,8 +19,6 @@ resources: - ../../components/webhook/ # Cert-manager certificate for webhooks - certificate.yaml - # Katib Config. 
- - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller @@ -61,3 +59,11 @@ vars: configurations: - params.yaml + +configMapGenerator: + - name: katib-config + behavior: create + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff --git a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + - algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: enas + image: 
docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml index 9d8ff99fea7..28eb85756ba 100644 --- a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml @@ -17,8 +17,6 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ - # Katib Config. - - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller @@ -39,3 +37,10 @@ secretGenerator: - name: katib-mysql-secrets envs: - secrets.env +configMapGenerator: + - name: katib-config + behavior: create + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff --git a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml index f1a8503259b..0e5a21419ff 100644 --- a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml @@ -1,5 +1,3 @@ -# This KatibConfig is mostly same as https://github.com/kubeflow/katib/manifests/v1beta1/components/katib-config/katib-config.yaml. -# Only `.init.controller.enableLeaderElection` field is different. 
--- apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig @@ -17,40 +15,40 @@ init: runtime: metricsCollectors: - kind: StdOut - image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/file-metrics-collector:latest - kind: File - image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/file-metrics-collector:latest - kind: TensorFlowEvent - image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest resources: limits: memory: 1Gi suggestions: - algorithmName: random - image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest - algorithmName: tpe - image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest - algorithmName: grid - image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-optuna:latest - algorithmName: hyperband - image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-hyperband:latest - algorithmName: bayesianoptimization - image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-skopt:latest - algorithmName: cmaes - image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-goptuna:latest - algorithmName: sobol - image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-goptuna:latest - algorithmName: multivariate-tpe - image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-optuna:latest - algorithmName: enas - image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION) + image: 
docker.io/kubeflowkatib/suggestion-enas:latest resources: limits: memory: 200Mi - algorithmName: darts - image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-darts:latest - algorithmName: pbt - image: docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/suggestion-pbt:latest persistentVolumeClaimSpec: accessModes: - ReadWriteMany @@ -59,4 +57,4 @@ runtime: storage: 5Gi earlyStoppings: - algorithmName: medianstop - image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION) + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-openshift/katib-config.yaml b/manifests/v1beta1/installs/katib-openshift/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-openshift/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + - 
algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-openshift/kustomization.yaml b/manifests/v1beta1/installs/katib-openshift/kustomization.yaml index b3ba0c97dfa..8a7f72bb7a7 100644 --- a/manifests/v1beta1/installs/katib-openshift/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-openshift/kustomization.yaml @@ -27,8 +27,6 @@ resources: - ../../components/ui/ # Katib webhooks. - ../../components/webhook/ - # Katib Config. 
- - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller @@ -64,3 +62,11 @@ patchesJson6902: kind: MutatingWebhookConfiguration name: katib.kubeflow.org path: patches/webhook-inject-cabundle.yaml + +configMapGenerator: + - name: katib-config + behavior: create + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + - algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + 
image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml index 1c05bf7b423..7dda9d5d0a3 100644 --- a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml @@ -19,8 +19,6 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ - # Katib Config. 
- - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller @@ -41,3 +39,10 @@ patchesJson6902: kind: Deployment name: katib-db-manager path: ./patches/db-manager.yaml +configMapGenerator: + - name: katib-config + behavior: create + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff --git a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + - algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - 
algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml index 117ce4c7f45..cbf248d907f 100644 --- a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml @@ -19,8 +19,6 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ - # Katib Config. - - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller @@ -34,3 +32,10 @@ images: - name: docker.io/kubeflowkatib/cert-generator newName: docker.io/kubeflowkatib/cert-generator newTag: latest +configMapGenerator: + - name: katib-config + behavior: create + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff --git a/manifests/v1beta1/installs/katib-with-kubeflow/katib-config.yaml b/manifests/v1beta1/installs/katib-with-kubeflow/katib-config.yaml new file mode 100644 index 00000000000..1e3af3fb59b --- /dev/null +++ b/manifests/v1beta1/installs/katib-with-kubeflow/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: 
docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:latest + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:latest + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:latest + - algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:latest + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:latest + - algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:latest + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:latest + - algorithmName: pbt + image: docker.io/kubeflowkatib/suggestion-pbt:latest + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/installs/katib-with-kubeflow/kustomization.yaml b/manifests/v1beta1/installs/katib-with-kubeflow/kustomization.yaml index a68523d98cf..c977af1d40c 100644 --- a/manifests/v1beta1/installs/katib-with-kubeflow/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-with-kubeflow/kustomization.yaml @@ -54,3 +54,11 @@ vars: configurations: - params.yaml + +configMapGenerator: + - name: katib-config + behavior: replace + files: + - katib-config.yaml + options: + disableNameSuffixHash: true diff 
--git a/scripts/v1beta1/update-images.sh b/scripts/v1beta1/update-images.sh index e5198ee8876..21230db18a3 100755 --- a/scripts/v1beta1/update-images.sh +++ b/scripts/v1beta1/update-images.sh @@ -73,11 +73,12 @@ update_yaml_files "${INSTALLS_PATH}" "newName: ${OLD_PREFIX}" "newName: ${NEW_PR update_yaml_files "${INSTALLS_PATH}" "newTag: .*" "newTag: ${TAG}" # Katib Config images. -CONFIG_PATH="manifests/v1beta1/components/katib-config/" - echo -e "Update Katib Metrics Collectors, Suggestions and EarlyStopping images\n" -update_yaml_files "${CONFIG_PATH}" "image: ${OLD_PREFIX}" "image: ${NEW_PREFIX}" -update_yaml_files "${CONFIG_PATH}" "katib.kubeflow.org/version: .*" "katib.kubeflow.org/version: ${TAG}" +for config in manifests/v1beta1/installs/**/katib-config.yaml; do + update_yaml_files "${config}" "${OLD_PREFIX}" "${NEW_PREFIX}" + # Rewrite only the trailing ":<tag>" (no "/" or whitespace after the colon) so a registry port such as "localhost:5000/" in NEW_PREFIX is left intact. + update_yaml_files "${config}" ":[^[:space:]/][^[:space:]/]*\$" ":${TAG}" +done # Katib Trial training container images. diff --git a/test/e2e/v1beta1/scripts/aws/setup-katib.sh b/test/e2e/v1beta1/scripts/aws/setup-katib.sh index 009afc7a846..1aee77eebc4 100755 --- a/test/e2e/v1beta1/scripts/aws/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/aws/setup-katib.sh @@ -36,7 +36,7 @@ make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="${ECR_REGIS echo -e "\n The Katib will be deployed with the following configs" cat "manifests/v1beta1/installs/katib-standalone/kustomization.yaml" -cat "manifests/v1beta1/components/katib-config/katib-config.yaml" +cat "manifests/v1beta1/installs/katib-standalone/katib-config.yaml" echo "Creating Kubeflow namespace" kubectl create namespace kubeflow diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh index ca8e24ea3c5..5cd2c10ff5a 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh @@ -80,7 +80,7 @@ run() { algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")" 
suggestion_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.suggestions.[] | select(.algorithmName == env(algorithm_name)) | .image' \ - manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)" + manifests/v1beta1/installs/katib-standalone/katib-config.yaml | cut -d: -f1)" suggestion_name="$(basename "$suggestion_image_name")" suggestions+=("$suggestion_name") @@ -106,7 +106,7 @@ run() { algorithm_name="$(yq eval '.spec.earlyStopping.algorithmName' "$exp_path")" earlystopping_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.earlyStoppings.[] | select(.algorithmName == env(algorithm_name)) | .image' \ - manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)" + manifests/v1beta1/installs/katib-standalone/katib-config.yaml | cut -d: -f1)" earlystopping_name="$(basename "$earlystopping_image_name")" earlystoppings+=("$earlystopping_name") diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh index 6a12a594f72..61bdb5c3490 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh @@ -53,7 +53,7 @@ yq eval -i '.spec.resources.requests.storage|="2Gi"' $PVC_FILE echo -e "\n The Katib will be deployed with the following configs" cat $KUSTOMIZATION_FILE -kustomize build ../../../../../manifests/v1beta1/components/katib-config +cat ../../../../../manifests/v1beta1/installs/katib-standalone/katib-config.yaml # If the user wants to deploy training operator, then use the kustomization file for training operator. if "$DEPLOY_TRAINING_OPERATOR"; then