diff --git a/jupyter/datascience/anaconda-python-3.8/Dockerfile b/jupyter/datascience/anaconda-python-3.8/Dockerfile index f62807860..219a1947e 100644 --- a/jupyter/datascience/anaconda-python-3.8/Dockerfile +++ b/jupyter/datascience/anaconda-python-3.8/Dockerfile @@ -15,21 +15,42 @@ LABEL name="jupyter-datascience-anaconda-python-3.8:latest" \ ENV JUPYTER_ENABLE_LAB="1" USER root - # Use the NodeJS from Anaconda > 12.0 -RUN yum remove -y nodejs +RUN yum remove -y nodejs && \ + yum install -y jq + +# Install useful OS packages +RUN dnf install -y jq unixODBC git-lfs libsndfile + +# Disable announcement plugin of jupyterlab +RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +# Install MongoDB Client, We need a special repo for MongoDB as they do their own distribution +COPY mongodb-org-6.0.repo-x86_64 /etc/yum.repos.d/mongodb-org-6.0.repo + +RUN dnf install -y mongocli + +# Install MSSQL Client, We need a special repo for MSSQL as they do their own distribution +COPY mssql-2022.repo-x86_64 /etc/yum.repos.d/mssql-2022.repo + +RUN ACCEPT_EULA=Y dnf install -y mssql-tools18 unixODBC-devel + +ENV PATH="$PATH:/opt/mssql-tools18/bin" + +# Other apps and tools installed as default user +USER 1001 WORKDIR /opt/app-root/bin -COPY utils utils/ +COPY utils ./utils/ # Install Python packages and Jupyterlab extensions from conda environment file COPY environment.yml start-notebook.sh ./ # Copy Elyra setup to utils so that it's sourced at startup -COPY jupyter_elyra_config.py setup-elyra.sh pipeline-flow.svg ./utils/ +COPY setup-elyra.sh ./utils/ RUN echo "Installing softwares and packages" && \ - /opt/anaconda3/bin/conda env update -p "${APP_ROOT}" --file environment.yml && \ + /opt/anaconda3/bin/conda env create --file environment.yml -p "${APP_ROOT}" && \ rm -f ./environment.yml && \ # setup path for runtime configuration \ mkdir /opt/app-root/runtimes && \ @@ -37,9 +58,26 @@ RUN echo "Installing softwares and packages" && \ cp
utils/pipeline-flow.svg /opt/app-root/lib/python3.8/site-packages/elyra/static/icons/kubeflow.svg && \ sed -i "s/Kubeflow Pipelines/Data Science/g" /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/runtime_type.py && \ sed -i "s/Kubeflow Pipelines/Data Science Pipelines/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \ + sed -i "s/kubeflow-service/data-science-pipeline-service/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \ sed -i "s/\"default\": \"Argo\",/\"default\": \"Tekton\",/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \ + # Workaround for passing ssl_sa_cert + patch /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/kfp/kfp_authentication.py -i utils/kfp_authentication.patch && \ + patch /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/kfp/processor_kfp.py -i utils/processor_kfp.patch && \ + # switch to Data Science Pipeline in component catalog \ + DIR_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/local-directory-catalog.json" && \ + FILE_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/local-file-catalog.json" && \ + URL_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/url-catalog.json" && \ + tmp=$(mktemp) && \ + jq '.properties.metadata.properties.runtime_type = input' $DIR_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $DIR_COMPONENT && \ + jq '.properties.metadata.properties.runtime_type = input' $FILE_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $FILE_COMPONENT && \ + jq '.properties.metadata.properties.runtime_type = input' $URL_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $URL_COMPONENT && \ + sed -i "s/metadata.metadata.runtime_type/\"DATA_SCIENCE_PIPELINES\"/g" /opt/app-root/share/jupyter/labextensions/@elyra/pipeline-editor-extension/static/lib_index_js.*.js && \ + # Explicitly enable 
jupyterlab_s3_browser plugin \ + jupyter serverextension enable --py jupyterlab_s3_browser && \ + # Remove Elyra logo from JupyterLab because this is not a pure Elyra image \ + sed -i 's/widget\.id === \x27jp-MainLogo\x27/widget\.id === \x27jp-MainLogo\x27 \&\& false/' /opt/app-root/share/jupyter/labextensions/@elyra/theme-extension/static/lib_index_js.*.js && \ # Replace Notebook's launcher, "(ipykernel)" with Python's version 3.x.y \ - sed -i -e "s/Python.*/$(python --version)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \ + sed -i -e "s/Python.*/$(python --version| cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \ # Remove default Elyra runtime-images \ rm /opt/app-root/share/jupyter/metadata/runtime-images/*.json && \ # Fix permissions to support pip in Openshift environments \ @@ -47,12 +85,10 @@ RUN echo "Installing softwares and packages" && \ fix-permissions /opt/app-root -P # Copy Elyra runtime-images definitions and set the version -COPY runtime-images/ /opt/app-root/share/jupyter/metadata/runtime-images/ +# COPY runtime-images/ /opt/app-root/share/jupyter/metadata/runtime-images/ USER root RUN mkdir /etc/conda COPY condarc /etc/conda/condarc -USER 1001 - -ENTRYPOINT ["start-notebook.sh"] +# ENTRYPOINT ["start-notebook.sh"] diff --git a/jupyter/datascience/anaconda-python-3.8/README.md b/jupyter/datascience/anaconda-python-3.8/README.md new file mode 100644 index 000000000..28db75267 --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/README.md @@ -0,0 +1,24 @@ +# Conda Notebook Images + +These notebook images are managed with [Anaconda](https://anaconda.org/). + +## Manage the conda environment + +``` +cd base/anaconda-python-3.8 +podman build -t conda-base .
+ +cd jupyter/datascience/anaconda-python-3.8 +podman run -it localhost/conda-base bash + +(on a different terminal) podman cp environment.yml CONTAINER_ID:/environment.yml + +conda env update -n workbench --file environment.yml --prune +conda env export > environment.yml + +(on a different terminal) podman cp CONTAINER_ID:/environment.yml . +``` + + +Reference docs: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment + diff --git a/jupyter/datascience/anaconda-python-3.8/environment.in b/jupyter/datascience/anaconda-python-3.8/environment.in index a7c0979ab..5d28e8757 100644 --- a/jupyter/datascience/anaconda-python-3.8/environment.in +++ b/jupyter/datascience/anaconda-python-3.8/environment.in @@ -7,6 +7,7 @@ plotly~=5.16 scikit-learn~=1.3 scipy~=1.10 skl2onnx~=1.15 +codeflare-sdk~=0.12 pymongo~=4.5 psycopg~=3.1 pyodbc~=4.0 diff --git a/jupyter/datascience/anaconda-python-3.8/mongodb-org-6.0.repo-x86_64 b/jupyter/datascience/anaconda-python-3.8/mongodb-org-6.0.repo-x86_64 new file mode 100644 index 000000000..88e6eee51 --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/mongodb-org-6.0.repo-x86_64 @@ -0,0 +1,6 @@ +[mongodb-org-6.0] +name=MongoDB Repository +baseurl=https://repo.mongodb.org/yum/redhat/8/mongodb-org/6.0/x86_64/ +gpgcheck=1 +enabled=1 +gpgkey=https://www.mongodb.org/static/pgp/server-6.0.asc diff --git a/jupyter/datascience/anaconda-python-3.8/mssql-2022.repo-x86_64 b/jupyter/datascience/anaconda-python-3.8/mssql-2022.repo-x86_64 new file mode 100644 index 000000000..0d211db5f --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/mssql-2022.repo-x86_64 @@ -0,0 +1,6 @@ +[packages-microsoft-com-prod] +name=packages-microsoft-com-prod +baseurl=https://packages.microsoft.com/rhel/8/prod/ +enabled=1 +gpgcheck=1 +gpgkey=https://packages.microsoft.com/keys/microsoft.asc diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json
b/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json index 5b74dfff7..806597ddb 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json @@ -1,10 +1,10 @@ { "display_name": "Datascience with Python 3.8 (UBI8)", "metadata": { - "tags": [], - "display_name": "Datascience with Python 3.8 (UBI8)", - "image_name": "quay.io/opendatahub/workbench-images@sha256:eccc4077d1251042cafb00d788c0b4f3109f9efa612781acec149a2b73aab994", - "pull_policy": "IfNotPresent" + "tags": ["datascience"], + "display_name": "Datascience with Python 3.8 (UBI8)", + "image_name": "quay.io/opendatahub/workbench-images@sha256:eccc4077d1251042cafb00d788c0b4f3109f9efa612781acec149a2b73aab994", + "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" } \ No newline at end of file diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json index 51c5e948f..127a5bb2d 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json @@ -1,7 +1,7 @@ { "display_name": "Pytorch with CUDA and Python 3.8 (UBI8)", "metadata": { - "tags": [], + "tags": ["pytorch"], "display_name": "Pytorch with CUDA and Python 3.8 (UBI8)", "image_name": "quay.io/opendatahub/workbench-images@sha256:6f6925fbbf2cbfb131d00cfa39bc9217c41462e153ca14421d5ef3ba99114179", "pull_policy": "IfNotPresent" diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json index fbae808c2..fcbb872ba 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json +++ 
b/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json @@ -1,7 +1,7 @@ { "display_name": "TensorFlow with CUDA and Python 3.8 (UBI8)", "metadata": { - "tags": [], + "tags": ["tensorflow"], "display_name": "TensorFlow with CUDA and Python 3.8 (UBI8)", "image_name": "quay.io/opendatahub/workbench-images@sha256:a53da08eb87b117de0386b10ee1c55d8b889a552af6050f5f3fcb234f51b7333", "pull_policy": "IfNotPresent" diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json index 66f8c651f..3ba5bdf8b 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json @@ -1,7 +1,7 @@ { "display_name": "Python 3.8 (UBI8)", "metadata": { - "tags": [], + "tags": ["minimal"], "display_name": "Python 3.8 (UBI8)", "image_name": "quay.io/opendatahub/workbench-images@sha256:531f61fde893546b819aaf776025c2fc4fd17b4bf1229639cb9d595cf0054a2c", "pull_policy": "IfNotPresent" diff --git a/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh b/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh index e800f3dd9..d9b08b397 100644 --- a/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh +++ b/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh @@ -1,49 +1,24 @@ #!/bin/bash set -x -replace_invalid_characters (){ - python -c 'import sys;print(sys.argv[1].translate ({ord(c): "-" for c in "!@#$%^&*()[]{};:,/<>?\|`~=_+"}))' "$1" -} - -# Assumptions are existing kubeflow installation is in the kubeflow namespace -DEFAULT_RUNTIME_FILE=$(jupyter --data-dir)/metadata/runtimes/test.json - -if [ -f "/var/run/secrets/kubernetes.io/serviceaccount/namespace" ]; then - SA_NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) -fi - -COS_BUCKET=$(replace_invalid_characters "$COS_BUCKET") -export COS_BUCKET=${COS_BUCKET:-default} - -# If Kubeflow credentials are not 
supplied, use default Kubeflow installation credentials -KF_DEPLOYMENT_NAMESPACE="${SA_NAMESPACE:=default}" -AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:=minio}" -AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:=minio123}" - -if [[ ! -f "$DEFAULT_RUNTIME_FILE" ]]; then - elyra-metadata install runtimes --schema_name=kfp \ - --name=test \ - --display_name=Test \ - --auth_type=NO_AUTHENTICATION \ - --api_endpoint=http://ml-pipeline."$KF_DEPLOYMENT_NAMESPACE".svc.cluster.local:3000/pipeline \ - --cos_endpoint=http://minio-service."$KF_DEPLOYMENT_NAMESPACE".svc.cluster.local:9000 \ - --cos_auth_type=USER_CREDENTIALS \ - --cos_username="$AWS_ACCESS_KEY_ID" \ - --cos_password="$AWS_SECRET_ACCESS_KEY" \ - --cos_bucket="$COS_BUCKET" \ - --engine=Tekton -fi - # Set the elyra config on the right path jupyter elyra --generate-config cp /opt/app-root/bin/utils/jupyter_elyra_config.py /opt/app-root/src/.jupyter/ -# Set runtime config from volume mount +# create the elyra runtime directory if not present +if [ ! 
-d $(jupyter --data-dir)/metadata/runtimes/ ]; then + mkdir -p $(jupyter --data-dir)/metadata/runtimes/ +fi +# Set elyra runtime config from volume mount if [ "$(ls -A /opt/app-root/runtimes/)" ]; then cp -r /opt/app-root/runtimes/..data/*.json $(jupyter --data-dir)/metadata/runtimes/ fi # Environment vars set for accessing ssl_sa_certs and sa_token -export KF_PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +# export PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" export KF_PIPELINES_SA_TOKEN_ENV="/var/run/secrets/kubernetes.io/serviceaccount/token" -export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" \ No newline at end of file +export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" +# Environment vars set for accessing the following dependencies in an air-gapped environment +export ELYRA_BOOTSTRAP_SCRIPT_URL="file:///opt/app-root/bin/utils/bootstrapper.py" +export ELYRA_PIP_CONFIG_URL="file:///opt/app-root/bin/utils/pip.conf" +export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt" \ No newline at end of file diff --git a/jupyter/datascience/anaconda-python-3.8/utils/component_runtime.json b/jupyter/datascience/anaconda-python-3.8/utils/component_runtime.json new file mode 100644 index 000000000..a1e84196e --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/utils/component_runtime.json @@ -0,0 +1,10 @@ +{ + "title": "Runtime Type", + "description": "The type of runtime associated with this Component Catalog", + "type": "string", + "enum": ["KUBEFLOW_PIPELINES"], + "default": "KUBEFLOW_PIPELINES", + "uihints": { + "hidden": true + } +} \ No newline at end of file diff --git a/jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py b/jupyter/datascience/anaconda-python-3.8/utils/jupyter_elyra_config.py similarity index 100% rename from jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py
rename to jupyter/datascience/anaconda-python-3.8/utils/jupyter_elyra_config.py diff --git a/jupyter/datascience/anaconda-python-3.8/utils/kfp_authentication.patch b/jupyter/datascience/anaconda-python-3.8/utils/kfp_authentication.patch new file mode 100644 index 000000000..3b4b776d1 --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/utils/kfp_authentication.patch @@ -0,0 +1,18 @@ +--- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400 ++++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400 +@@ -230,6 +230,7 @@ + """ + + kf_url = urlsplit(api_endpoint)._replace(path="").geturl() ++ kf_pipelines_ssl_sa_cert = os.getenv("PIPELINES_SSL_SA_CERTS", None) + + # return data structure for successful requests + auth_info = { +@@ -239,6 +240,7 @@ + "cookies": None, # passed to KFP SDK client as "cookies" param value + "credentials": None, # passed to KFP SDK client as "credentials" param value + "existing_token": None, # passed to KFP SDK client as "existing_token" param value ++ "ssl_ca_cert": kf_pipelines_ssl_sa_cert, # passed to KFP SDK Client as "ssl_ca_cert" param value + } + + try: diff --git a/jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg b/jupyter/datascience/anaconda-python-3.8/utils/pipeline-flow.svg similarity index 100% rename from jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg rename to jupyter/datascience/anaconda-python-3.8/utils/pipeline-flow.svg diff --git a/jupyter/datascience/anaconda-python-3.8/utils/pipeline_app.patch b/jupyter/datascience/anaconda-python-3.8/utils/pipeline_app.patch new file mode 100644 index 000000000..2d65b3a86 --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/utils/pipeline_app.patch @@ -0,0 +1,12 @@ +--- a/pipeline_app.py 2023-11-09 13:36:35 ++++ b/pipeline_app.py 2023-11-09 12:10:35 +@@ -296,7 +296,8 @@ + _validate_pipeline_definition(pipeline_definition) + except Exception: + raise click.ClickException("Pipeline validation FAILED.") +- ++ ++ print_info("Pipeline 
validation SUCCEEDED.", []) + + def validate_timeout_option(ctx, param, value): + """Callback for monitor-timeout parameter validation""" diff --git a/jupyter/datascience/anaconda-python-3.8/utils/pipeline_definition.patch b/jupyter/datascience/anaconda-python-3.8/utils/pipeline_definition.patch new file mode 100644 index 000000000..6110b643f --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/utils/pipeline_definition.patch @@ -0,0 +1,12 @@ +--- a/pipeline_definition.py 2023-11-09 13:36:35 ++++ b/pipeline_definition.py 2023-11-07 20:07:49 +@@ -265,6 +265,9 @@ + if parameter_class is None: + return None # runtime type does not support parameters, skip + ++ if not ElyraProperty.subclass_exists_for_property(parameter_class.property_id): ++ ElyraProperty.build_property_map() ++ + # Convert pipeline parameters to runtime-specific instances + converted_value = ElyraProperty.create_instance(parameter_class.property_id, self.pipeline_parameters) + if converted_value is not None: diff --git a/jupyter/datascience/anaconda-python-3.8/utils/processor_kfp.patch b/jupyter/datascience/anaconda-python-3.8/utils/processor_kfp.patch new file mode 100644 index 000000000..2b8353a35 --- /dev/null +++ b/jupyter/datascience/anaconda-python-3.8/utils/processor_kfp.patch @@ -0,0 +1,38 @@ +--- a/processor_kfp.py 2023-06-09 10:19:08.882563609 -0400 ++++ b/processor_kfp.py 2023-07-13 19:31:43.572407879 -0400 +@@ -213,6 +213,7 @@ + credentials=auth_info.get("credentials", None), + existing_token=auth_info.get("existing_token", None), + namespace=user_namespace, ++ ssl_ca_cert=auth_info.get("ssl_ca_cert", None), + ) + else: + client = ArgoClient( +@@ -416,7 +417,7 @@ + + # create pipeline run (or specified pipeline version) + run = client.run_pipeline( +- experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=version_id ++ experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=pipeline_id + ) + + except Exception as ex: +@@ 
-435,7 +436,7 @@ + + self.log_pipeline_info( + pipeline_name, +- f"pipeline submitted: {public_api_endpoint}/#/runs/details/{run.id}", ++ f"pipeline submitted: {public_api_endpoint}/{run.id}", + duration=time.time() - t0, + ) + +@@ -451,7 +452,7 @@ + + return KfpPipelineProcessorResponse( + run_id=run.id, +- run_url=f"{public_api_endpoint}/#/runs/details/{run.id}", ++ run_url=f"{public_api_endpoint}/{run.id}", + object_storage_url=object_storage_url, + object_storage_path=object_storage_path, + + ) \ No newline at end of file