Upgrade the conda image with new features
- Sync Elyra-based changes onto the conda-based notebook image

Signed-off-by: Harshad Reddy Nalla <[email protected]>
harshad16 committed Dec 11, 2023
1 parent 0df7031 commit 64739d4
Showing 14 changed files with 182 additions and 47 deletions.
49 changes: 41 additions & 8 deletions jupyter/datascience/anaconda-python-3.8/Dockerfile
@@ -15,31 +15,66 @@ LABEL name="jupyter-datascience-anaconda-python-3.8:latest" \

ENV JUPYTER_ENABLE_LAB="1"
USER root

# Use the NodeJS from Anaconda > 12.0
RUN yum remove -y nodejs
RUN yum remove -y nodejs && \
yum install -y jq

# Install useful OS packages
RUN dnf install -y jq unixODBC git-lfs libsndfile

# Disable announcement plugin of jupyterlab
RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements"

# Install MongoDB client. We need a special repo for MongoDB as they do their own distribution
COPY mongodb-org-6.0.repo-x86_64 /etc/yum.repos.d/mongodb-org-6.0.repo
RUN dnf install -y mongocli

# Install MSSQL client. We need a special repo for MSSQL as they do their own distribution
COPY mssql-2022.repo-x86_64 /etc/yum.repos.d/mssql-2022.repo
RUN ACCEPT_EULA=Y dnf install -y mssql-tools18 unixODBC-devel
ENV PATH="$PATH:/opt/mssql-tools18/bin"

# Other apps and tools installed as default user
USER 1001

WORKDIR /opt/app-root/bin

COPY utils utils/
COPY utils ./utils/

# Install Python packages and Jupyterlab extensions from conda environment file
COPY environment.yml start-notebook.sh ./
# Copy Elyra setup to utils so that it's sourced at startup
COPY jupyter_elyra_config.py setup-elyra.sh pipeline-flow.svg ./utils/
COPY setup-elyra.sh ./utils/

RUN echo "Installing software and packages" && \
/opt/anaconda3/bin/conda env update -p "${APP_ROOT}" --file environment.yml && \
/opt/anaconda3/bin/conda env create --file environment.yml -p "${APP_ROOT}" && \
rm -f ./environment.yml && \
# setup path for runtime configuration \
mkdir /opt/app-root/runtimes && \
# switch to Data Science Pipeline \
cp utils/pipeline-flow.svg /opt/app-root/lib/python3.8/site-packages/elyra/static/icons/kubeflow.svg && \
sed -i "s/Kubeflow Pipelines/Data Science/g" /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/runtime_type.py && \
sed -i "s/Kubeflow Pipelines/Data Science Pipelines/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \
sed -i "s/kubeflow-service/data-science-pipeline-service/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \
sed -i "s/\"default\": \"Argo\",/\"default\": \"Tekton\",/g" /opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/kfp.json && \
# Workaround for passing ssl_sa_cert
patch /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/kfp/kfp_authentication.py -i utils/kfp_authentication.patch && \
patch /opt/app-root/lib/python3.8/site-packages/elyra/pipeline/kfp/processor_kfp.py -i utils/processor_kfp.patch && \
# switch to Data Science Pipeline in component catalog \
DIR_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/local-directory-catalog.json" && \
FILE_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/local-file-catalog.json" && \
URL_COMPONENT="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/url-catalog.json" && \
tmp=$(mktemp) && \
jq '.properties.metadata.properties.runtime_type = input' $DIR_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $DIR_COMPONENT && \
jq '.properties.metadata.properties.runtime_type = input' $FILE_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $FILE_COMPONENT && \
jq '.properties.metadata.properties.runtime_type = input' $URL_COMPONENT utils/component_runtime.json > "$tmp" && mv "$tmp" $URL_COMPONENT && \
sed -i "s/metadata.metadata.runtime_type/\"DATA_SCIENCE_PIPELINES\"/g" /opt/app-root/share/jupyter/labextensions/@elyra/pipeline-editor-extension/static/lib_index_js.*.js && \
# Explicitly enable jupyterlab_s3_browser plugin \
jupyter serverextension enable --py jupyterlab_s3_browser && \
# Remove Elyra logo from JupyterLab because this is not a pure Elyra image \
sed -i 's/widget\.id === \x27jp-MainLogo\x27/widget\.id === \x27jp-MainLogo\x27 \&\& false/' /opt/app-root/share/jupyter/labextensions/@elyra/theme-extension/static/lib_index_js.*.js && \
# Replace the Notebook launcher's "(ipykernel)" label with Python's version 3.x.y \
sed -i -e "s/Python.*/$(python --version)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \
sed -i -e "s/Python.*/$(python --version| cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \
# Remove default Elyra runtime-images \
rm /opt/app-root/share/jupyter/metadata/runtime-images/*.json && \
# Fix permissions to support pip in Openshift environments \
@@ -53,6 +88,4 @@ USER root
RUN mkdir /etc/conda
COPY condarc /etc/conda/condarc

USER 1001

ENTRYPOINT ["start-notebook.sh"]
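
The jq calls in the RUN block above inject the runtime_type definition from utils/component_runtime.json into each Elyra component-catalog schema. A minimal sketch of what one of those merges does to a single schema, using the paths from the image layout above (during the build this runs as part of the RUN step):

```
# Merge the runtime_type override into one Elyra catalog schema.
SCHEMA="/opt/app-root/lib/python3.8/site-packages/elyra/metadata/schemas/url-catalog.json"
OVERRIDE="/opt/app-root/bin/utils/component_runtime.json"

tmp=$(mktemp)
# 'input' reads the next input file (the override) and assigns its entire
# contents to .properties.metadata.properties.runtime_type in the schema.
jq '.properties.metadata.properties.runtime_type = input' "$SCHEMA" "$OVERRIDE" > "$tmp" && mv "$tmp" "$SCHEMA"

# Confirm the catalog now advertises the overridden runtime type.
jq '.properties.metadata.properties.runtime_type.enum' "$SCHEMA"
```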
24 changes: 24 additions & 0 deletions jupyter/datascience/anaconda-python-3.8/README.md
@@ -0,0 +1,24 @@
# Conda Notebook Images

These notebook images are managed with [Anaconda](https://anaconda.org/).

## Manage the conda environment

```
cd base/anaconda-python-3.8
podman build -t conda-base .
cd jupyter/datascience/anaconda-python-3.8
podman run -it localhost/conda-base bash
(on a different terminal) podman cp environment.yml <container_id>:/environment.yml
conda env update -n workbench --file environment.yml --prune
conda env export > environment.yml
(on a different terminal) podman cp <container_id>:/environment.yml .
```


Reference docs: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment
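
After copying the exported environment.yml back out of the container, it can be worth reviewing the pin changes before committing them. A possible check, assuming it is run from the repository root:

```
# Review what the conda export actually changed before committing it.
git diff -- jupyter/datascience/anaconda-python-3.8/environment.yml
```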

3 changes: 2 additions & 1 deletion jupyter/datascience/anaconda-python-3.8/environment.in
@@ -7,6 +7,7 @@ plotly~=5.16
scikit-learn~=1.3
scipy~=1.10
skl2onnx~=1.15
codeflare-sdk~=0.12
pymongo~=4.5
psycopg~=3.1
pyodbc~=4.0
@@ -24,4 +25,4 @@ jupyterlab-lsp~=4.2
jupyterlab_widgets~=3.0
jupyter-resource-usage~=0.6
nbdime~=3.2
nbgitpuller~=1.2
2 changes: 1 addition & 1 deletion jupyter/datascience/anaconda-python-3.8/environment.yml
@@ -413,4 +413,4 @@ dependencies:
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hd590300_5
- zstd=1.5.5=hfc55251_0
prefix: /opt/app-root
jupyter/datascience/anaconda-python-3.8/mongodb-org-6.0.repo-x86_64
@@ -0,0 +1,6 @@
[mongodb-org-6.0]
name=MongoDB Repository
baseurl=https://repo.mongodb.org/yum/redhat/8/mongodb-org/6.0/x86_64/
gpgcheck=1
enabled=1
gpgkey=https://www.mongodb.org/static/pgp/server-6.0.asc
jupyter/datascience/anaconda-python-3.8/mssql-2022.repo-x86_64
@@ -0,0 +1,6 @@
[packages-microsoft-com-prod]
name=packages-microsoft-com-prod
baseurl=https://packages.microsoft.com/rhel/8/prod/
enabled=1
gpgcheck=1
gpgkey=https://packages.microsoft.com/keys/microsoft.asc
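
Both repo files are copied into /etc/yum.repos.d by the Dockerfile so that dnf can resolve mongocli and mssql-tools18. A rough way to confirm they are active inside a container built from this image (illustrative commands, not part of the build):

```
# Run as root inside the container; output details will vary by mirror state.
dnf repolist | grep -E 'mongodb-org-6.0|packages-microsoft-com-prod'
dnf info mongocli mssql-tools18
```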
47 changes: 11 additions & 36 deletions jupyter/datascience/anaconda-python-3.8/setup-elyra.sh
@@ -1,49 +1,24 @@
#!/bin/bash
set -x

replace_invalid_characters (){
python -c 'import sys;print(sys.argv[1].translate ({ord(c): "-" for c in "!@#$%^&*()[]{};:,/<>?\|`~=_+"}))' "$1"
}

# Assumption: the existing Kubeflow installation is in the kubeflow namespace
DEFAULT_RUNTIME_FILE=$(jupyter --data-dir)/metadata/runtimes/test.json

if [ -f "/var/run/secrets/kubernetes.io/serviceaccount/namespace" ]; then
SA_NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
fi

COS_BUCKET=$(replace_invalid_characters "$COS_BUCKET")
export COS_BUCKET=${COS_BUCKET:-default}

# If Kubeflow credentials are not supplied, use default Kubeflow installation credentials
KF_DEPLOYMENT_NAMESPACE="${SA_NAMESPACE:=default}"
AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:=minio}"
AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:=minio123}"

if [[ ! -f "$DEFAULT_RUNTIME_FILE" ]]; then
elyra-metadata install runtimes --schema_name=kfp \
--name=test \
--display_name=Test \
--auth_type=NO_AUTHENTICATION \
--api_endpoint=http://ml-pipeline."$KF_DEPLOYMENT_NAMESPACE".svc.cluster.local:3000/pipeline \
--cos_endpoint=http://minio-service."$KF_DEPLOYMENT_NAMESPACE".svc.cluster.local:9000 \
--cos_auth_type=USER_CREDENTIALS \
--cos_username="$AWS_ACCESS_KEY_ID" \
--cos_password="$AWS_SECRET_ACCESS_KEY" \
--cos_bucket="$COS_BUCKET" \
--engine=Tekton
fi

# Set the elyra config on the right path
jupyter elyra --generate-config
cp /opt/app-root/bin/utils/jupyter_elyra_config.py /opt/app-root/src/.jupyter/

# Set runtime config from volume mount
# create the elyra runtime directory if not present
if [ ! -d $(jupyter --data-dir)/metadata/runtimes/ ]; then
mkdir -p $(jupyter --data-dir)/metadata/runtimes/
fi
# Set elyra runtime config from volume mount
if [ "$(ls -A /opt/app-root/runtimes/)" ]; then
cp -r /opt/app-root/runtimes/..data/*.json $(jupyter --data-dir)/metadata/runtimes/
fi

# Environment vars set for accessing ssl_sa_certs and sa_token
export KF_PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
# export PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
export KF_PIPELINES_SA_TOKEN_ENV="/var/run/secrets/kubernetes.io/serviceaccount/token"
export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token"
# Environment vars set for accessing the following dependencies in an air-gapped environment
export ELYRA_BOOTSTRAP_SCRIPT_URL="file:///opt/app-root/bin/utils/bootstrapper.py"
export ELYRA_PIP_CONFIG_URL="file:///opt/app-root/bin/utils/pip.conf"
export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt"
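
The script copies any runtime JSON mounted under /opt/app-root/runtimes into Jupyter's data directory. A quick sanity check once the notebook is running, assuming the elyra-metadata CLI shipped with this Elyra install:

```
# List the runtime configurations Elyra picked up at startup.
elyra-metadata list runtimes
ls "$(jupyter --data-dir)/metadata/runtimes/"
```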
jupyter/datascience/anaconda-python-3.8/utils/component_runtime.json
@@ -0,0 +1,10 @@
{
"title": "Runtime Type",
"description": "The type of runtime associated with this Component Catalog",
"type": "string",
"enum": ["KUBEFLOW_PIPELINES"],
"default": "KUBEFLOW_PIPELINES",
"uihints": {
"hidden": true
}
}
jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py
@@ -8,4 +8,4 @@
# PipelineProcessorRegistry(SingletonConfigurable) configuration
#------------------------------------------------------------------------------

c.PipelineProcessorRegistry.runtimes = ['kfp']
jupyter/datascience/anaconda-python-3.8/utils/kfp_authentication.patch
@@ -0,0 +1,18 @@
--- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400
+++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400
@@ -230,6 +230,7 @@
"""

kf_url = urlsplit(api_endpoint)._replace(path="").geturl()
+ kf_pipelines_ssl_sa_cert = os.getenv("PIPELINES_SSL_SA_CERTS", None)

# return data structure for successful requests
auth_info = {
@@ -239,6 +240,7 @@
"cookies": None, # passed to KFP SDK client as "cookies" param value
"credentials": None, # passed to KFP SDK client as "credentials" param value
"existing_token": None, # passed to KFP SDK client as "existing_token" param value
+ "ssl_ca_cert": kf_pipelines_ssl_sa_cert, # passed to KFP SDK Client as "ssl_ca_cert" param value
}

try:
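
The patch reads the certificate path from the PIPELINES_SSL_SA_CERTS environment variable and forwards it to the KFP SDK client as ssl_ca_cert; note that setup-elyra.sh above exports KF_PIPELINES_SSL_SA_CERTS while the PIPELINES_SSL_SA_CERTS line appears commented out. A minimal sketch of enabling the patched code path (the path is the standard service-account mount):

```
# Point the patched authenticator at the in-cluster CA bundle; adjust the
# path if the cluster mounts its service-account certificate elsewhere.
export PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
```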
12 changes: 12 additions & 0 deletions jupyter/datascience/anaconda-python-3.8/utils/pipeline_app.patch
@@ -0,0 +1,12 @@
--- a/pipeline_app.py 2023-11-09 13:36:35
+++ b/pipeline_app.py 2023-11-09 12:10:35
@@ -296,7 +296,8 @@
_validate_pipeline_definition(pipeline_definition)
except Exception:
raise click.ClickException("Pipeline validation FAILED.")
-
+
+ print_info("Pipeline validation SUCCEEDED.", [])

def validate_timeout_option(ctx, param, value):
"""Callback for monitor-timeout parameter validation"""
jupyter/datascience/anaconda-python-3.8/utils/pipeline_definition.patch
@@ -0,0 +1,12 @@
--- a/pipeline_definition.py 2023-11-09 13:36:35
+++ b/pipeline_definition.py 2023-11-07 20:07:49
@@ -265,6 +265,9 @@
if parameter_class is None:
return None # runtime type does not support parameters, skip

+ if not ElyraProperty.subclass_exists_for_property(parameter_class.property_id):
+ ElyraProperty.build_property_map()
+
# Convert pipeline parameters to runtime-specific instances
converted_value = ElyraProperty.create_instance(parameter_class.property_id, self.pipeline_parameters)
if converted_value is not None:
38 changes: 38 additions & 0 deletions jupyter/datascience/anaconda-python-3.8/utils/processor_kfp.patch
@@ -0,0 +1,38 @@
--- a/processor_kfp.py 2023-06-09 10:19:08.882563609 -0400
+++ b/processor_kfp.py 2023-07-13 19:31:43.572407879 -0400
@@ -213,6 +213,7 @@
credentials=auth_info.get("credentials", None),
existing_token=auth_info.get("existing_token", None),
namespace=user_namespace,
+ ssl_ca_cert=auth_info.get("ssl_ca_cert", None),
)
else:
client = ArgoClient(
@@ -416,7 +417,7 @@

# create pipeline run (or specified pipeline version)
run = client.run_pipeline(
- experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=version_id
+ experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=pipeline_id
)

except Exception as ex:
@@ -435,7 +436,7 @@

self.log_pipeline_info(
pipeline_name,
- f"pipeline submitted: {public_api_endpoint}/#/runs/details/{run.id}",
+ f"pipeline submitted: {public_api_endpoint}/{run.id}",
duration=time.time() - t0,
)

@@ -451,7 +452,7 @@

return KfpPipelineProcessorResponse(
run_id=run.id,
- run_url=f"{public_api_endpoint}/#/runs/details/{run.id}",
+ run_url=f"{public_api_endpoint}/{run.id}",
object_storage_url=object_storage_url,
object_storage_path=object_storage_path,

)
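
Both patches target specific lines of the installed Elyra sources, so an Elyra version bump can break them silently. A possible pre-build check using GNU patch's dry-run mode, run from the directory that contains the utils/ folder:

```
# Verify the patches still apply cleanly to the installed Elyra sources.
SITE=/opt/app-root/lib/python3.8/site-packages/elyra
patch --dry-run "$SITE/pipeline/kfp/kfp_authentication.py" -i utils/kfp_authentication.patch
patch --dry-run "$SITE/pipeline/kfp/processor_kfp.py" -i utils/processor_kfp.patch
```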
