
Adding the image for python model #70

Merged · 8 commits · Nov 30, 2023
95 changes: 95 additions & 0 deletions .github/workflows/serve-python.yml
@@ -0,0 +1,95 @@
name: Serve-Python workflow

on:
push:
paths:
- "serve-python/**"
# Adds ability to run this workflow manually
workflow_dispatch:
inputs:
logLevel:
description: 'Log level'
required: true
default: 'warning'
type: choice
options:
- info
- warning
- debug
tags:
description: 'Manual run'
required: false
type: boolean

jobs:
build_and_test:
runs-on: ubuntu-latest

steps:
- name: 'Checkout GitHub Action'
uses: actions/checkout@main

- name: 'Build test image'
run: |
docker build -t python-test-image -f ./serve-python/Dockerfile.test ./serve-python

- name: Run Trivy vulnerability scanner
uses: aquasecurity/[email protected]
with:
image-ref: 'python-test-image'
format: 'table'
severity: 'CRITICAL,HIGH'
security-checks: 'vuln'
timeout: '59m59s'
exit-code: '0'

- name: 'Run tests'
env:
IMAGE_NAME: python-test-image
run: |
pip install -r ./serve-python/tests/requirements.txt
python3 -m pytest ./serve-python

push:
if: |
github.ref == 'refs/heads/main' &&
github.repository == 'scilifelabdatacentre/serve-images'
needs: build_and_test
runs-on: ubuntu-latest
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true
permissions:
contents: read
packages: write

steps:
- name: 'Checkout github action'
uses: actions/checkout@main

- name: Docker meta
id: meta
uses: docker/metadata-action@v4
with:
images: ghcr.io/scilifelabdatacentre/serve-python
tags: |
type=raw,value={{date 'YYMMDD-HHmm' tz='Europe/Stockholm'}}

- name: 'Login to GHCR'
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}

- name: Publish image to GHCR
uses: docker/build-push-action@v3
with:
file: ./serve-python/Dockerfile
context: ./serve-python
push: true
build-args: version=${{ github.ref_name }}
tags: |
${{ steps.meta.outputs.tags }}
ghcr.io/scilifelabdatacentre/serve-python:latest
labels: ${{ steps.meta.outputs.labels }}
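The push job above tags the image via `docker/metadata-action` with a timestamp rendered in the Europe/Stockholm timezone. A minimal stdlib sketch of the same `YYMMDD-HHmm` scheme (the function name is illustrative, not part of this PR):

```python
from datetime import datetime
from zoneinfo import ZoneInfo

def build_image_tag(now=None):
    """Mimic {{date 'YYMMDD-HHmm' tz='Europe/Stockholm'}} from docker/metadata-action."""
    now = now or datetime.now(ZoneInfo("Europe/Stockholm"))
    return now.strftime("%y%m%d-%H%M")

# The PR was merged on Nov 30, 2023; a build at 14:05 that day would be tagged:
print(build_image_tag(datetime(2023, 11, 30, 14, 5, tzinfo=ZoneInfo("Europe/Stockholm"))))
# → 231130-1405
```

Because tags are minute-granular, two pushes within the same minute would collide; the workflow also pushes a `latest` tag, so the date tag mainly serves as a human-readable audit trail.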
2 changes: 1 addition & 1 deletion .gitignore
@@ -2,6 +2,6 @@
venv/
.pytest_cache
__pycache__

.DS_Store
#VScode settings
.vscode/*
11 changes: 11 additions & 0 deletions dev_scripts/run_python.sh
@@ -0,0 +1,11 @@
#!/bin/bash

set -o errexit

docker build -t python-dev-img -f ./serve-python/Dockerfile.test ./serve-python
python3 -m venv venv
source ./venv/bin/activate
python3 -m pip install --upgrade pip
pip install -r ./serve-python/tests/requirements.txt
export IMAGE_NAME=python-dev-img
python3 -m pytest ./serve-python/
40 changes: 32 additions & 8 deletions serve-python/Dockerfile
@@ -1,18 +1,42 @@
# FROM python:3
FROM ubuntu:18.04

RUN apt-get update -yq \
&& apt-get install --no-install-recommends -y python3.7-dev curl python3-distutils gcc \
# Create user name and home directory variables.
# The variables are later used as $USER and $HOME.
ENV USER=user
ENV HOME=/home/$USER

# Add user to system
RUN useradd -m -u 1000 $USER

# Set working directory (this is where the code should go)
WORKDIR $HOME

RUN apt-get update \
&& apt-get install ca-certificates -y --no-install-recommends \
&& apt-get install python3.7-dev -y --no-install-recommends \
&& apt-get install curl -y --no-install-recommends \
&& apt-get install python3-distutils -y --no-install-recommends \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
&& python3.7 get-pip.py \
&& apt-get install gcc -y --no-install-recommends \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 \
&& pip3 install --upgrade --no-cache-dir pip \
&& pip3 install --upgrade pip --no-cache-dir \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# RUN /bin/bash -c "apt update"
# RUN /bin/bash -c "curl https://dl.min.io/client/mc/release/linux-amd64/mc --output mc && chmod +x mc"
COPY requirements.txt $HOME/requirements.txt
RUN /bin/bash -c "pip3 install -r requirements.txt --no-cache-dir"

COPY requirements.txt /requirements.txt
COPY serve.py /serve.py
COPY deploy.sh /deploy.sh
COPY serve.py $HOME/serve.py
COPY deploy.sh $HOME/deploy.sh
COPY deploy.sh $HOME/start-script.sh
RUN chmod +x start-script.sh \
&& chmod +x deploy.sh \
&& chown -R $USER:$USER $HOME

RUN pip3 install -r --no-cache-dir requirements.txt \
&& chmod +x deploy.sh
ENV STACKN_MODEL_PATH=$HOME/models
ENV PYTHONPATH=$HOME/models

CMD ["./deploy.sh"]
46 changes: 46 additions & 0 deletions serve-python/Dockerfile.test
@@ -0,0 +1,46 @@
# FROM python:3
FROM ubuntu:18.04

# Create user name and home directory variables.
# The variables are later used as $USER and $HOME.
ENV USER=user
ENV HOME=/home/$USER

# Add user to system
RUN useradd -m -u 1000 $USER

# Set working directory (this is where the code should go)
WORKDIR $HOME

RUN apt-get update \
&& apt-get install ca-certificates -y --no-install-recommends \
&& apt-get install python3.7-dev -y --no-install-recommends \
&& apt-get install curl -y --no-install-recommends \
&& apt-get install python3-distutils -y --no-install-recommends \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
&& python3.7 get-pip.py \
&& apt-get install gcc -y --no-install-recommends \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 \
&& pip3 install --upgrade pip --no-cache-dir \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# RUN /bin/bash -c "apt update"
# RUN /bin/bash -c "curl https://dl.min.io/client/mc/release/linux-amd64/mc --output mc && chmod +x mc"
COPY requirements.txt $HOME/requirements.txt
RUN /bin/bash -c "pip3 install -r requirements.txt"

COPY serve.py $HOME/serve.py
COPY deploy.sh $HOME/deploy.sh
COPY deploy.sh $HOME/start-script.sh
COPY tests/model/ $HOME/models/
RUN chmod +x start-script.sh \
&& chmod +x deploy.sh \
&& chown -R $USER:$USER $HOME

WORKDIR $HOME/models
RUN pip3 install -r requirements.txt --no-cache-dir
WORKDIR $HOME
ENV STACKN_MODEL_PATH=$HOME/models
ENV PYTHONPATH=$HOME/models

CMD ["./deploy.sh"]
6 changes: 5 additions & 1 deletion serve-python/deploy.sh
@@ -1,5 +1,9 @@
#!/bin/bash
cd models
# STACKN_MODEL_PATH is passed as an env variable to the serve app deployment.yaml
# It should contain the folder with models and code
cd $STACKN_MODEL_PATH
[[ -f requirements.txt ]] && pip3 install -r requirements.txt

# Go back to the home directory and start the server app
cd ..
uvicorn serve:app --host 0.0.0.0 --port 8501
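The updated `deploy.sh` cds into `$STACKN_MODEL_PATH` and installs the model's `requirements.txt` only if one is present, before starting uvicorn. The same decision can be sketched in Python (the function name is hypothetical; this mirrors the script's logic, it is not part of the PR):

```python
from pathlib import Path

def model_install_command(model_path):
    """Mirror deploy.sh: install requirements.txt only if the model folder ships one."""
    req = Path(model_path) / "requirements.txt"
    if req.is_file():
        return ["pip3", "install", "-r", str(req)]
    return None  # nothing to install; proceed straight to starting uvicorn

# With a hypothetical model directory:
print(model_install_command("/home/user/models"))
```

The `[[ -f requirements.txt ]] && ...` guard in the script means a model bundle without extra dependencies deploys without error, which the old unconditional `cd models` version did not guarantee.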
70 changes: 16 additions & 54 deletions serve-python/serve.py
@@ -24,58 +24,6 @@

from src.models import predict

REQUEST_TIME = Histogram(
"serve_predict_duration_seconds",
"Prediction duration in seconds",
("method", "status_code", "path"),
)


REQUEST_COUNT = Counter(
"serve_predict_total",
"Total number of predict calls",
("method", "status_code", "path"),
)


class PromMiddleware(BaseHTTPMiddleware):
def __init__(self, app: ASGIApp):
super().__init__(app)

async def dispatch(self, request, call_next):
method = request.method
path = request.url.path
timer_begin = time.time()

status_code = 500

try:
response = await call_next(request)
status_code = response.status_code
except:
raise Exception("Call failed.")

timer_end = time.time()

labels = [method, status_code, path]

REQUEST_COUNT.labels(*labels).inc()
REQUEST_TIME.labels(*labels).observe(timer_end - timer_begin)
return response


def metrics(request):
registry = REGISTRY
if "prometheus_multiproc_dir" in os.environ:
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

data = generate_latest(registry)
response_headers = {"Content-Type": CONTENT_TYPE_LATEST}

return Response(data, status_code=200, headers=response_headers)


try:
from src.models import input_type

@@ -102,8 +50,6 @@ class PredType(BaseModel):
print("Done.")

app = FastAPI()
app.add_middleware(PromMiddleware)
app.add_route("/metrics/", metrics)


@app.post("/predict/")
@@ -114,3 +60,19 @@ async def predict_route(
print(pred_request)
res = predict.model_predict(pred_request, model_data)
return json.dumps(res)


@app.get(
"/health",
tags=["healthcheck"],
summary="Perform a Health Check",
response_description="Return HTTP Status Code 200 (OK)",
)
def get_health():
"""
## Perform a Health Check
Endpoint to perform a healthcheck on.
Returns:
HealthCheck: Returns a JSON response with the health status
"""
return {"status": "OK"}
49 changes: 49 additions & 0 deletions serve-python/tests/model/README.md
@@ -0,0 +1,49 @@
# Transformer example project

This STACKn example project demonstrates how to deploy the Swedish BERT model developed by the Swedish Public Employment Service: https://github.com/af-ai-center/SweBERT.git and publish live prediction endpoints to a STACKn model portal.

***

## BERT and the Swedish BERT models


The release of the BERT architecture is seen as one of the major breakthroughs in NLP (natural language processing) in the last few years. BERT has presented state-of-the-art results across a number of different use cases, such as document classification, sentiment analysis, natural language inference, question answering, sentence similarity and more.

Arbetsförmedlingen (The Swedish Public Employment Service) has developed Swedish BERT models which were trained on Swedish Wikipedia with approximately 2 million articles and 300 million words.

## Getting started

Please follow and refer to [these detailed steps](https://github.com/scaleoutsystems/examples/tree/main/tutorials/studio/quickstart#transformers-example-project) in order to create a "STACKn Default" project and correctly set up a Jupyter instance.

Clone this repository:

$ git clone https://github.com/scaleoutsystems/transformers-example-project.git

and then install the pip requirements and enable the Jupyter notebook extension:

$ pip install -r requirements.txt
$ jupyter nbextension enable --py widgetsnbextension

Now you should be ready to open the `getting_started_with_swebert.ipynb` in the _notebooks_ folder. Please follow the notebook's instructions.

## Deploying the model

Once you have run all the cells in the notebook above, open a terminal again in your Jupyter Lab session and execute the following commands within the repository directory:

- `stackn create object afbert -r minor` (**Note:** add the flag `--insecure` in case you have deployed STACKn locally with a self-signed certificate)

- `stackn get objects` (**Note:** add the flag `--insecure` in case you have deployed STACKn locally with a self-signed certificate)

(Check that the model is listed; you should be able to see the newly created model object in your Studio UI, under the "_Objects_" tab)

Deploy the newly created model object with the "_Python Model Deployment_" component (under the "_Serve_" tab in Studio). _Name_ can be anything, _Model_ should match the name of the newly created model (e.g. "afbert:v0.1.0"); leave the rest as defaults.

**Note:** It could take some time for this model to initialize, so keep checking the logs until it is running successfully.

## Run the prediction

Once the above serving app is up and running, copy the endpoint URL by right-clicking on the _Open_ link.

Go back to your Jupyter Lab session and open the `predict.ipynb` notebook under the _notebooks_ folder. Paste the copied URL at line 12 in order to use the correct endpoint for the prediction. It is time to test the prediction! Run all the cells and check the results.

**Tips:** You can play around by changing the values of the `example` and `msk_ind` variables. The latter will mask (or "hide") one of the words in the example sentence; the prediction will then show the possible candidates for the "missing" word.
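The `msk_ind` masking described in the tips can be sketched in plain Python (the function name and the `[MASK]` token follow BERT convention; the exact payload shape expected by `predict.ipynb` may differ):

```python
def mask_token(sentence, msk_ind, mask="[MASK]"):
    """Replace the word at index msk_ind so the model can predict it."""
    tokens = sentence.split()
    if not 0 <= msk_ind < len(tokens):
        raise IndexError(f"msk_ind {msk_ind} out of range for {len(tokens)} tokens")
    tokens[msk_ind] = mask
    return " ".join(tokens)

print(mask_token("Stockholm är Sveriges huvudstad", 3))
# → Stockholm är Sveriges [MASK]
```

The masked sentence is what gets sent to the `/predict/` endpoint; the model then ranks candidate words for the `[MASK]` position.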