From 0f5d2d7f492dda1754b34b736bd80a80abe15bae Mon Sep 17 00:00:00 2001 From: Nik <6206742+nik-418@users.noreply.github.com> Date: Thu, 19 Oct 2023 10:30:05 +0200 Subject: [PATCH] Warmup calls (#38) --- Dockerfile | 9 +++++++++ README.md | 8 ++++++++ potassium/potassium.py | 10 ++++++++++ setup.py | 2 +- tests/test_endpoints.py | 16 ++++++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d3e104a..42f8e83 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,14 @@ FROM python:3.8-slim-buster +ENV NODE_MAJOR=20 +RUN apt-get update && \ + apt-get install -y ca-certificates curl gnupg && \ + mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ + apt-get update && \ + apt-get install nodejs -y + WORKDIR /potassium RUN pip install pyright pytest diff --git a/README.md b/README.md index 78d4713..1d63157 100644 --- a/README.md +++ b/README.md @@ -212,6 +212,14 @@ The context dict passed in is a mutable reference, so you can modify it in-place `app.serve` runs the server, and is a blocking operation. +--- +## Pre-warming your app + +Potassium comes with a built-in endpoint for those cases where you want to "warm up" your app to better control the timing of your inference calls. You don't *need* to call it, since your inference call requires `init()` to have run once on server startup anyway, but this gives you a bit more control. + +Once your model is warm (i.e., cold boot finished), this endpoint returns a 200. If a cold boot is required, the `init()` function is first called while the server starts up, and then a 200 is returned from this endpoint. 
+ +You don't need any extra code to enable it; it comes out of the box, and you can call it at `/_k/warmup` with a POST request. --- diff --git a/potassium/potassium.py b/potassium/potassium.py index 2cce945..7be0da8 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -229,7 +229,17 @@ def handle(path): endpoint = self._endpoints[route] return self._handle_generic(endpoint, request) + + @flask_app.route('/_k/warmup', methods=["POST"]) + def warm(): + res = make_response({ + "warm": True, + }) + res.status_code = 200 + res.headers['X-Endpoint-Type'] = "warmup" + return res + @flask_app.route('/_k/status', methods=["GET"]) @flask_app.route('/__status__', methods=["GET"]) def status(): idle_time = 0 diff --git a/setup.py b/setup.py index 2bb6283..446e516 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='potassium', packages=['potassium'], - version='0.2.1', + version='0.3.0', license='Apache License 2.0', # Give a short description about your library description='The potassium package is a flask-like HTTP server for serving large AI models', diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index d3f2fd6..7872767 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -183,3 +183,19 @@ def wait_for_background_task(): assert order_of_execution_queue.get() == "send_background_task" assert order_of_execution_queue.get() == "background_task_completed" +def test_warmup(): + app = potassium.Potassium("my_app") + + @app.init + def init(): + return {} + + @app.handler() + def handler(context: dict, request: potassium.Request) -> potassium.Response: + raise Exception("should not be called") + + client = app.test_client() + + res = client.post("/_k/warmup", json={}) + assert res.status_code == 200 + assert res.json == {"warm": True}