Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Erikk/ban 375 inference time to potassium #33

Merged
merged 7 commits into from
Sep 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions potassium/potassium.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from flask import Flask, request, make_response, abort
from werkzeug.serving import make_server
from threading import Thread, Lock, Condition
Expand Down Expand Up @@ -47,6 +48,8 @@ def __init__(self, name):
self._gpu_lock = Lock()
self._background_task_cv = Condition()
self._sequence_number = 0
self._idle_start_time = 0
self._last_inference_start_time = None
self._flask_app = self._create_flask_app()

#
Expand Down Expand Up @@ -145,6 +148,7 @@ def _handle_generic(self, endpoint, flask_request):
return res

res = None
self._last_inference_start_time = time.time()

if endpoint.type == "handler":
req = Request(
Expand All @@ -162,6 +166,8 @@ def _handle_generic(self, endpoint, flask_request):
res = make_response(tb_str)
res.status_code = 500
res.headers['X-Endpoint-Type'] = endpoint.type
self._idle_start_time = time.time()
self._last_inference_start_time = None
self._gpu_lock.release()
elif endpoint.type == "background":
req = Request(
Expand All @@ -178,7 +184,9 @@ def task(endpoint, lock, req):
finally:
with self._background_task_cv:
self._background_task_cv.notify_all()
# release lock

self._idle_start_time = time.time()
self._last_inference_start_time = None
lock.release()

thread = Thread(target=task, args=(endpoint, self._gpu_lock, req))
Expand Down Expand Up @@ -219,14 +227,25 @@ def handle(path):

@flask_app.route('/__status__', methods=["GET"])
def status():
idle_time = 0
inference_time = 0
gpu_available = not self._gpu_lock.locked()

if self._last_inference_start_time != None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: prefer `is not None` over `!= None` in Python.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, missed this before merging. I'll add that in another PR and merge it so we get it as well 👍

inference_time = int((time.time() - self._last_inference_start_time)*1000)

if gpu_available:
idle_time = int((time.time() - self._idle_start_time)*1000)

res = make_response({
"gpu_available": not self._gpu_lock.locked(),
"sequence_number": self._sequence_number
"gpu_available": gpu_available,
"sequence_number": self._sequence_number,
"idle_time": idle_time,
"inference_time": inference_time,
})

res.status_code = 200
res.headers['X-Endpoint-Type'] = "status"
res
return res

return flask_app
Expand All @@ -235,6 +254,7 @@ def status():
def serve(self, host="0.0.0.0", port=8000):
print(colored("------\nStarting Potassium Server 🍌", 'yellow'))
self._init_func()
server = make_server(host, port, self._flask_app)
server = make_server(host, port, self._flask_app, threaded=True)
print(colored(f"Serving at http://{host}:{port}\n------", 'green'))
self._idle_start_time = time.time()
server.serve_forever()
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from distutils.core import setup
import setuptools
from pathlib import Path

this_directory = Path(__file__).parent
Expand Down
30 changes: 18 additions & 12 deletions tests/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,28 @@ def background(context: dict, request: potassium.Request):
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": True,
"sequence_number": 0,
}
assert res.json is not None
assert res.json["gpu_available"] == True
assert res.json["sequence_number"] == 0
assert res.json["idle_time"] > 0
assert res.json["inference_time"] == 0

# send background post in separate thread
res = client.post("/background", json={})
assert res.status_code == 200

# add a small sleep for inference time to be above 0
time.sleep(0.1)

# check status
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": False,
"sequence_number": 1,
}
assert res.json is not None
assert res.json["gpu_available"] == False
assert res.json["sequence_number"] == 1
assert res.json["idle_time"] == 0
assert res.json["inference_time"] > 0

# notify background thread to continue
with resolve_background_condition:
Expand All @@ -124,10 +129,11 @@ def background(context: dict, request: potassium.Request):
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": True,
"sequence_number": 1,
}
assert res.json is not None
assert res.json["gpu_available"] == True
assert res.json["sequence_number"] == 1
assert res.json["idle_time"] > 0
assert res.json["inference_time"] == 0

def test_wait_for_background_task():
app = potassium.Potassium("my_app")
Expand Down
Loading