feat: make the maximum number of virtual kernels configurable, defaulting to unlimited (was 40)

Can now be configured using the SOLARA_KERNEL_MAX_COUNT env variable.
widgetti · Mar 8, 2024 · aef5db5 · aef5db5
1 parent e29e492
commit aef5db5
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 2 deletions.
diff --git a/solara/server/settings.py b/solara/server/settings.py
@@ -85,6 +85,7 @@ class Config:
 
 class Kernel(BaseSettings):
     cull_timeout: str = "24h"
+    max_count: Optional[int] = None
 
     class Config:
         env_prefix = "solara_kernel_"

diff --git a/solara/server/starlette.py b/solara/server/starlette.py
@@ -1,9 +1,11 @@
 import asyncio
 import logging
+import math
 import os
 import sys
+import threading
 import typing
-from typing import Dict, List, Union, cast
+from typing import Dict, List, Optional, Union, cast
 from uuid import uuid4
 
 import anyio
@@ -51,6 +53,19 @@
 from .cdn_helper import cdn_url_path, get_path
 
 os.environ["SERVER_SOFTWARE"] = "solara/" + str(solara.__version__)
+limiter: Optional[anyio.CapacityLimiter] = None
+lock = threading.Lock()
+
+
+def _ensure_limiter():
+    # in older anyios (<4) the limiter can only be created in an async context
+    # so we call this in a starlette handler
+    global limiter
+    if limiter is None:
+        with lock:
+            if limiter is None:
+                limiter = anyio.CapacityLimiter(settings.kernel.max_count if settings.kernel.max_count is not None else math.inf)
+
 
 logger = logging.getLogger("solara.server.fastapi")
 # if we add these to the router, the server_test does not run (404's)
@@ -167,6 +182,7 @@ async def kernels(id):
 
 
 async def kernel_connection(ws: starlette.websockets.WebSocket):
+    _ensure_limiter()
     session_id = ws.cookies.get(server.COOKIE_KEY_SESSION_ID)
 
     if settings.oauth.private and not has_auth_support:
@@ -224,7 +240,7 @@ async def run():
     try:
         async with anyio.from_thread.BlockingPortal() as portal:
             ws_wrapper = WebsocketWrapper(ws, portal)
-            thread_return = anyio.to_thread.run_sync(websocket_thread_runner, ws, portal)  # type: ignore
+            thread_return = anyio.to_thread.run_sync(websocket_thread_runner, ws, portal, limiter=limiter)  # type: ignore
             await thread_return
     finally:
         if settings.main.experimental_performance:

diff --git a/solara/website/pages/docs/content/20-understanding/50-solara-server.md b/solara/website/pages/docs/content/20-understanding/50-solara-server.md
@@ -16,6 +16,10 @@ However, when the websocket between the web page and the server disconnects, the
 
 To optimize memory usage or address specific needs, one might opt for a shorter expiration duration. For instance, setting `SOLARA_KERNEL_CULL_TIMEOUT=1m` will cause sessions to expire after just 1 minute. Other possible options are `2d` (2 days), `3h` (3 hours), `30s` (30 seconds), etc. If no units are given, seconds are assumed.
 
+### Maximum number of kernels connected
+
+Each virtual kernel runs in its own thread, which ensures that one particular user (actually, one browser page) cannot block the execution of another virtual kernel. However, each thread consumes some resources. If you want to limit the number of kernels, you can do so by setting the `SOLARA_KERNEL_MAX_COUNT` environment variable. The default is unlimited (empty string), but you can set it to any number you like. If the limit is reached, the server will refuse new connections until a kernel is closed.
+
 
 ## Handling Multiple Workers
 In setups with multiple workers, it's possible for a page to reconnect to a different worker than its original. This would result in a loss of the virtual kernel (since it lives on a different worker), prompting the Solara app to initiate a fresh start. To prevent this scenario, a sticky session configuration is recommended, ensuring consistent client-worker connections. Utilizing a load balancer, such as [nginx](https://www.nginx.com/), can achieve this.