From 01ef8d6b38b62d25c0940020c345b542f65ee81c Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Mon, 14 Oct 2024 03:15:53 +0000 Subject: [PATCH] Implement periodic GC --- README.md | 2 +- server/requirements.txt | 3 ++- server/src/main.py | 28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3f197fa..39e2462 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Coming soon: - [x] File upload API (we may be able to simply use the [publisher](https://cvmfs.readthedocs.io/en/stable/cpt-repository-gateway.html#publisher-configuration). It has nice features like being able to handle concurrent transactions.) - The publisher appears to be bottlenecked at 20MiB/s when running the server in Kubernetes, and around 80MiB/s when running in Docker. `iperf` gives much higher bandwidth (between nodes and between the Kubernetes container and nodes), so it's likely not a network bottleneck. - When using the custom FastAPI upload server, speeds reach over 400MiB/s easily. We'll adopt this approach. 
-- [ ] Garbage collection
+- [x] Garbage collection
 - [ ] Better documentation
 - [ ] Automatic [whitelist re-signing](https://cvmfs.readthedocs.io/en/stable/apx-security.html#signature-details)
 
diff --git a/server/requirements.txt b/server/requirements.txt
index afc0a41..8b03d3a 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,4 +1,5 @@
 watcloud-utils @ git+https://github.com/WATonomous/watcloud-utils.git@c8ce1006716e65971f750560f90f442721b3777d
 python-slugify>=8.0.4,<9
 python-multipart>=0.0.12,<1
-uvicorn>=0.31.1,<1
\ No newline at end of file
+uvicorn>=0.31.1,<1
+apscheduler>=3.10.4,<4
\ No newline at end of file
diff --git a/server/src/main.py b/server/src/main.py
index fe06b01..0fe3a23 100644
--- a/server/src/main.py
+++ b/server/src/main.py
@@ -4,16 +4,19 @@
 import subprocess
 import sys
 import time
+from contextlib import asynccontextmanager
 from pathlib import Path
 from threading import Lock
 
 import typer
 import uvicorn
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
 from fastapi import HTTPException, UploadFile
 from fastapi.responses import FileResponse
 from slugify import slugify
 from typing_extensions import Annotated
-from watcloud_utils.fastapi import WATcloudFastAPI
+from watcloud_utils.fastapi import WATcloudFastAPI, FastAPI
 from watcloud_utils.logging import logger, set_up_logging
 from watcloud_utils.typer import app
 
@@ -99,13 +102,22 @@ def init_cvmfs_repo(
     print(f"Successfully initialized CVMFS repo: {repo_name}")
     print(f"The public key is available via HTTP at GET /cvmfs-meta/{repo_name}.pub")
 
-@app.command()
-def start_server():
-    print("Starting server")
-    while True:
-        pass
-
-fastapi_app = WATcloudFastAPI(logger=logger)
+@asynccontextmanager
+async def fastapi_lifespan(app: FastAPI):
+    """
+    FastAPI lifespan hook: keeps the periodic GC scheduler running while the app is up.
+    Code before `yield` runs at app startup; the `finally` block runs at app shutdown.
+    """
+    scheduler.start()  # outside the try: shutdown() raises if the scheduler never started
+    try:
+        # Run garbage collection every minute
+        scheduler.add_job(gc, CronTrigger.from_crontab("* * * * *"))
+        yield
+    finally:
+        scheduler.shutdown()
+
+scheduler = BackgroundScheduler()
+fastapi_app = WATcloudFastAPI(logger=logger, lifespan=fastapi_lifespan)
 transaction_lock = Lock()
 
 @fastapi_app.post("/repos/{repo_name}")