diff --git a/Dockerfile b/Dockerfile index 7556de2..403ab25 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM alpine:3.20.2@sha256:0a4eaa0eecf5f8c050e5bba433f58c052be7587ee8af3e8b3910ef ARG DOCKER_METADATA_OUTPUT_JSON='{}' ENV DOCKER_METADATA_OUTPUT_JSON=${DOCKER_METADATA_OUTPUT_JSON} -RUN apk add py-pip curl +RUN apk add py-pip curl git COPY requirements.txt /tmp/requirements.txt RUN pip install -r /tmp/requirements.txt --break-system-packages && rm /tmp/requirements.txt diff --git a/requirements.txt b/requirements.txt index 56aa107..e3e882e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -uvicorn[standard]>=0.30.6<1 -sentry-sdk[fastapi]>=2.13.0<3 -azure-data-tables>=12.5.0<13 -typer>=0.12.4<1 +uvicorn[standard]>=0.30.6,<1 +azure-data-tables>=12.5.0,<13 +typer>=0.12.4,<1 +# TODO: fix to a version once we are stable +watcloud-utils @ git+https://github.com/WATonomous/watcloud-utils.git +apscheduler>=3.10.4,<4 + diff --git a/src/main.py b/src/main.py index 522c8af..ecb9ddc 100644 --- a/src/main.py +++ b/src/main.py @@ -1,149 +1,53 @@ -import json -import logging import os import re import time import urllib.parse +from contextlib import asynccontextmanager from smtplib import SMTP from textwrap import dedent -import sentry_sdk +from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.triggers.cron import CronTrigger from fastapi import FastAPI, HTTPException, Request -from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from sentry_sdk.crons import capture_checkin -from sentry_sdk.crons.consts import MonitorStatus -from sentry_sdk.integrations.logging import LoggingIntegration - -from utils import get_azure_table_client, logger, random_str, set_up_logging - -# BUILD_INFO is generated by the build pipeline (e.g. docker/metadata-action). -# It looks like: -# {"tags":["ghcr.io/watonomous/repo-ingestion:main"],"labels":{"org.opencontainers.image.title":"repo-ingestion","org.opencontainers.image.description":"Simple server to receive file changes and open GitHub pull requests","org.opencontainers.image.url":"https://github.com/WATonomous/repo-ingestion","org.opencontainers.image.source":"https://github.com/WATonomous/repo-ingestion","org.opencontainers.image.version":"main","org.opencontainers.image.created":"2024-01-20T16:10:39.421Z","org.opencontainers.image.revision":"1d55b62b15c78251e0560af9e97927591e260a98","org.opencontainers.image.licenses":""}} -BUILD_INFO = json.loads(os.getenv("DOCKER_METADATA_OUTPUT_JSON", "{}")) -IS_SENTRY_ENABLED = not not os.getenv("SENTRY_DSN") - -# Set up Sentry -if IS_SENTRY_ENABLED: - build_labels = BUILD_INFO.get("labels", {}) - image_title = build_labels.get("org.opencontainers.image.title", "unknown_image") - image_version = build_labels.get( - "org.opencontainers.image.version", "unknown_version" - ) - image_rev = build_labels.get("org.opencontainers.image.revision", "unknown_rev") - - sentry_config = { - "dsn": os.environ["SENTRY_DSN"], - "environment": os.getenv("DEPLOYMENT_ENVIRONMENT", "unknown"), - "release": os.getenv( - "SENTRY_RELEASE", f"{image_title}:{image_version}@{image_rev}" - ), - } - - print(f"Sentry SDK version: {sentry_sdk.VERSION}") - print(f"Sentry DSN found. Setting up Sentry with config: {sentry_config}") - - sentry_logging = LoggingIntegration( - level=logging.INFO, # Capture info and above as breadcrumbs - event_level=logging.ERROR, # Send errors as events - ) - - def sentry_traces_sampler(sampling_context): - # Inherit parent sampling decision - if sampling_context["parent_sampled"] is not None: - return sampling_context["parent_sampled"] - - # Don't need to sample health checks - if sampling_context.get("asgi_scope", {}).get("path", "").startswith("/health"): - return 0 - - # Sample everything else - return 1 - - sentry_sdk.init( - **sentry_config, - integrations=[sentry_logging], - # Set traces_sample_rate to 1.0 to capture 100% - # of transactions for performance monitoring. - # We recommend adjusting this value in production, - # traces_sample_rate=1.0, - traces_sampler=sentry_traces_sampler, - enable_tracing=True, - ) -else: - print("No Sentry DSN found. Skipping Sentry setup.") - -app = FastAPI() -state = { - "sentry_cron_last_ping_time": 0, - "num_signups": 0, - "num_successful_confirms": 0, - "num_failed_confirms": 0, -} - -# Add CORS for local development. In production, this is handled by the reverse proxy. -origins = [ - "http://localhost:3000", -] - -app.add_middleware( - CORSMiddleware, - allow_origins=origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], +from watcloud_utils.fastapi import WATcloudFastAPI +from watcloud_utils.logging import logger, set_up_logging + +from utils import get_azure_table_client, random_str + +scheduler = BackgroundScheduler() +scheduler.start() + + +@asynccontextmanager +async def lifespan(app: FastAPI): + scheduler.add_job(cleanup, trigger=CronTrigger.from_crontab("* * * * *")) + yield + scheduler.shutdown() + + +def healthcheck(app: WATcloudFastAPI): + cleanup_delay_threshold = 120 + if time.time() - app.runtime_info["last_cleanup_time"] > cleanup_delay_threshold: + msg = f"Last cleanup was more than {cleanup_delay_threshold} seconds ago." + logger.error(msg) + raise HTTPException(status_code=500, detail=msg) + + +set_up_logging() +app = WATcloudFastAPI( + logger=logger, + lifespan=lifespan, + initial_runtime_info={ + "num_signups": 0, + "num_successful_confirms": 0, + "num_failed_confirms": 0, + "num_expired_signups": 0, + "last_cleanup_time": time.time(), + }, + health_fns=[healthcheck], ) - -@app.on_event("startup") -async def startup_event(): - set_up_logging() - logger.info( - f"Logging configured with level {logger.level} ({logging.getLevelName(logger.level)})" - ) - - -@app.get("/health") -def read_health(): - current_time = time.time() - success = True - # Assuming the /health endpoint is called every 10 seconds, ping Sentry about once every minute. - if IS_SENTRY_ENABLED and current_time - state["sentry_cron_last_ping_time"] > 50: - state["sentry_cron_last_ping_time"] = current_time - capture_checkin( - monitor_slug="repo-ingestion", - status=MonitorStatus.OK if success else MonitorStatus.ERROR, - monitor_config={ - "schedule": {"type": "interval", "value": 1, "unit": "minute"}, - "checkin_margin": 5, # minutes - "max_runtime": 1, # minutes - "failure_issue_threshold": 1, - "recovery_threshold": 2, - }, - ) - logging.info(f"Pinged Sentry CRON with status {'OK' if success else 'ERROR'}") - - return {"status": "ok"} - - -@app.get("/build-info") -def read_build_info(): - return BUILD_INFO - - -@app.get("/runtime-info") -def read_runtime_info(): - return { - "sentry_enabled": IS_SENTRY_ENABLED, - "sentry_sdk_version": sentry_sdk.VERSION, - "deployment_environment": os.getenv("DEPLOYMENT_ENVIRONMENT", "unknown"), - "sentry_cron_last_ping_time": state["sentry_cron_last_ping_time"], - "num_signups": state["num_signups"], - "num_successful_confirms": state["num_successful_confirms"], - "num_failed_confirms": state["num_failed_confirms"], - } - - table_client = get_azure_table_client("signups", create_table_if_not_exists=True) @@ -173,7 +77,9 @@ def sign_up(req: SignUpRequest, request: Request): } ) - app_url = os.environ.get("APP_URL") or f"{request.url.scheme}://{request.url.netloc}" + app_url = ( + os.environ.get("APP_URL") or f"{request.url.scheme}://{request.url.netloc}" + ) confirmation_url = f"{app_url}/confirm/{req.mailing_list}/{urllib.parse.quote_plus(req.email)}/{code}" smtp = SMTP(os.environ["SMTP_HOST"], port=os.environ["SMTP_PORT"]) @@ -234,7 +140,7 @@ def sign_up(req: SignUpRequest, request: Request): ), ) - state["num_signups"] += 1 + app.runtime_info["num_signups"] += 1 return {"status": "ok", "message": f"Confirmation email sent to '{req.email}'."} @@ -242,14 +148,15 @@ def sign_up(req: SignUpRequest, request: Request): @app.get("/confirm/{mailing_list}/{email}/{code}") def confirm(mailing_list: str, email: str, code: str): from azure.core.exceptions import ResourceNotFoundError + try: entity = table_client.get_entity(partition_key=mailing_list, row_key=email) except ResourceNotFoundError: - state["num_failed_confirms"] += 1 + app.runtime_info["num_failed_confirms"] += 1 raise HTTPException(status_code=400, detail="Code expired or invalid") if entity["Code"] != code or time.time() - entity["CreatedAt"] > CODE_TTL_SEC: - state["num_failed_confirms"] += 1 + app.runtime_info["num_failed_confirms"] += 1 raise HTTPException(status_code=400, detail="Code expired or invalid") # TODO: Add email to mailing list @@ -263,7 +170,6 @@ def confirm(mailing_list: str, email: str, code: str): } -# TODO: run this on a schedule @app.post("/cleanup") def cleanup(): """ @@ -277,7 +183,12 @@ def cleanup(): ) deleted_count = 0 for entity in expired_entities: - table_client.delete_entity(partition_key=entity["PartitionKey"], row_key=entity["RowKey"]) + table_client.delete_entity( + partition_key=entity["PartitionKey"], row_key=entity["RowKey"] + ) deleted_count += 1 - return {"status": "ok", "message": f"Deleted {deleted_count} expired signups."} + app.runtime_info["num_expired_signups"] += deleted_count + msg = f"cleanup: Deleted {deleted_count} expired signup(s)." + logger.info(msg) + return {"status": "ok", "message": msg} diff --git a/src/utils.py b/src/utils.py index 4408f08..e932154 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,49 +1,7 @@ -import json -import logging import os -from enum import Enum -import typer -import yaml - -logger = logging.getLogger() - - -class OutputFormat(str, Enum): - yaml = "yaml" - json = "json" - raw = "raw" - - -def cli_print_retval(ret: dict | list, output_format: OutputFormat): - if output_format == OutputFormat.yaml: - print(yaml.dump(ret, default_flow_style=False)) - elif output_format == OutputFormat.json: - print(json.dumps(ret, indent=2)) - elif output_format == OutputFormat.raw: - print(ret) - else: - raise ValueError(f"Unknown output format: {output_format}") - - -app = typer.Typer(result_callback=cli_print_retval) - - -@app.callback() -# This function is used to add global CLI options -def main(output_format: OutputFormat = OutputFormat.yaml): - pass - - -def set_up_logging(): - log_level = os.environ.get("APP_LOG_LEVEL", "INFO") - logger.setLevel(log_level) - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - handler = logging.StreamHandler() - handler.setFormatter(formatter) - logger.addHandler(handler) +from watcloud_utils.logging import logger +from watcloud_utils.typer import app def get_azure_table_client(table_name: str, create_table_if_not_exists: bool = False): @@ -51,8 +9,8 @@ def get_azure_table_client(table_name: str, create_table_if_not_exists: bool = F Get an Azure Table client for the given table name. Docs: https://learn.microsoft.com/en-us/python/api/azure-data-tables/azure.data.tables.tableclient?view=azure-python """ - from azure.data.tables import TableClient from azure.core.exceptions import ResourceExistsError + from azure.data.tables import TableClient conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"] @@ -75,12 +33,14 @@ def dump_azure_table(table_name: str): table_client = get_azure_table_client(table_name) return list(table_client.list_entities()) + @app.command() def delete_azure_table(table_name: str): table_client = get_azure_table_client(table_name) table_client.delete_table() return f"Deleted table: {table_name}" + @app.command() def random_str(length: int = 10): """