Skip to content

Commit

Permalink
Simplify code using watcloud-utils
Browse files Browse the repository at this point in the history
Developed alongside WATonomous/watcloud-utils#2
  • Loading branch information
ben-z committed Aug 18, 2024
1 parent 3d8201e commit 86bac61
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 192 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ FROM alpine:3.20.2@sha256:0a4eaa0eecf5f8c050e5bba433f58c052be7587ee8af3e8b3910ef
ARG DOCKER_METADATA_OUTPUT_JSON='{}'
ENV DOCKER_METADATA_OUTPUT_JSON=${DOCKER_METADATA_OUTPUT_JSON}

RUN apk add py-pip curl
RUN apk add py-pip curl git

COPY requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt --break-system-packages && rm /tmp/requirements.txt
Expand Down
11 changes: 7 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
uvicorn[standard]>=0.30.6<1
sentry-sdk[fastapi]>=2.13.0<3
azure-data-tables>=12.5.0<13
typer>=0.12.4<1
uvicorn[standard]>=0.30.6,<1
azure-data-tables>=12.5.0,<13
typer>=0.12.4,<1
# TODO: fix to a version once we are stable
watcloud-utils @ git+https://github.com/WATonomous/watcloud-utils.git
apscheduler>=3.10.4,<4

195 changes: 53 additions & 142 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,149 +1,53 @@
import json
import logging
import os
import re
import time
import urllib.parse
from contextlib import asynccontextmanager
from smtplib import SMTP
from textwrap import dedent

import sentry_sdk
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sentry_sdk.crons import capture_checkin
from sentry_sdk.crons.consts import MonitorStatus
from sentry_sdk.integrations.logging import LoggingIntegration

from utils import get_azure_table_client, logger, random_str, set_up_logging

# BUILD_INFO is generated by the build pipeline (e.g. docker/metadata-action).
# It looks like:
# {"tags":["ghcr.io/watonomous/repo-ingestion:main"],"labels":{"org.opencontainers.image.title":"repo-ingestion","org.opencontainers.image.description":"Simple server to receive file changes and open GitHub pull requests","org.opencontainers.image.url":"https://github.com/WATonomous/repo-ingestion","org.opencontainers.image.source":"https://github.com/WATonomous/repo-ingestion","org.opencontainers.image.version":"main","org.opencontainers.image.created":"2024-01-20T16:10:39.421Z","org.opencontainers.image.revision":"1d55b62b15c78251e0560af9e97927591e260a98","org.opencontainers.image.licenses":""}}
BUILD_INFO = json.loads(os.getenv("DOCKER_METADATA_OUTPUT_JSON", "{}"))
IS_SENTRY_ENABLED = not not os.getenv("SENTRY_DSN")

# Set up Sentry
if IS_SENTRY_ENABLED:
build_labels = BUILD_INFO.get("labels", {})
image_title = build_labels.get("org.opencontainers.image.title", "unknown_image")
image_version = build_labels.get(
"org.opencontainers.image.version", "unknown_version"
)
image_rev = build_labels.get("org.opencontainers.image.revision", "unknown_rev")

sentry_config = {
"dsn": os.environ["SENTRY_DSN"],
"environment": os.getenv("DEPLOYMENT_ENVIRONMENT", "unknown"),
"release": os.getenv(
"SENTRY_RELEASE", f"{image_title}:{image_version}@{image_rev}"
),
}

print(f"Sentry SDK version: {sentry_sdk.VERSION}")
print(f"Sentry DSN found. Setting up Sentry with config: {sentry_config}")

sentry_logging = LoggingIntegration(
level=logging.INFO, # Capture info and above as breadcrumbs
event_level=logging.ERROR, # Send errors as events
)

def sentry_traces_sampler(sampling_context):
# Inherit parent sampling decision
if sampling_context["parent_sampled"] is not None:
return sampling_context["parent_sampled"]

# Don't need to sample health checks
if sampling_context.get("asgi_scope", {}).get("path", "").startswith("/health"):
return 0

# Sample everything else
return 1

sentry_sdk.init(
**sentry_config,
integrations=[sentry_logging],
# Set traces_sample_rate to 1.0 to capture 100%
# of transactions for performance monitoring.
# We recommend adjusting this value in production,
# traces_sample_rate=1.0,
traces_sampler=sentry_traces_sampler,
enable_tracing=True,
)
else:
print("No Sentry DSN found. Skipping Sentry setup.")

app = FastAPI()
state = {
"sentry_cron_last_ping_time": 0,
"num_signups": 0,
"num_successful_confirms": 0,
"num_failed_confirms": 0,
}

# Add CORS for local development. In production, this is handled by the reverse proxy.
origins = [
"http://localhost:3000",
]

app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
from watcloud_utils.fastapi import WATcloudFastAPI
from watcloud_utils.logging import logger, set_up_logging

from utils import get_azure_table_client, random_str

scheduler = BackgroundScheduler()
scheduler.start()


@asynccontextmanager
async def lifespan(app: FastAPI):
scheduler.add_job(cleanup, trigger=CronTrigger.from_crontab("* * * * *"))
yield
scheduler.shutdown()


def healthcheck(app: WATcloudFastAPI):
cleanup_delay_threshold = 120
if time.time() - app.runtime_info["last_cleanup_time"] > cleanup_delay_threshold:
msg = f"Last cleanup was more than {cleanup_delay_threshold} seconds ago."
logger.error(msg)
raise HTTPException(status_code=500, detail=msg)


set_up_logging()
app = WATcloudFastAPI(
logger=logger,
lifespan=lifespan,
initial_runtime_info={
"num_signups": 0,
"num_successful_confirms": 0,
"num_failed_confirms": 0,
"num_expired_signups": 0,
"last_cleanup_time": time.time(),
},
health_fns=[healthcheck],
)


@app.on_event("startup")
async def startup_event():
set_up_logging()
logger.info(
f"Logging configured with level {logger.level} ({logging.getLevelName(logger.level)})"
)


@app.get("/health")
def read_health():
current_time = time.time()
success = True
# Assuming the /health endpoint is called every 10 seconds, ping Sentry about once every minute.
if IS_SENTRY_ENABLED and current_time - state["sentry_cron_last_ping_time"] > 50:
state["sentry_cron_last_ping_time"] = current_time
capture_checkin(
monitor_slug="repo-ingestion",
status=MonitorStatus.OK if success else MonitorStatus.ERROR,
monitor_config={
"schedule": {"type": "interval", "value": 1, "unit": "minute"},
"checkin_margin": 5, # minutes
"max_runtime": 1, # minutes
"failure_issue_threshold": 1,
"recovery_threshold": 2,
},
)
logging.info(f"Pinged Sentry CRON with status {'OK' if success else 'ERROR'}")

return {"status": "ok"}


@app.get("/build-info")
def read_build_info():
return BUILD_INFO


@app.get("/runtime-info")
def read_runtime_info():
return {
"sentry_enabled": IS_SENTRY_ENABLED,
"sentry_sdk_version": sentry_sdk.VERSION,
"deployment_environment": os.getenv("DEPLOYMENT_ENVIRONMENT", "unknown"),
"sentry_cron_last_ping_time": state["sentry_cron_last_ping_time"],
"num_signups": state["num_signups"],
"num_successful_confirms": state["num_successful_confirms"],
"num_failed_confirms": state["num_failed_confirms"],
}


table_client = get_azure_table_client("signups", create_table_if_not_exists=True)


Expand Down Expand Up @@ -173,7 +77,9 @@ def sign_up(req: SignUpRequest, request: Request):
}
)

app_url = os.environ.get("APP_URL") or f"{request.url.scheme}://{request.url.netloc}"
app_url = (
os.environ.get("APP_URL") or f"{request.url.scheme}://{request.url.netloc}"
)
confirmation_url = f"{app_url}/confirm/{req.mailing_list}/{urllib.parse.quote_plus(req.email)}/{code}"

smtp = SMTP(os.environ["SMTP_HOST"], port=os.environ["SMTP_PORT"])
Expand Down Expand Up @@ -234,22 +140,23 @@ def sign_up(req: SignUpRequest, request: Request):
),
)

state["num_signups"] += 1
app.runtime_info["num_signups"] += 1

return {"status": "ok", "message": f"Confirmation email sent to '{req.email}'."}


@app.get("/confirm/{mailing_list}/{email}/{code}")
def confirm(mailing_list: str, email: str, code: str):
from azure.core.exceptions import ResourceNotFoundError

try:
entity = table_client.get_entity(partition_key=mailing_list, row_key=email)
except ResourceNotFoundError:
state["num_failed_confirms"] += 1
app.runtime_info["num_failed_confirms"] += 1
raise HTTPException(status_code=400, detail="Code expired or invalid")

if entity["Code"] != code or time.time() - entity["CreatedAt"] > CODE_TTL_SEC:
state["num_failed_confirms"] += 1
app.runtime_info["num_failed_confirms"] += 1
raise HTTPException(status_code=400, detail="Code expired or invalid")

# TODO: Add email to mailing list
Expand All @@ -263,7 +170,6 @@ def confirm(mailing_list: str, email: str, code: str):
}


# TODO: run this on a schedule
@app.post("/cleanup")
def cleanup():
"""
Expand All @@ -277,7 +183,12 @@ def cleanup():
)
deleted_count = 0
for entity in expired_entities:
table_client.delete_entity(partition_key=entity["PartitionKey"], row_key=entity["RowKey"])
table_client.delete_entity(
partition_key=entity["PartitionKey"], row_key=entity["RowKey"]
)
deleted_count += 1

return {"status": "ok", "message": f"Deleted {deleted_count} expired signups."}
app.runtime_info["num_expired_signups"] += deleted_count
msg = f"cleanup: Deleted {deleted_count} expired signup(s)."
logger.info(msg)
return {"status": "ok", "message": msg}
50 changes: 5 additions & 45 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,16 @@
import json
import logging
import os
from enum import Enum

import typer
import yaml

logger = logging.getLogger()


class OutputFormat(str, Enum):
yaml = "yaml"
json = "json"
raw = "raw"


def cli_print_retval(ret: dict | list, output_format: OutputFormat):
if output_format == OutputFormat.yaml:
print(yaml.dump(ret, default_flow_style=False))
elif output_format == OutputFormat.json:
print(json.dumps(ret, indent=2))
elif output_format == OutputFormat.raw:
print(ret)
else:
raise ValueError(f"Unknown output format: {output_format}")


app = typer.Typer(result_callback=cli_print_retval)


@app.callback()
# This function is used to add global CLI options
def main(output_format: OutputFormat = OutputFormat.yaml):
pass


def set_up_logging():
log_level = os.environ.get("APP_LOG_LEVEL", "INFO")
logger.setLevel(log_level)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
from watcloud_utils.logging import logger
from watcloud_utils.typer import app


def get_azure_table_client(table_name: str, create_table_if_not_exists: bool = False):
"""
Get an Azure Table client for the given table name.
Docs: https://learn.microsoft.com/en-us/python/api/azure-data-tables/azure.data.tables.tableclient?view=azure-python
"""
from azure.data.tables import TableClient
from azure.core.exceptions import ResourceExistsError
from azure.data.tables import TableClient

conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"]

Expand All @@ -75,12 +33,14 @@ def dump_azure_table(table_name: str):
table_client = get_azure_table_client(table_name)
return list(table_client.list_entities())


@app.command()
def delete_azure_table(table_name: str):
table_client = get_azure_table_client(table_name)
table_client.delete_table()
return f"Deleted table: {table_name}"


@app.command()
def random_str(length: int = 10):
"""
Expand Down

0 comments on commit 86bac61

Please sign in to comment.