Skip to content

Commit

Permalink
Merge pull request #6 from mlibrary/DWI-20-add-to-db_script
Browse files Browse the repository at this point in the history
DWI 20: make CLI; add an add_to_db script
  • Loading branch information
niquerio authored Oct 4, 2024
2 parents 1ec9547 + 4967ae2 commit 49c6ec8
Show file tree
Hide file tree
Showing 24 changed files with 1,026 additions and 42 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
12 changes: 7 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# wheel packages, which are both needed for installing applications like Pandas and Numpy.

# The base layer will contain the dependencies shared by the other layers
FROM python:3.11-slim-bookworm as base
FROM python:3.11-slim-bookworm AS base

# Allow the arguments to be read into the Dockerfile. Ex: .env > compose.yml > Dockerfile
ARG POETRY_VERSION=1.8.3
Expand All @@ -34,10 +34,12 @@ RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \
# Set the working directory to /app
WORKDIR /app

ENV PYTHONPATH="/app"

CMD ["tail", "-f", "/dev/null"]

# Both build and development need poetry, so it is its own step.
FROM base as poetry
FROM base AS poetry

RUN pip install poetry==${POETRY_VERSION}

Expand All @@ -56,23 +58,23 @@ ENV PYTHONUNBUFFERED=1\
POETRY_VIRTUALENVS_IN_PROJECT=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache

FROM poetry as build
FROM poetry AS build
# Just copy the files needed to install the dependencies
COPY pyproject.toml poetry.lock README.md ./

# Use poetry to create a requirements.txt file. Don't include development dependencies.
RUN poetry export --without dev -f requirements.txt --output requirements.txt

# We want poetry on in development
FROM poetry as development
FROM poetry AS development
RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \
git

# Switch to the non-root user "app"
USER app

# We don't want poetry on in production, so we copy the needed files from the build stage
FROM base as production
FROM base AS production
# Switch to the non-root user "user"
# RUN mkdir -p /venv && chown ${UID}:${GID} /venv

Expand Down
3 changes: 3 additions & 0 deletions aim/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from aim.cli.main import app # pragma: no cover

app() # pragma: no cover
Empty file added aim/cli/__init__.py
Empty file.
32 changes: 32 additions & 0 deletions aim/cli/digifeeds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import typer
from aim.digifeeds.add_to_db import add_to_db as add_to_digifeeds_db
from aim.digifeeds.list_barcodes_in_bucket import list_barcodes_in_bucket
from aim.digifeeds.database import models, main
import json
import sys


app = typer.Typer()


@app.command()
def add_to_db(barcode: str):
    """Add a barcode to the digifeeds database and print what happened to it."""
    print(f'Adding barcode "{barcode}" to database')
    item = add_to_digifeeds_db(barcode)

    # The "not found" notice is independent of the set-membership message
    # below, so both lines can be printed for the same item.
    if item.has_status("not_found_in_alma"):
        print("Item not found in alma.")

    if item.has_status("added_to_digifeeds_set"):
        message = "Item added to digifeeds set"
    else:
        message = "Item not added to digifeeds set"
    print(message)


@app.command()
def load_statuses():
    """Call models.load_statuses with a session opened from main.SessionLocal."""
    open_session = main.SessionLocal
    # The context manager closes the session once the statuses are loaded.
    with open_session() as session:
        models.load_statuses(session=session)


@app.command()
def list_barcodes_in_input_bucket():
    """Write the barcodes returned by list_barcodes_in_bucket() to stdout as JSON."""
    barcodes = list_barcodes_in_bucket()
    sys.stdout.write(json.dumps(barcodes))
9 changes: 9 additions & 0 deletions aim/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import typer
import aim.cli.digifeeds as digifeeds

# Root Typer application. The digifeeds commands are mounted as the
# "digifeeds" sub-command group.
app = typer.Typer()
app.add_typer(digifeeds.app, name="digifeeds")


if __name__ == "__main__": # pragma: no cover
app()
27 changes: 27 additions & 0 deletions aim/digifeeds/add_to_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from aim.digifeeds.alma_client import AlmaClient
from aim.digifeeds.db_client import DBClient
from aim.digifeeds.item import Item
from requests.exceptions import HTTPError


def _alma_error_codes(error) -> set:
    """Return the Alma error codes found in the response attached to *error*.

    An empty set is returned when the error carries no response or when the
    body does not have the ``{"errorList": {"error": [{"errorCode": ...}]}}``
    shape, so the caller can re-raise the original HTTP error instead of a
    secondary parsing error.
    """
    response = getattr(error, "response", None)
    if response is None:
        return set()
    try:
        errors = response.json()["errorList"]["error"]
    except (ValueError, KeyError, TypeError):
        # Body is not JSON (ValueError) or not shaped like an Alma error list.
        return set()
    if not isinstance(errors, list):
        return set()
    return {e.get("errorCode") for e in errors if isinstance(e, dict)}


def add_to_db(barcode: str):
    """Record *barcode* in the digifeeds database and add it to the Alma set.

    The item is fetched from (or created in) the digifeeds database. Unless it
    already has the ``added_to_digifeeds_set`` status, the barcode is added to
    the Alma digifeeds set and that status is recorded.

    If Alma rejects the request with error code ``60120``, the item is given
    the ``not_found_in_alma`` status (once) and returned without being marked
    as added.

    Args:
        barcode: Barcode of the item to add.

    Returns:
        Item: the item wrapping the latest data returned by the database API.

    Raises:
        HTTPError: for any Alma failure other than error code 60120, including
            failures whose response body cannot be parsed.
    """
    db_client = DBClient()
    item = Item(db_client.get_or_add_item(barcode))
    if item.has_status("added_to_digifeeds_set"):
        return item

    try:
        AlmaClient().add_barcode_to_digifeeds_set(barcode)
    except HTTPError as ext_inst:
        if "60120" not in _alma_error_codes(ext_inst):
            # Bare raise keeps the original traceback intact.
            raise
        if not item.has_status("not_found_in_alma"):
            item = Item(
                db_client.add_item_status(barcode=barcode, status="not_found_in_alma")
            )
        return item

    return Item(
        db_client.add_item_status(barcode=barcode, status="added_to_digifeeds_set")
    )
32 changes: 32 additions & 0 deletions aim/digifeeds/alma_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import requests
from aim.services import S


class AlmaClient:
    """Client for the Alma API calls used by digifeeds."""

    def __init__(self) -> None:
        """Build a requests session carrying the JSON headers and the API key."""
        session = requests.Session()
        session.headers.update(
            {
                "content": "application/json",
                "Accept": "application/json",
                "Authorization": f"apikey {S.alma_api_key}",
            }
        )
        self.session = session
        self.base_url = S.alma_api_url
        self.digifeeds_set_id = S.digifeeds_set_id

    def add_barcode_to_digifeeds_set(self, barcode: str) -> None:
        """Add *barcode* as a member of the configured digifeeds set.

        Raises:
            requests.exceptions.HTTPError: when Alma answers with a 4xx/5xx
                status.
        """
        endpoint = self._url(f"conf/sets/{self.digifeeds_set_id}")
        params = {
            "id_type": "BARCODE",
            "op": "add_members",
            "fail_on_invalid_id": "true",
        }
        payload = {"members": {"member": [{"id": barcode}]}}
        response = self.session.post(endpoint, params=params, json=payload)
        if response.status_code == 200:
            return None
        response.raise_for_status()
        return None

    def _url(self, path: str) -> str:
        """Join *path* onto the Alma base URL."""
        base = self.base_url
        return f"{base}/{path}"
12 changes: 0 additions & 12 deletions aim/digifeeds/bin/load_statuses.py

This file was deleted.

32 changes: 21 additions & 11 deletions aim/digifeeds/database/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,29 @@
from aim.digifeeds.database import crud, schemas
from aim.services import S

if S.ci_on is None: # pragma: no cover
engine = create_engine(S.mysql_database)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# models.Base.metadata.create_all(bind=engine)
# This is here so SessionLocal won't have a problem in tests in github
if S.ci_on: # pragma: no cover
engine = create_engine(S.test_database)
else: # pragma: no cover
engine = create_engine(S.mysql_database)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
app = FastAPI()


# Dependency
def get_db(): # pragma: no cover
def get_db(): # pragma: no cover
db = SessionLocal()
try:
yield db
finally:
db.close()


@app.get("/items/", response_model_by_alias=False)
def get_items(in_zephir: bool | None = None, db: Session = Depends(get_db)) -> list[schemas.Item]:
def get_items(
in_zephir: bool | None = None, db: Session = Depends(get_db)
) -> list[schemas.Item]:
db_items = crud.get_items(in_zephir=in_zephir, db=db)
return db_items

Expand All @@ -32,6 +38,7 @@ def get_item(barcode: str, db: Session = Depends(get_db)) -> schemas.Item:
raise HTTPException(status_code=404, detail="Item not found")
return db_item


@app.post("/items/{barcode}", response_model_by_alias=False)
def create_item(barcode: str, db: Session = Depends(get_db)) -> schemas.Item:
item = schemas.ItemCreate(barcode=barcode)
Expand All @@ -41,18 +48,21 @@ def create_item(barcode: str, db: Session = Depends(get_db)) -> schemas.Item:
db_item = crud.add_item(item=item, db=db)
return db_item


@app.put("/items/{barcode}/status/{status_name}", response_model_by_alias=False)
def update_item(barcode: str, status_name: str, db: Session=Depends(get_db)) -> schemas.Item:
def update_item(
barcode: str, status_name: str, db: Session = Depends(get_db)
) -> schemas.Item:
db_status = crud.get_status(name=status_name, db=db)
if db_status is None:
raise HTTPException(status_code=404, detail="Status not found")
db_item = crud.get_item(barcode=barcode, db=db)
if db_item is None:
raise HTTPException(status_code=404, detail="Item not found")
return crud.add_item_status(db=db,item=db_item,status=db_status)
return crud.add_item_status(db=db, item=db_item, status=db_status)


@app.get("/statuses")
def get_statuses(db: Session=Depends(get_db)) -> list[schemas.Status]:
def get_statuses(db: Session = Depends(get_db)) -> list[schemas.Status]:
db_statuses = crud.get_statuses(db=db)
return db_statuses

return db_statuses
39 changes: 39 additions & 0 deletions aim/digifeeds/db_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import requests
from aim.services import S


class DBClient:
    """Client for the digifeeds database HTTP API."""

    def __init__(self) -> None:
        """Read the API base URL from the service configuration."""
        self.base_url = S.digifeeds_api_url

    def get_item(self, barcode: str):
        """Return the item JSON for *barcode*, or None when the API answers 404."""
        response = requests.get(self._url(f"items/{barcode}"))
        if response.status_code == 404:
            return None
        return self._json_or_raise(response)

    def add_item(self, barcode: str):
        """Create the item for *barcode* and return its JSON."""
        response = requests.post(self._url(f"items/{barcode}"))
        return self._json_or_raise(response)

    def get_or_add_item(self, barcode: str):
        """Return the existing item for *barcode*, creating it when absent."""
        return self.get_item(barcode) or self.add_item(barcode)

    def add_item_status(self, barcode: str, status: str):
        """Attach *status* to the item for *barcode* and return the item JSON."""
        response = requests.put(self._url(f"items/{barcode}/status/{status}"))
        return self._json_or_raise(response)

    @staticmethod
    def _json_or_raise(response):
        """Return the decoded body, raising first on a 4xx/5xx response."""
        if response.status_code != 200:
            response.raise_for_status()
        return response.json()

    def _url(self, path) -> str:
        """Join *path* onto the API base URL."""
        base = self.base_url
        return f"{base}/{path}"
10 changes: 10 additions & 0 deletions aim/digifeeds/item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class Item:
    """Wrapper around an item dictionary with "barcode" and "statuses" keys."""

    def __init__(self, data: dict) -> None:
        """Keep a reference to the raw item data."""
        self.data = data

    def has_status(self, status: str) -> bool:
        """Return True when one of the item's statuses is named *status*."""
        for entry in self.data["statuses"]:
            if entry["name"] == status:
                return True
        return False

    @property
    def barcode(self) -> str:
        """The item's barcode."""
        return self.data["barcode"]
19 changes: 19 additions & 0 deletions aim/digifeeds/list_barcodes_in_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import boto3
from aim.services import S


def list_barcodes_in_bucket() -> list[str]:
    """Return the barcodes that have a folder directly under the S3 input path.

    Each "folder" (common prefix) found directly below
    ``S.digifeeds_s3_input_path`` in ``S.digifeeds_s3_bucket`` is reported as
    one barcode, named after the folder.

    Returns:
        The folder names, in the order S3 lists them. Empty when the input
        path contains no folders.
    """
    s3 = boto3.client(
        "s3",
        aws_access_key_id=S.digifeeds_s3_access_key,
        aws_secret_access_key=S.digifeeds_s3_secret_access_key,
    )
    prefix = S.digifeeds_s3_input_path + "/"
    request = {
        "Bucket": S.digifeeds_s3_bucket,
        "Prefix": prefix,
        "Delimiter": "/",
    }

    barcodes = []
    while True:
        response = s3.list_objects_v2(**request)
        # "CommonPrefixes" is absent from the response when nothing matches.
        for common_prefix in response.get("CommonPrefixes", []):
            # Strip the listing prefix rather than splitting on "/", so an
            # input path with several segments still yields the folder name.
            barcodes.append(common_prefix["Prefix"].removeprefix(prefix).rstrip("/"))
        # A single call returns a limited page; follow the continuation token
        # until S3 reports the listing is complete.
        if not response.get("IsTruncated"):
            break
        request["ContinuationToken"] = response["NextContinuationToken"]
    return barcodes
49 changes: 39 additions & 10 deletions aim/services.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,43 @@
from types import SimpleNamespace
from typing import NamedTuple
import os
import sqlalchemy as sa

S = SimpleNamespace()
S.mysql_database = sa.engine.URL.create(
drivername="mysql+mysqldb",
username=os.environ["MARIADB_USER"],
password=os.environ["MARIADB_PASSWORD"],
host=os.environ["DATABASE_HOST"],
database=os.environ["MARIADB_DATABASE"],
class Services(NamedTuple):
    """Immutable container for the application's configuration values."""

    mysql_database: sa.engine.URL
    test_database: str
    ci_on: str | None
    alma_api_key: str
    alma_api_url: str
    digifeeds_api_url: str
    digifeeds_set_id: str
    digifeeds_s3_access_key: str
    digifeeds_s3_secret_access_key: str
    digifeeds_s3_bucket: str
    digifeeds_s3_input_path: str

# Shared configuration instance, built once at import time.
# The MARIADB_* / DATABASE_HOST variables are required (a missing one raises
# KeyError). Every other setting falls back to a literal default when its
# variable is unset or empty, because of the `or`.
S = Services(
    mysql_database=sa.engine.URL.create(
        drivername="mysql+mysqldb",
        username=os.environ["MARIADB_USER"],
        password=os.environ["MARIADB_PASSWORD"],
        host=os.environ["DATABASE_HOST"],
        database=os.environ["MARIADB_DATABASE"],
    ),
    test_database="sqlite:///:memory:",
    # None when the CI variable is not set.
    ci_on=os.environ.get("CI"),
    alma_api_key=os.environ.get("ALMA_API_KEY") or "alma_api_key",
    alma_api_url="https://api-na.hosted.exlibrisgroup.com/almaws/v1",
    digifeeds_api_url=os.environ.get("DIGIFEEDS_API_URL") or "http://api:8000",
    digifeeds_set_id=os.environ.get("DIGIFEEDS_SET_ID") or "digifeeds_set_id",
    digifeeds_s3_access_key=(
        os.environ.get("DIGIFEEDS_S3_ACCESS_KEY") or "digifeeds_s3_access_key"
    ),
    digifeeds_s3_secret_access_key=(
        os.environ.get("DIGIFEEDS_S3_SECRET_ACCESS_KEY")
        or "digifeeds_s3_secret_access_key"
    ),
    digifeeds_s3_bucket=(
        os.environ.get("DIGIFEEDS_S3_BUCKET") or "digifeeds_s3_bucket"
    ),
    digifeeds_s3_input_path=(
        os.environ.get("DIGIFEEDS_S3_INPUT_PATH") or "path_to_input_barcodes"
    ),
)
S.test_database = "sqlite:///:memory:"
S.ci_on = os.getenv("CI")
Loading

0 comments on commit 49c6ec8

Please sign in to comment.