Skip to content

Commit

Permalink
Merge branch 'main' into magic-trailing-comma
Browse files Browse the repository at this point in the history
  • Loading branch information
underdarknl authored Oct 3, 2024
2 parents 54b8c03 + a3f0d1f commit a81fe5c
Show file tree
Hide file tree
Showing 6 changed files with 1,209 additions and 872 deletions.
4 changes: 4 additions & 0 deletions bytes/bytes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ class Settings(BaseSettings):

logging_format: Literal["text", "json"] = Field("text", description="Logging format")

s3_bucket_prefix: str | None = Field(None, validation_alias="S3_BUCKET_PREFIX")
s3_bucket_name: str | None = Field(None, validation_alias="S3_BUCKET")
bucket_per_org: bool = Field(True, validation_alias="BUCKET_PER_ORG")

model_config = SettingsConfigDict(env_prefix="BYTES_")

@classmethod
Expand Down
79 changes: 73 additions & 6 deletions bytes/bytes/raw/file_raw_repository.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import logging
from pathlib import Path
from uuid import UUID

import structlog
from boto3 import set_stream_logger as set_boto3_stream_logger
from boto3.session import Session as BotoSession

from bytes.config import Settings
from bytes.models import BoefjeMeta, RawData
Expand All @@ -12,12 +15,20 @@


def create_raw_repository(settings: Settings) -> RawRepository:
    """Build the raw-data repository selected by the given settings.

    An S3-backed repository is returned when either ``settings.s3_bucket_name``
    or ``settings.s3_bucket_prefix`` is set (truthy); otherwise raw files are
    stored on the local filesystem under ``settings.data_dir``.
    """
    s3_configured = bool(settings.s3_bucket_name or settings.s3_bucket_prefix)

    if not s3_configured:
        # The permission settings are octal strings (parsed with base 8).
        return FileRawRepository(
            settings.data_dir,
            make_middleware(),
            folder_permissions=int(settings.folder_permission, 8),
            file_permissions=int(settings.file_permission, 8),
        )

    # Only one of the two S3 settings needs to be supplied; the other falls
    # back to its default.
    # NOTE(review): S3 bucket naming rules require lowercase names, so these
    # uppercase defaults may be rejected by the backend -- confirm.
    middleware = make_middleware()
    bucket_prefix = settings.s3_bucket_prefix or "OpenKAT-"
    bucket_name = settings.s3_bucket_name or "OpenKAT"
    return S3RawRepository(middleware, settings.bucket_per_org, bucket_prefix, bucket_name)


class FileRawRepository(RawRepository):
Expand Down Expand Up @@ -62,3 +73,59 @@ def _raw_file_path(self, raw_id: UUID, boefje_meta: BoefjeMeta) -> Path:

def _index(self, raw_id: UUID) -> str:
    """Return the first ``UUID_INDEX`` characters of the raw id's string form."""
    prefix_length = self.UUID_INDEX
    raw_id_text = str(raw_id)
    return raw_id_text[:prefix_length]


class S3RawRepository(RawRepository):
    """Raw-data repository that stores encoded raw files as S3 objects.

    With ``bucket_per_org`` enabled every organization gets its own bucket,
    named ``<s3_bucket_prefix><organization>``, and objects are keyed by the
    raw id alone. Otherwise all organizations share ``s3_bucket_name`` and
    object keys are prefixed with ``<organization>/``.
    """

    def __init__(
        self,
        file_middleware: FileMiddleware,
        bucket_per_org: bool,
        s3_bucket_prefix: str,
        s3_bucket_name: str,
    ) -> None:
        """Store the configuration and open an S3 resource.

        Args:
            file_middleware: Encodes data before upload and decodes it after download.
            bucket_per_org: Whether each organization gets a dedicated bucket.
            s3_bucket_prefix: Bucket-name prefix used when ``bucket_per_org`` is true.
            s3_bucket_name: Shared bucket name used when ``bucket_per_org`` is false.
        """
        self._file_middleware = file_middleware
        self.bucket_per_org = bucket_per_org
        self.s3_bucket_prefix = s3_bucket_prefix
        self.s3_bucket_name = s3_bucket_name

        # Limit boto3's stream logging to warnings and above.
        set_boto3_stream_logger("", logging.WARNING)
        # No explicit arguments: the session relies on boto3's default
        # configuration lookup for credentials, region and endpoint.
        self._s3resource = BotoSession().resource("s3")

    def get_or_create_bucket(self, organization: str):
        """Return the bucket used for ``organization``, creating it if needed.

        Raises:
            ClientError: When S3 rejects the create/get request. The error is
                logged before being re-raised.
        """
        bucket_name = f"{self.s3_bucket_prefix}{organization}" if self.bucket_per_org else self.s3_bucket_name

        try:
            bucket = self._s3resource.create_bucket(Bucket=bucket_name)
            bucket.wait_until_exists()
        # Resolve the exception class through the resource, not through
        # ``bucket``: when ``create_bucket`` itself fails, ``bucket`` was never
        # bound and referencing it here would raise ``UnboundLocalError``,
        # masking the real S3 error.
        except self._s3resource.meta.client.exceptions.ClientError as error:
            logger.error("Something went wrong with creating/getting bucket %s: %s", bucket_name, error)
            raise

        return bucket

    def save_raw(self, raw_id: UUID, raw: RawData) -> None:
        """Encode ``raw.value`` and upload it as the object for ``raw_id``."""
        object_name = self._raw_file_name(raw_id, raw.boefje_meta)
        contents = self._file_middleware.encode(raw.value)

        logger.info("Writing raw data with id %s to s3", raw_id)
        bucket = self.get_or_create_bucket(raw.boefje_meta.organization)
        bucket.Object(object_name).put(Body=contents)

    def get_raw(self, raw_id: UUID, boefje_meta: BoefjeMeta) -> RawData:
        """Download and decode the raw data stored for ``raw_id``.

        Raises:
            BytesFileNotFoundException: When the object does not exist.
            ClientError: For any other S3 failure (logged before re-raising).
        """
        object_name = self._raw_file_name(raw_id, boefje_meta)
        bucket = self.get_or_create_bucket(boefje_meta.organization)

        try:
            contents = bucket.Object(object_name).get()["Body"].read()
        except self._s3resource.meta.client.exceptions.ClientError as error:
            error_code = error.response.get("Error", {}).get("Code")
            # GetObject reports a missing key as "NoSuchKey"; "404" is what
            # body-less (HEAD-style) responses produce. Accept both.
            if error_code in ("404", "NoSuchKey"):
                raise BytesFileNotFoundException(error) from error
            logger.error("Could not get file from s3: %s/%s due to %s", bucket.name, object_name, error)
            raise

        return RawData(value=self._file_middleware.decode(contents), boefje_meta=boefje_meta)

    def _raw_file_name(self, raw_id: UUID, boefje_meta: BoefjeMeta) -> str:
        """Return the object key for ``raw_id``.

        In a shared bucket the key is namespaced by organization; in a
        per-organization bucket the raw id alone is used.
        """
        if self.bucket_per_org:
            return str(raw_id)
        return f"{boefje_meta.organization}/{raw_id}"
Loading

0 comments on commit a81fe5c

Please sign in to comment.