diff --git a/.env-dist b/.env-dist index 5e441c4ed95..daca58ec3ff 100644 --- a/.env-dist +++ b/.env-dist @@ -65,6 +65,8 @@ BYTES_DB_URI=postgresql://${BYTES_DB_USER}:${BYTES_DB_PASSWORD}@postgres:5432/${ # --- Octopoes --- # # See `octopoes/octopoes/config/settings.py` +# Number of Celery workers (for the Octopoes API worker) that need to be started +CELERY_WORKER_CONCURRENCY=${CELERY_WORKER_CONCURRENCY:-4} # --- Mula --- # # See `mula/scheduler/config/settings.py` diff --git a/.gitignore b/.gitignore index cbf5265286c..bce85366c2c 100644 --- a/.gitignore +++ b/.gitignore @@ -444,3 +444,4 @@ nl-kat-* /boefjes/boefjes/plugins/kat_rpki/rpki-meta.json *.pstat +**/.cache* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77f9b918082..99f38a8cfc6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -76,7 +76,7 @@ repos: rev: 1.16.0 hooks: - id: django-upgrade - args: [--target-version, "4.2"] + args: [--target-version, "5.0"] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.9.0 @@ -91,11 +91,13 @@ repos: - httpx - types-python-dateutil - types-requests + - types-croniter exclude: | (?x)( ^boefjes/tools | ^keiko/templates | ^mula/whitelist\.py$ | + ^mula/scripts | ^octopoes/tools | ^rocky/whitelist\.py$ | /tests/ | @@ -108,7 +110,7 @@ repos: hooks: - id: codespell additional_dependencies: ["tomli"] - args: [-L, lama] + args: ["-L", "lama", "--ignore-regex", ".{1024}|.*codespell-ignore.*"] exclude: | (?x)( \.po$ | diff --git a/boefjes/.ci/.env.test b/boefjes/.ci/.env.test index 88a4030008b..896206d75ad 100644 --- a/boefjes/.ci/.env.test +++ b/boefjes/.ci/.env.test @@ -1,11 +1,42 @@ POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres -POSTGRES_DB=ci_katalogus +POSTGRES_DB=test -KATALOGUS_DB_URI=postgresql://postgres:postgres@ci_katalogus-db:5432/ci_katalogus +KATALOGUS_DB_URI=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@ci_katalogus-db:5432/${POSTGRES_DB} CI=1 -ENCRYPTION_MIDDLEWARE=NACL_SEALBOX +BOEFJES_ENCRYPTION_MIDDLEWARE=NACL_SEALBOX KATALOGUS_PRIVATE_KEY_B64=Vpb0g34rGFbnoUuiSjkFr8TKh278AViSJEdjII5DvQY= KATALOGUS_PUBLIC_KEY_B64=iR/vPrBVrx0LXOiwK6DMB3QCggjzQXDtj/hyVK7mpy8= +BOEFJES_API=http://placeholder:1234 + + +# Benchmark setup +RABBITMQ_DEFAULT_VHOST=kat +RABBITMQ_DEFAULT_USER=ci_user +RABBITMQ_DEFAULT_PASS=ci_pass + +QUEUE_URI=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@ci_rabbitmq:5672/${RABBITMQ_DEFAULT_VHOST} + +KATALOGUS_API=http://ci_katalogus:8080 +OCTOPOES_API=http://ci_octopoes:80 +XTDB_URI=http://ci_xtdb:3000 +BYTES_API=http://ci_bytes:8000 +SCHEDULER_API=http://placeholder:8000 + +CI=1 + +# CI Bytes configuration + +BYTES_SECRET=3d54f6e4e65723aa678d17d8fd22aba5234136d3e44e5a77305dedaa8ce13f45 +BYTES_ACCESS_TOKEN_EXPIRE_MINUTES=1000 +BYTES_USERNAME=test +BYTES_PASSWORD=secret +BYTES_ENCRYPTION_MIDDLEWARE=IDENTITY + +BYTES_DB_URI=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@ci_bytes-db:5432/${POSTGRES_DB} +BYTES_LOG_FILE=/var/log/bytes-test.log +BYTES_FILE_PERMISSION=555 + +BYTES_METRICS_TTL_SECONDS=0 diff --git a/boefjes/.ci/docker-compose.yml b/boefjes/.ci/docker-compose.yml index 0ed9b4c14dc..f3dc4ecc244 100644 --- a/boefjes/.ci/docker-compose.yml +++ b/boefjes/.ci/docker-compose.yml @@ -5,9 +5,10 @@ services: dockerfile: boefjes/Dockerfile args: - ENVIRONMENT=dev - command: sh -c 'python -m pytest -v boefjes/katalogus/tests/integration' + command: sh -c 'python -m pytest -v tests/integration' depends_on: - ci_katalogus-db + - ci_katalogus env_file: - .ci/.env.test volumes: @@ -17,3 +18,101 @@ services: image: docker.io/library/postgres:15 env_file: - .ci/.env.test + + migration_bench: + build: + context: .. + dockerfile: boefjes/Dockerfile + args: + - ENVIRONMENT=dev + command: bash -c "python -m cProfile -o .ci/bench_$(date +%Y_%m_%d-%H:%M:%S).pstat -m pytest -v -m slow tests/integration" + depends_on: + - ci_bytes + - ci_octopoes + - ci_katalogus-db + env_file: + - .ci/.env.test + volumes: + - .:/app/boefjes + environment: + - DATABASE_MIGRATION=1 + + ci_bytes: + build: + context: ../bytes + args: + ENVIRONMENT: dev + command: uvicorn bytes.api:app --host 0.0.0.0 + depends_on: + ci_rabbitmq: + condition: service_healthy + ci_bytes-db: + condition: service_started + env_file: + - .ci/.env.test + environment: + - DATABASE_MIGRATION=1 + + ci_bytes-db: + image: docker.io/library/postgres:15 + env_file: + - .ci/.env.test + + ci_octopoes: + build: + context: ../octopoes + command: uvicorn octopoes.api.api:app --host 0.0.0.0 --port 80 + depends_on: + ci_rabbitmq: + condition: service_healthy + ci_xtdb: + condition: service_started + ci_katalogus: + condition: service_started + ci_octopoes_api_worker: + condition: service_started + env_file: + - .ci/.env.test + + ci_rabbitmq: + restart: on-failure + image: "docker.io/library/rabbitmq:3.12-management" + healthcheck: + test: ["CMD", "rabbitmqctl", "status"] + interval: 5s + retries: 4 + env_file: + - .ci/.env.test + + ci_xtdb: + image: "ghcr.io/dekkers/xtdb-http-multinode:v1.0.8" + + ci_octopoes_api_worker: + build: + context: ../octopoes + command: celery -A octopoes.tasks.tasks worker -E --loglevel=INFO + depends_on: + ci_rabbitmq: + condition: service_healthy + ci_xtdb: + condition: service_started + env_file: + - .ci/.env.test + ulimits: + nofile: + soft: 262144 + hard: 262144 + + ci_katalogus: + build: + context: .. + dockerfile: boefjes/Dockerfile + args: + - ENVIRONMENT=dev + command: uvicorn boefjes.katalogus.root:app --host 0.0.0.0 --port 8080 + depends_on: + - ci_katalogus-db + env_file: + - .ci/.env.test + volumes: + - .:/app/boefjes diff --git a/boefjes/Dockerfile b/boefjes/Dockerfile index c0a75319776..b38ca57e307 100644 --- a/boefjes/Dockerfile +++ b/boefjes/Dockerfile @@ -6,8 +6,8 @@ ARG USER_GID=1000 ENTRYPOINT ["/app/boefjes/entrypoint.sh"] -RUN groupadd --gid $USER_GID nonroot -RUN adduser --disabled-password --gecos '' --uid $USER_UID --gid $USER_GID nonroot +RUN groupadd --gid "$USER_GID" nonroot +RUN adduser --disabled-password --gecos '' --uid "$USER_UID" --gid "$USER_GID" nonroot WORKDIR /app/boefjes ENV PATH=/home/nonroot/.local/bin:${PATH} @@ -20,10 +20,10 @@ RUN --mount=type=cache,target=/root/.cache \ pip install --upgrade pip \ && if [ "$ENVIRONMENT" = "dev" ]; \ then \ - grep -v git+https:// requirements-dev.txt | pip install -r /dev/stdin ; \ + grep -v git+https:// requirements-dev.txt | pip install -r /dev/stdin && \ grep git+https:// requirements-dev.txt | pip install -r /dev/stdin ; \ else \ - grep -v git+https:// requirements.txt | pip install -r /dev/stdin ;\ + grep -v git+https:// requirements.txt | pip install -r /dev/stdin && \ grep git+https:// requirements.txt | pip install -r /dev/stdin ; \ fi diff --git a/boefjes/Makefile b/boefjes/Makefile index 579543e3926..09e5fc21b47 100644 --- a/boefjes/Makefile +++ b/boefjes/Makefile @@ -77,6 +77,12 @@ itest: ## Run the integration tests. $(ci-docker-compose) run --rm katalogus_integration $(ci-docker-compose) down +bench: ## Run the report benchmark. + $(ci-docker-compose) build + $(ci-docker-compose) down --remove-orphans + $(ci-docker-compose) run --rm migration_bench + $(ci-docker-compose) stop + debian12: docker run --rm \ --env PKG_NAME=kat-boefjes \ diff --git a/boefjes/boefjes/__main__.py b/boefjes/boefjes/__main__.py index e9621de3e0e..a5572253d49 100644 --- a/boefjes/boefjes/__main__.py +++ b/boefjes/boefjes/__main__.py @@ -2,6 +2,7 @@ import logging.config import click +import structlog from boefjes.app import get_runtime_manager from boefjes.config import settings @@ -10,7 +11,27 @@ with settings.log_cfg.open() as f: logging.config.dictConfig(json.load(f)) -logger = logging.getLogger(__name__) +structlog.configure( + processors=[ + structlog.contextvars.merge_contextvars, + structlog.processors.add_log_level, + structlog.processors.StackInfoRenderer(), + structlog.dev.set_exc_info, + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.TimeStamper("iso", utc=False), + ( + structlog.dev.ConsoleRenderer(colors=True, pad_level=False) + if settings.logging_format == "text" + else structlog.processors.JSONRenderer() + ), + ], + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, +) + +logger = structlog.get_logger(__name__) @click.command() diff --git a/boefjes/boefjes/api.py b/boefjes/boefjes/api.py index c26f64e7979..22542898dfd 100644 --- a/boefjes/boefjes/api.py +++ b/boefjes/boefjes/api.py @@ -1,10 +1,10 @@ import base64 -import logging import multiprocessing from datetime import datetime, timezone from enum import Enum from uuid import UUID +import structlog from fastapi import Depends, FastAPI, HTTPException, Response from httpx import HTTPError, HTTPStatusError from pydantic import BaseModel, ConfigDict, Field @@ -13,15 +13,15 @@ from boefjes.clients.bytes_client import BytesAPIClient from boefjes.clients.scheduler_client import SchedulerAPIClient, TaskStatus from boefjes.config import settings -from boefjes.job_handler import get_environment_settings, get_octopoes_api_connector, serialize_ooi +from boefjes.job_handler import get_environment_settings, get_octopoes_api_connector from boefjes.job_models import BoefjeMeta -from boefjes.katalogus.local_repository import LocalPluginRepository, get_local_repository +from boefjes.local_repository import LocalPluginRepository, get_local_repository from boefjes.plugins.models import _default_mime_types from octopoes.models import Reference from octopoes.models.exception import ObjectNotFoundException app = FastAPI(title="Boefje API") -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class UvicornServer(multiprocessing.Process): @@ -85,7 +85,7 @@ async def root(): @app.get("/api/v0/tasks/{task_id}", response_model=BoefjeInput) -async def boefje_input( +def boefje_input( task_id: UUID, scheduler_client: SchedulerAPIClient = Depends(get_scheduler_client), local_repository: LocalPluginRepository = Depends(get_local_repository), @@ -102,7 +102,7 @@ async def boefje_input( @app.post("/api/v0/tasks/{task_id}") -async def boefje_output( +def boefje_output( task_id: UUID, boefje_output: BoefjeOutput, scheduler_client: SchedulerAPIClient = Depends(get_scheduler_client), @@ -122,7 +122,7 @@ async def boefje_output( bytes_client.save_boefje_meta(boefje_meta) if boefje_output.files: - mime_types = _default_mime_types(task.p_item.data.boefje) + mime_types = _default_mime_types(task.data.boefje) for file in boefje_output.files: raw = base64.b64decode(file.content) # when supported, also save file.name to Bytes @@ -149,13 +149,12 @@ def get_task(task_id, scheduler_client): def create_boefje_meta(task, local_repository): - boefje = task.p_item.data.boefje + boefje = task.data.boefje boefje_resource = local_repository.by_id(boefje.id) - env_keys = boefje_resource.environment_keys - environment = get_environment_settings(task.p_item.data, env_keys) if env_keys else {} + environment = get_environment_settings(task.data, boefje_resource.schema) - organization = task.p_item.data.organization - input_ooi = task.p_item.data.input_ooi + organization = task.data.organization + input_ooi = task.data.input_ooi arguments = {"oci_arguments": boefje_resource.oci_arguments} if input_ooi: @@ -165,7 +164,7 @@ def create_boefje_meta(task, local_repository): except ObjectNotFoundException as e: raise ObjectNotFoundException(f"Object {reference} not found in Octopoes") from e - arguments["input"] = serialize_ooi(ooi) + arguments["input"] = ooi.serialize() boefje_meta = BoefjeMeta( id=task.id, diff --git a/boefjes/boefjes/app.py b/boefjes/boefjes/app.py index 6734395a6cd..731cbd7e19c 100644 --- a/boefjes/boefjes/app.py +++ b/boefjes/boefjes/app.py @@ -1,4 +1,3 @@ -import logging import multiprocessing as mp import os import signal @@ -6,22 +5,18 @@ import time from queue import Queue +import structlog from httpx import HTTPError from pydantic import ValidationError -from boefjes.clients.scheduler_client import ( - QueuePrioritizedItem, - SchedulerAPIClient, - SchedulerClientInterface, - TaskStatus, -) +from boefjes.clients.scheduler_client import SchedulerAPIClient, SchedulerClientInterface, Task, TaskStatus from boefjes.config import Settings from boefjes.job_handler import BoefjeHandler, NormalizerHandler, bytes_api_client -from boefjes.katalogus.local_repository import get_local_repository from boefjes.local import LocalBoefjeJobRunner, LocalNormalizerJobRunner +from boefjes.local_repository import get_local_repository from boefjes.runtime_interfaces import Handler, WorkerManager -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class SchedulerWorkerManager(WorkerManager): @@ -192,7 +187,7 @@ def exit(self, queue_type: WorkerManager.Queue, signum: int | None = None): logger.info("Received %s, exiting", signal.Signals(signum).name) if not self.task_queue.empty(): - items: list[QueuePrioritizedItem] = [self.task_queue.get() for _ in range(self.task_queue.qsize())] + items: list[Task] = [self.task_queue.get() for _ in range(self.task_queue.qsize())] for p_item in items: try: diff --git a/boefjes/boefjes/clients/bytes_client.py b/boefjes/boefjes/clients/bytes_client.py index bfa91ecf9db..b7b66bbc272 100644 --- a/boefjes/boefjes/clients/bytes_client.py +++ b/boefjes/boefjes/clients/bytes_client.py @@ -1,16 +1,18 @@ -import logging import typing +import uuid +from base64 import b64encode from collections.abc import Callable, Set from functools import wraps from typing import Any from uuid import UUID +import structlog from httpx import Client, HTTPStatusError, HTTPTransport, Response from boefjes.job_models import BoefjeMeta, NormalizerMeta, RawDataMeta BYTES_API_CLIENT_VERSION = "0.3" -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) ClientSessionMethod = Callable[..., Any] @@ -89,19 +91,34 @@ def save_normalizer_meta(self, normalizer_meta: NormalizerMeta) -> None: self._verify_response(response) + @retry_with_login + def get_normalizer_meta(self, normalizer_meta_id: uuid.UUID) -> NormalizerMeta: + response = self._session.get(f"/bytes/normalizer_meta/{normalizer_meta_id}", headers=self.headers) + self._verify_response(response) + + return NormalizerMeta.model_validate_json(response.content) + @retry_with_login def save_raw(self, boefje_meta_id: str, raw: str | bytes, mime_types: Set[str] = frozenset()) -> UUID: - headers = {"content-type": "application/octet-stream"} - headers.update(self.headers) + file_name = "raw" # The name provides a key for all ids returned, so this is arbitrary as we only upload 1 file + response = self._session.post( "/bytes/raw", - content=raw, - headers=headers, - params={"mime_types": list(mime_types), "boefje_meta_id": boefje_meta_id}, + json={ + "files": [ + { + "name": file_name, + "content": b64encode(raw if isinstance(raw, bytes) else raw.encode()).decode(), + "tags": list(mime_types), + } + ] + }, + headers=self.headers, + params={"boefje_meta_id": str(boefje_meta_id)}, ) - self._verify_response(response) - return UUID(response.json()["id"]) + + return UUID(response.json()[file_name]) @retry_with_login def get_raw(self, raw_data_id: str) -> bytes: diff --git a/boefjes/boefjes/clients/scheduler_client.py b/boefjes/boefjes/clients/scheduler_client.py index be7c04f8d26..5e07d83d0be 100644 --- a/boefjes/boefjes/clients/scheduler_client.py +++ b/boefjes/boefjes/clients/scheduler_client.py @@ -1,5 +1,4 @@ import datetime -import logging import uuid from enum import Enum @@ -8,26 +7,12 @@ from boefjes.job_models import BoefjeMeta, NormalizerMeta -logger = logging.getLogger(__name__) - class Queue(BaseModel): id: str size: int -class QueuePrioritizedItem(BaseModel): - """Representation of a queue.PrioritizedItem on the priority queue. Used - for unmarshalling of priority queue prioritized items to a JSON - representation. - """ - - id: uuid.UUID - priority: int - hash: str | None = None - data: BoefjeMeta | NormalizerMeta - - class TaskStatus(Enum): """Status of a task.""" @@ -37,14 +22,18 @@ class TaskStatus(Enum): RUNNING = "running" COMPLETED = "completed" FAILED = "failed" + CANCELLED = "cancelled" class Task(BaseModel): id: uuid.UUID scheduler_id: str - type: str - p_item: QueuePrioritizedItem + schedule_id: str | None + priority: int status: TaskStatus + type: str + hash: str | None = None + data: BoefjeMeta | NormalizerMeta created_at: datetime.datetime modified_at: datetime.datetime @@ -53,7 +42,7 @@ class SchedulerClientInterface: def get_queues(self) -> list[Queue]: raise NotImplementedError() - def pop_item(self, queue: str) -> QueuePrioritizedItem | None: + def pop_item(self, queue: str) -> Task | None: raise NotImplementedError() def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: @@ -62,7 +51,7 @@ def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: def get_task(self, task_id: uuid.UUID) -> Task: raise NotImplementedError() - def push_item(self, queue_id: str, p_item: QueuePrioritizedItem) -> None: + def push_item(self, queue_id: str, p_item: Task) -> None: raise NotImplementedError() @@ -80,13 +69,13 @@ def get_queues(self) -> list[Queue]: return TypeAdapter(list[Queue]).validate_json(response.content) - def pop_item(self, queue: str) -> QueuePrioritizedItem | None: + def pop_item(self, queue: str) -> Task | None: response = self._session.post(f"/queues/{queue}/pop") self._verify_response(response) - return TypeAdapter(QueuePrioritizedItem | None).validate_json(response.content) + return TypeAdapter(Task | None).validate_json(response.content) - def push_item(self, queue_id: str, p_item: QueuePrioritizedItem) -> None: + def push_item(self, queue_id: str, p_item: Task) -> None: response = self._session.post(f"/queues/{queue_id}/push", content=p_item.json()) self._verify_response(response) diff --git a/boefjes/boefjes/config.py b/boefjes/boefjes/config.py index 353cfed9368..79f5655e093 100644 --- a/boefjes/boefjes/config.py +++ b/boefjes/boefjes/config.py @@ -1,13 +1,13 @@ import logging import os from pathlib import Path -from typing import Any +from typing import Any, Literal from pydantic import AmqpDsn, AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict from pydantic_settings.sources import EnvSettingsSource -from boefjes.katalogus.models import EncryptionMiddleware +from boefjes.models import EncryptionMiddleware BASE_DIR: Path = Path(__file__).parent.resolve() @@ -131,6 +131,8 @@ class Settings(BaseSettings): None, description="OpenTelemetry endpoint", validation_alias="SPAN_EXPORT_GRPC_ENDPOINT" ) + logging_format: Literal["text", "json"] = Field("text", description="Logging format") + model_config = SettingsConfigDict(env_prefix="BOEFJES_") @classmethod diff --git a/boefjes/boefjes/katalogus/api/__init__.py b/boefjes/boefjes/dependencies/__init__.py similarity index 100% rename from boefjes/boefjes/katalogus/api/__init__.py rename to boefjes/boefjes/dependencies/__init__.py diff --git a/boefjes/boefjes/katalogus/dependencies/encryption.py b/boefjes/boefjes/dependencies/encryption.py similarity index 100% rename from boefjes/boefjes/katalogus/dependencies/encryption.py rename to boefjes/boefjes/dependencies/encryption.py diff --git a/boefjes/boefjes/katalogus/dependencies/plugins.py b/boefjes/boefjes/dependencies/plugins.py similarity index 63% rename from boefjes/boefjes/katalogus/dependencies/plugins.py rename to boefjes/boefjes/dependencies/plugins.py index d254afa8ca7..080ff6d7da7 100644 --- a/boefjes/boefjes/katalogus/dependencies/plugins.py +++ b/boefjes/boefjes/dependencies/plugins.py @@ -1,63 +1,58 @@ import contextlib -import logging from collections.abc import Iterator from pathlib import Path from typing import Literal +import structlog from fastapi import Query from jsonschema.exceptions import ValidationError from jsonschema.validators import validate from sqlalchemy.orm import Session -from boefjes.katalogus.local_repository import LocalPluginRepository, get_local_repository -from boefjes.katalogus.models import Boefje, FilterParameters, Normalizer, PaginationParameters, PluginType -from boefjes.katalogus.storage.interfaces import ( +from boefjes.local_repository import LocalPluginRepository, get_local_repository +from boefjes.models import Boefje, FilterParameters, Normalizer, PaginationParameters, PluginType +from boefjes.sql.config_storage import create_config_storage +from boefjes.sql.db import session_managed_iterator +from boefjes.sql.plugin_storage import create_plugin_storage +from boefjes.storage.interfaces import ( + ConfigStorage, ExistingPluginId, NotFound, - PluginEnabledStorage, + PluginNotFound, PluginStorage, SettingsNotConformingToSchema, - SettingsStorage, ) -from boefjes.sql.db import session_managed_iterator -from boefjes.sql.plugin_enabled_storage import create_plugin_enabled_storage -from boefjes.sql.plugin_storage import create_plugin_storage -from boefjes.sql.setting_storage import create_setting_storage -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class PluginService: def __init__( self, plugin_storage: PluginStorage, - plugin_enabled_store: PluginEnabledStorage, - settings_storage: SettingsStorage, + config_storage: ConfigStorage, local_repo: LocalPluginRepository, ): self.plugin_storage = plugin_storage - self.plugin_enabled_store = plugin_enabled_store - self.settings_storage = settings_storage + self.config_storage = config_storage self.local_repo = local_repo def __enter__(self): - self.plugin_enabled_store.__enter__() self.plugin_storage.__enter__() - self.settings_storage.__enter__() + self.config_storage.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): - self.plugin_enabled_store.__exit__(exc_type, exc_val, exc_tb) self.plugin_storage.__exit__(exc_type, exc_val, exc_tb) - self.settings_storage.__exit__(exc_type, exc_val, exc_tb) + self.config_storage.__exit__(exc_type, exc_val, exc_tb) def get_all(self, organisation_id: str) -> list[PluginType]: - all_plugins = self.get_all_without_enabled() + all_plugins = self._get_all_without_enabled() return [self._set_plugin_enabled(plugin, organisation_id) for plugin in all_plugins.values()] - def get_all_without_enabled(self): + def _get_all_without_enabled(self) -> dict[str, PluginType]: all_plugins = {plugin.id: plugin for plugin in self.local_repo.get_all()} for plugin in self.plugin_storage.get_all(): @@ -88,24 +83,25 @@ def by_plugin_ids(self, plugin_ids: list[str], organisation_id: str) -> list[Plu return found_plugins def get_all_settings(self, organisation_id: str, plugin_id: str): - return self.settings_storage.get_all(organisation_id, plugin_id) + return self.config_storage.get_all_settings(organisation_id, plugin_id) def clone_settings_to_organisation(self, from_organisation: str, to_organisation: str): # One requirement is that only boefjes enabled in the from_organisation end up being enabled for the target. - for plugin_id in self.plugin_enabled_store.get_all_enabled(to_organisation): + for plugin_id in self.config_storage.get_enabled_boefjes(to_organisation): self.set_enabled_by_id(plugin_id, to_organisation, enabled=False) for plugin in self.get_all(from_organisation): if all_settings := self.get_all_settings(from_organisation, plugin.id): self.upsert_settings(all_settings, to_organisation, plugin.id) - for plugin_id in self.plugin_enabled_store.get_all_enabled(from_organisation): + for plugin_id in self.config_storage.get_enabled_boefjes(from_organisation): self.set_enabled_by_id(plugin_id, to_organisation, enabled=True) - def upsert_settings(self, values: dict, organisation_id: str, plugin_id: str): - self._assert_settings_match_schema(values, organisation_id, plugin_id) + def upsert_settings(self, settings: dict, organisation_id: str, plugin_id: str): + self._assert_settings_match_schema(settings, plugin_id) + self._put_boefje(plugin_id) - return self.settings_storage.upsert(values, organisation_id, plugin_id) + return self.config_storage.upsert(organisation_id, plugin_id, settings=settings) def create_boefje(self, boefje: Boefje) -> None: try: @@ -121,18 +117,48 @@ def create_normalizer(self, normalizer: Normalizer) -> None: except KeyError: self.plugin_storage.create_normalizer(normalizer) - def delete_settings(self, organisation_id: str, plugin_id: str): - self.settings_storage.delete(organisation_id, plugin_id) + def _put_boefje(self, boefje_id: str) -> None: + """Check existence of a boefje, and insert a database entry if it concerns a local boefje""" + + try: + self.plugin_storage.boefje_by_id(boefje_id) + except PluginNotFound as e: + try: + plugin = self.local_repo.by_id(boefje_id) + except KeyError: + raise e + + if plugin.type != "boefje": + raise e + self.plugin_storage.create_boefje(plugin) + + def _put_normalizer(self, normalizer_id: str) -> None: + """Check existence of a normalizer, and insert a database entry if it concerns a local normalizer""" try: - self._assert_settings_match_schema({}, organisation_id, plugin_id) - except SettingsNotConformingToSchema: - logger.warning("Making sure %s is disabled for %s because settings are deleted", plugin_id, organisation_id) + self.plugin_storage.normalizer_by_id(normalizer_id) + except PluginNotFound: + try: + plugin = self.local_repo.by_id(normalizer_id) + except KeyError: + raise + + if plugin.type != "normalizer": + raise + self.plugin_storage.create_normalizer(plugin) - self.set_enabled_by_id(plugin_id, organisation_id, False) + def delete_settings(self, organisation_id: str, plugin_id: str): + self.config_storage.delete(organisation_id, plugin_id) + + # We don't check the schema anymore because we can provide entries through the global environment as well def schema(self, plugin_id: str) -> dict | None: - return self.local_repo.schema(plugin_id) + try: + boefje = self.plugin_storage.boefje_by_id(plugin_id) + + return boefje.schema + except PluginNotFound: + return self.local_repo.schema(plugin_id) def cover(self, plugin_id: str) -> Path: try: @@ -153,28 +179,27 @@ def description(self, plugin_id: str, organisation_id: str) -> str: return "" def set_enabled_by_id(self, plugin_id: str, organisation_id: str, enabled: bool): - if enabled: - all_settings = self.settings_storage.get_all(organisation_id, plugin_id) - self._assert_settings_match_schema(all_settings, organisation_id, plugin_id) - - self.plugin_enabled_store.update_or_create_by_id( - plugin_id, - enabled, - organisation_id, - ) + # We don't check the schema anymore because we can provide entries through the global environment as well + + try: + self._put_boefje(plugin_id) + except PluginNotFound: + self._put_normalizer(plugin_id) + + self.config_storage.upsert(organisation_id, plugin_id, enabled=enabled) - def _assert_settings_match_schema(self, all_settings: dict, organisation_id: str, plugin_id: str): + def _assert_settings_match_schema(self, all_settings: dict, plugin_id: str): schema = self.schema(plugin_id) if schema: # No schema means that there is nothing to assert try: validate(instance=all_settings, schema=schema) except ValidationError as e: - raise SettingsNotConformingToSchema(organisation_id, plugin_id, e.message) from e + raise SettingsNotConformingToSchema(plugin_id, e.message) from e def _set_plugin_enabled(self, plugin: PluginType, organisation_id: str) -> PluginType: with contextlib.suppress(KeyError, NotFound): - plugin.enabled = self.plugin_enabled_store.get_by_id(plugin.id, organisation_id) + plugin.enabled = self.config_storage.is_enabled_by_id(plugin.id, organisation_id) return plugin @@ -183,8 +208,7 @@ def get_plugin_service(organisation_id: str) -> Iterator[PluginService]: def closure(session: Session): return PluginService( create_plugin_storage(session), - create_plugin_enabled_storage(session), - create_setting_storage(session), + create_config_storage(session), get_local_repository(), ) diff --git a/boefjes/boefjes/docker_boefjes_runner.py b/boefjes/boefjes/docker_boefjes_runner.py index 8375c57a1be..f28c64055ae 100644 --- a/boefjes/boefjes/docker_boefjes_runner.py +++ b/boefjes/boefjes/docker_boefjes_runner.py @@ -1,7 +1,7 @@ -import logging from datetime import datetime, timezone import docker +import structlog from docker.errors import APIError, ContainerError, ImageNotFound from httpx import HTTPError @@ -9,9 +9,9 @@ from boefjes.clients.scheduler_client import SchedulerAPIClient, TaskStatus from boefjes.config import settings from boefjes.job_models import BoefjeMeta -from boefjes.katalogus.models import Boefje +from boefjes.models import Boefje -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class DockerBoefjesRunner: diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index 54f45052458..7ad0f7247ad 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -1,29 +1,31 @@ -import logging import os import traceback from collections.abc import Callable from datetime import datetime, timezone -from enum import Enum -from typing import Any, cast +from typing import cast import httpx +import structlog from httpx import HTTPError +from jsonschema.exceptions import ValidationError +from jsonschema.validators import validate from boefjes.clients.bytes_client import BytesAPIClient from boefjes.config import settings from boefjes.docker_boefjes_runner import DockerBoefjesRunner -from boefjes.job_models import BoefjeMeta, NormalizerMeta, SerializedOOI, SerializedOOIValue -from boefjes.katalogus.local_repository import LocalPluginRepository +from boefjes.job_models import BoefjeMeta, NormalizerMeta +from boefjes.local_repository import LocalPluginRepository from boefjes.plugins.models import _default_mime_types from boefjes.runtime_interfaces import BoefjeJobRunner, Handler, NormalizerJobRunner +from boefjes.storage.interfaces import SettingsNotConformingToSchema from octopoes.api.models import Affirmation, Declaration, Observation from octopoes.connector.octopoes import OctopoesAPIConnector -from octopoes.models import OOI, Reference, ScanLevel +from octopoes.models import Reference, ScanLevel from octopoes.models.exception import ObjectNotFoundException MIMETYPE_MIN_LENGTH = 5 # two chars before, and 2 chars after the slash ought to be reasonable -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) bytes_api_client = BytesAPIClient( str(settings.bytes_api), @@ -32,39 +34,11 @@ ) -def _serialize_value(value: Any, required: bool) -> SerializedOOIValue: - if isinstance(value, list): - return [_serialize_value(item, required) for item in value] - if isinstance(value, Reference): - try: - return value.tokenized.root - except AttributeError: - if required: - raise - - return None - if isinstance(value, Enum): - return value.value - if isinstance(value, int | float): - return value - else: - return str(value) - - -def serialize_ooi(ooi: OOI) -> SerializedOOI: - serialized_oois = {} - for key, value in ooi: - if key not in ooi.model_fields: - continue - serialized_oois[key] = _serialize_value(value, ooi.model_fields[key].is_required()) - return serialized_oois - - def get_octopoes_api_connector(org_code: str) -> OctopoesAPIConnector: return OctopoesAPIConnector(str(settings.octopoes_api), org_code) -def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: list[str]) -> dict[str, str]: +def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None) -> dict[str, str]: try: katalogus_api = str(settings.katalogus_api).rstrip("/") response = httpx.get( @@ -72,22 +46,34 @@ def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: list[str timeout=30, ) response.raise_for_status() - environment = response.json() - - # Add prefixed BOEFJE_* global environment variables - for key, value in os.environ.items(): - if key.startswith("BOEFJE_"): - katalogus_key = key.split("BOEFJE_", 1)[1] - # Only pass the environment variable if it is not explicitly set through the katalogus, - # if and only if they are defined in boefje.json - if katalogus_key in environment_keys and katalogus_key not in environment: - environment[katalogus_key] = value - - return {k: str(v) for k, v in environment.items() if k in environment_keys} except HTTPError: logger.exception("Error getting environment settings") raise + allowed_keys = schema.get("properties", []) if schema else [] + new_env = { + key.split("BOEFJE_", 1)[1]: value + for key, value in os.environ.items() + if key.startswith("BOEFJE_") and key in allowed_keys + } + + settings_from_katalogus = response.json() + + for key, value in settings_from_katalogus.items(): + if key in allowed_keys: + new_env[key] = value + + # The schema, besides dictating that a boefje cannot run if it is not matched, also provides an extra safeguard: + # it is possible to inject code if arguments are passed that "escape" the call to a tool. Hence, we should enforce + # the schema somewhere and make the schema as strict as possible. + if schema is not None: + try: + validate(instance=new_env, schema=schema) + except ValidationError as e: + raise SettingsNotConformingToSchema(boefje_meta.boefje.id, e.message) from e + + return new_env + class BoefjeHandler(Handler): def __init__( @@ -124,12 +110,10 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: except ObjectNotFoundException as e: raise ObjectNotFoundException(f"Object {reference} not found in Octopoes") from e - boefje_meta.arguments["input"] = serialize_ooi(ooi) - - env_keys = boefje_resource.environment_keys + boefje_meta.arguments["input"] = ooi.serialize() boefje_meta.runnable_hash = boefje_resource.runnable_hash - boefje_meta.environment = get_environment_settings(boefje_meta, env_keys) if env_keys else {} + boefje_meta.environment = get_environment_settings(boefje_meta, boefje_resource.schema) mime_types = _default_mime_types(boefje_meta.boefje) @@ -209,6 +193,7 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None: Observation( method=normalizer_meta.normalizer.id, source=reference, + source_method=normalizer_meta.raw_data.boefje_meta.boefje.id, task_id=normalizer_meta.id, valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, result=[ooi for ooi in observation.results if ooi.primary_key != observation.input_ooi], @@ -219,6 +204,7 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None: connector.save_declaration( Declaration( method=normalizer_meta.normalizer.id, + source_method=normalizer_meta.raw_data.boefje_meta.boefje.id, ooi=declaration.ooi, task_id=normalizer_meta.id, valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, @@ -229,12 +215,30 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None: connector.save_affirmation( Affirmation( method=normalizer_meta.normalizer.id, + source_method=normalizer_meta.raw_data.boefje_meta.boefje.id, ooi=affirmation.ooi, task_id=normalizer_meta.id, valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, ) ) + if ( + normalizer_meta.raw_data.boefje_meta.input_ooi # No input OOI means no deletion propagation + and not (results.observations or results.declarations or results.affirmations) + ): + # There were no results found, which we still need to signal to Octopoes for deletion propagation + + connector.save_observation( + Observation( + method=normalizer_meta.normalizer.id, + source=Reference.from_str(normalizer_meta.raw_data.boefje_meta.input_ooi), + source_method=normalizer_meta.raw_data.boefje_meta.boefje.id, + task_id=normalizer_meta.id, + valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, + result=[], + ) + ) + corrected_scan_profiles = [] for profile in results.scan_profiles: profile.level = ScanLevel( diff --git a/boefjes/boefjes/job_models.py b/boefjes/boefjes/job_models.py index 9d1a0e49aca..8e419b79541 100644 --- a/boefjes/boefjes/job_models.py +++ b/boefjes/boefjes/job_models.py @@ -1,10 +1,10 @@ -from datetime import timedelta +from datetime import datetime, timedelta from typing import Annotated, Literal, TypeAlias from uuid import UUID from pydantic import AwareDatetime, BaseModel, Field, StringConstraints -from octopoes.models import DeclaredScanProfile, PrimaryKeyToken +from octopoes.models import DeclaredScanProfile from octopoes.models.types import OOIType @@ -85,6 +85,7 @@ class NormalizerObservation(BaseModel): class NormalizerDeclaration(BaseModel): type: Literal["declaration"] = "declaration" ooi: OOIType + end_valid_time: datetime | None = None class NormalizerAffirmation(BaseModel): @@ -100,5 +101,3 @@ class NormalizerResults(BaseModel): NormalizerOutput: TypeAlias = OOIType | NormalizerDeclaration | NormalizerAffirmation | DeclaredScanProfile -SerializedOOIValue: TypeAlias = None | str | int | float | dict[str, str | PrimaryKeyToken] | list["SerializedOOIValue"] -SerializedOOI: TypeAlias = dict[str, SerializedOOIValue] diff --git a/boefjes/boefjes/katalogus/dependencies/organisations.py b/boefjes/boefjes/katalogus/dependencies/organisations.py deleted file mode 100644 index 46c03a936c3..00000000000 --- a/boefjes/boefjes/katalogus/dependencies/organisations.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging -from collections.abc import Iterator - -from boefjes.katalogus.storage.interfaces import OrganisationStorage -from boefjes.sql.db import session_managed_iterator -from boefjes.sql.organisation_storage import create_organisation_storage - -logger = logging.getLogger(__name__) - - -def get_organisations_store() -> Iterator[OrganisationStorage]: - yield from session_managed_iterator(create_organisation_storage) diff --git a/boefjes/boefjes/katalogus/api/organisations.py b/boefjes/boefjes/katalogus/organisations.py similarity index 88% rename from boefjes/boefjes/katalogus/api/organisations.py rename to boefjes/boefjes/katalogus/organisations.py index fcb68e0f99f..3f8d8e6cc84 100644 --- a/boefjes/boefjes/katalogus/api/organisations.py +++ b/boefjes/boefjes/katalogus/organisations.py @@ -1,9 +1,9 @@ from fastapi import APIRouter, Depends, HTTPException, status -from boefjes.katalogus.dependencies.organisations import get_organisations_store -from boefjes.katalogus.models import Organisation -from boefjes.katalogus.storage.interfaces import OrganisationNotFound, OrganisationStorage +from boefjes.models import Organisation from boefjes.sql.db import ObjectNotFoundException +from boefjes.sql.organisation_storage import get_organisations_store +from boefjes.storage.interfaces import OrganisationNotFound, OrganisationStorage router = APIRouter(prefix="/organisations", tags=["organisations"]) diff --git a/boefjes/boefjes/katalogus/api/plugins.py b/boefjes/boefjes/katalogus/plugins.py similarity index 90% rename from boefjes/boefjes/katalogus/api/plugins.py rename to boefjes/boefjes/katalogus/plugins.py index 7b639a88ba0..6ecc4b031db 100644 --- a/boefjes/boefjes/katalogus/api/plugins.py +++ b/boefjes/boefjes/katalogus/plugins.py @@ -3,18 +3,19 @@ from fastapi import APIRouter, Body, Depends, HTTPException, status from fastapi.responses import FileResponse, JSONResponse, Response -from pydantic import BaseModel, Field +from jsonschema.validators import Draft202012Validator +from pydantic import BaseModel, Field, field_validator -from boefjes.katalogus.api.organisations import check_organisation_exists -from boefjes.katalogus.dependencies.plugins import ( +from boefjes.dependencies.plugins import ( PluginService, get_pagination_parameters, get_plugin_service, get_plugins_filter_parameters, ) -from boefjes.katalogus.models import FilterParameters, PaginationParameters, PluginType -from boefjes.katalogus.storage.interfaces import PluginStorage +from boefjes.katalogus.organisations import check_organisation_exists +from boefjes.models import FilterParameters, PaginationParameters, PluginType from boefjes.sql.plugin_storage import get_plugin_storage +from boefjes.storage.interfaces import PluginStorage router = APIRouter( prefix="/organisations/{organisation_id}", @@ -90,6 +91,8 @@ def get_plugin( @router.post("/plugins", status_code=status.HTTP_201_CREATED) def add_plugin(plugin: PluginType, plugin_service: PluginService = Depends(get_plugin_service)): with plugin_service as service: + plugin.static = False # Creation through the API implies that these cannot be static + if plugin.type == "boefje": return service.create_boefje(plugin) @@ -120,13 +123,22 @@ class BoefjeIn(BaseModel): version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) + schema: dict | None = None oci_image: str | None = None oci_arguments: list[str] = Field(default_factory=list) + @field_validator("schema") + @classmethod + def json_schema_valid(cls, schema: dict | None) -> dict | None: + if schema is not None: + Draft202012Validator.check_schema(schema) + return schema + + return None + @router.patch("/boefjes/{boefje_id}", status_code=status.HTTP_204_NO_CONTENT) def update_boefje( @@ -154,7 +166,6 @@ class NormalizerIn(BaseModel): version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) consumes: list[str] = Field(default_factory=list) # mime types (and/ or boefjes) produces: list[str] = Field(default_factory=list) # oois diff --git a/boefjes/boefjes/katalogus/api/root.py b/boefjes/boefjes/katalogus/root.py similarity index 66% rename from boefjes/boefjes/katalogus/api/root.py rename to boefjes/boefjes/katalogus/root.py index 0b3a7ac2bae..8aa0c1683c5 100644 --- a/boefjes/boefjes/katalogus/api/root.py +++ b/boefjes/boefjes/katalogus/root.py @@ -2,8 +2,10 @@ import logging.config from typing import Any +import structlog from fastapi import APIRouter, FastAPI, Request, status from fastapi.responses import JSONResponse, RedirectResponse +from jsonschema.exceptions import SchemaError from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor @@ -15,15 +17,35 @@ from pydantic import BaseModel, Field from boefjes.config import settings -from boefjes.katalogus.api import organisations, plugins -from boefjes.katalogus.api import settings as settings_router -from boefjes.katalogus.storage.interfaces import NotFound, StorageError +from boefjes.katalogus import organisations, plugins +from boefjes.katalogus import settings as settings_router from boefjes.katalogus.version import __version__ +from boefjes.storage.interfaces import NotAllowed, NotFound, StorageError with settings.log_cfg.open() as f: logging.config.dictConfig(json.load(f)) -logger = logging.getLogger(__name__) +structlog.configure( + processors=[ + structlog.contextvars.merge_contextvars, + structlog.processors.add_log_level, + structlog.processors.StackInfoRenderer(), + structlog.dev.set_exc_info, + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.TimeStamper("iso", utc=False), + ( + structlog.dev.ConsoleRenderer(pad_level=False) + if settings.logging_format == "text" + else structlog.processors.JSONRenderer() + ), + ], + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, +) + +logger = structlog.get_logger(__name__) app = FastAPI(title="KAT-alogus API", version=__version__) @@ -59,6 +81,14 @@ def entity_not_found_handler(request: Request, exc: NotFound): ) +@app.exception_handler(NotAllowed) +def not_allowed_handler(request: Request, exc: NotAllowed): + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content={"message": exc.message}, + ) + + @app.exception_handler(StorageError) def storage_error_handler(request: Request, exc: StorageError): return JSONResponse( @@ -67,6 +97,14 @@ def storage_error_handler(request: Request, exc: StorageError): ) +@app.exception_handler(SchemaError) +def schema_error_handler(request: Request, exc: StorageError): + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content={"message": "Invalid jsonschema provided"}, + ) + + class ServiceHealth(BaseModel): service: str healthy: bool = False diff --git a/boefjes/boefjes/katalogus/api/settings.py b/boefjes/boefjes/katalogus/settings.py similarity index 85% rename from boefjes/boefjes/katalogus/api/settings.py rename to boefjes/boefjes/katalogus/settings.py index 7a710a92711..b40223bbd92 100644 --- a/boefjes/boefjes/katalogus/api/settings.py +++ b/boefjes/boefjes/katalogus/settings.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, Depends -from boefjes.katalogus.api.organisations import check_organisation_exists -from boefjes.katalogus.dependencies.plugins import PluginService, get_plugin_service +from boefjes.dependencies.plugins import PluginService, get_plugin_service +from boefjes.katalogus.organisations import check_organisation_exists router = APIRouter( prefix="/organisations/{organisation_id}/{plugin_id}/settings", diff --git a/boefjes/boefjes/katalogus/storage/memory.py b/boefjes/boefjes/katalogus/storage/memory.py deleted file mode 100644 index d89e972ca32..00000000000 --- a/boefjes/boefjes/katalogus/storage/memory.py +++ /dev/null @@ -1,106 +0,0 @@ -from boefjes.katalogus.models import Boefje, Normalizer, Organisation, PluginType -from boefjes.katalogus.storage.interfaces import ( - OrganisationStorage, - PluginEnabledStorage, - PluginStorage, - SettingsStorage, -) - -# key = organisation id; value = organisation -organisations: dict[str, Organisation] = {} - -# key = organisation, repository/plugin id; value = enabled/ disabled -plugins_state: dict[str, dict[str, bool]] = {} - - -class OrganisationStorageMemory(OrganisationStorage): - def __init__(self, defaults: dict[str, Organisation] | None = None): - self._data = organisations if defaults is None else defaults - - def get_by_id(self, organisation_id: str) -> Organisation: - return self._data[organisation_id] - - def get_all(self) -> dict[str, Organisation]: - return self._data - - def create(self, organisation: Organisation) -> None: - self._data[organisation.id] = organisation - - def delete_by_id(self, organisation_id: str) -> None: - del self._data[organisation_id] - - -class PluginStorageMemory(PluginStorage): - def __init__(self): - self._boefjes = {} - self._normalizers = {} - - def get_all(self) -> list[PluginType]: - return list(self._boefjes.values()) + list(self._normalizers.values()) - - def boefje_by_id(self, boefje_id: str) -> Boefje: - return self._boefjes[boefje_id] - - def normalizer_by_id(self, normalizer_id: str) -> Normalizer: - return self._normalizers[normalizer_id] - - def create_boefje(self, boefje: Boefje) -> None: - self._boefjes[boefje.id] = boefje - - def create_normalizer(self, normalizer: Normalizer) -> None: - self._normalizers[normalizer.id] = normalizer - - def delete_boefje_by_id(self, boefje_id: str) -> None: - del self._boefjes[boefje_id] - - def delete_normalizer_by_id(self, normalizer_id: str) -> None: - del self._normalizers[normalizer_id] - - -class SettingsStorageMemory(SettingsStorage): - def __init__(self): - self._data = {} - - def get_all(self, organisation_id: str, plugin_id: str) -> dict[str, str]: - if organisation_id not in self._data: - return {} - - return self._data[organisation_id].get(plugin_id, {}) - - def upsert(self, values: dict, organisation_id: str, plugin_id: str) -> None: - if organisation_id not in self._data: - self._data[organisation_id] = {} - - if plugin_id not in self._data[organisation_id]: - self._data[organisation_id][plugin_id] = {} - - self._data[organisation_id][plugin_id] = values - - def delete(self, organisation_id: str, plugin_id: str) -> None: - del self._data[organisation_id][plugin_id] - - -class PluginStatesStorageMemory(PluginEnabledStorage): - def __init__( - self, - organisation: str, - defaults: dict[str, bool] | None = None, - ): - self._data = plugins_state.setdefault(organisation, {}) if defaults is None else defaults - self._organisation = organisation - - def get_by_id(self, plugin_id: str, organisation_id: str) -> bool: - return self._data[f"{organisation_id}.{plugin_id}"] - - def get_all_enabled(self, organisation_id: str) -> list[str]: - return [ - key.split(".", maxsplit=1)[1] - for key, value in self._data.items() - if value and key.split(".", maxsplit=1)[0] == organisation_id - ] - - def create(self, plugin_id: str, enabled: bool, organisation_id: str) -> None: - self._data[f"{organisation_id}.{plugin_id}"] = enabled - - def update_or_create_by_id(self, plugin_id: str, enabled: bool, organisation_id: str) -> None: - self._data[f"{organisation_id}.{plugin_id}"] = enabled diff --git a/boefjes/boefjes/local.py b/boefjes/boefjes/local.py index 7581423dbee..e743c9b9de9 100644 --- a/boefjes/boefjes/local.py +++ b/boefjes/boefjes/local.py @@ -1,7 +1,8 @@ -import logging import os from collections.abc import Iterable +import structlog + from boefjes.job_models import ( BoefjeMeta, InvalidReturnValueNormalizer, @@ -13,11 +14,11 @@ NormalizerResults, ObservationsWithoutInputOOI, ) -from boefjes.katalogus.local_repository import LocalPluginRepository +from boefjes.local_repository import LocalPluginRepository from boefjes.runtime_interfaces import BoefjeJobRunner, JobRuntimeError, NormalizerJobRunner from octopoes.models import OOI, DeclaredScanProfile -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class TemporaryEnvironment: diff --git a/boefjes/boefjes/katalogus/local_repository.py b/boefjes/boefjes/local_repository.py similarity index 92% rename from boefjes/boefjes/katalogus/local_repository.py rename to boefjes/boefjes/local_repository.py index 3e9071123f8..a29fb92ba35 100644 --- a/boefjes/boefjes/katalogus/local_repository.py +++ b/boefjes/boefjes/local_repository.py @@ -1,21 +1,23 @@ import json -import logging import pkgutil from pathlib import Path from typing import Any -from boefjes.katalogus.models import PluginType +import structlog + +from boefjes.models import PluginType from boefjes.plugins.models import ( BOEFJE_DEFINITION_FILE, BOEFJES_DIR, ENTRYPOINT_NORMALIZERS, NORMALIZER_DEFINITION_FILE, + SCHEMA_FILE, BoefjeResource, ModuleException, NormalizerResource, ) -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) class LocalPluginRepository: @@ -51,10 +53,10 @@ def schema(self, id_: str) -> dict | None: if id_ not in boefjes: return None - path = boefjes[id_].path / "schema.json" + path = boefjes[id_].path / SCHEMA_FILE if not path.exists(): - logger.debug("Did not find schema for boefje %s", boefjes[id_]) + logger.debug("Did not find schema for boefje %s", id_) return None return json.loads(path.read_text()) @@ -133,14 +135,14 @@ def _find_packages_in_path_containing_files(self, required_files: list[str]) -> for package in pkgutil.walk_packages([str(self.path)], prefix): if not package.ispkg: - logging.debug("%s is not a package", package.name) + logger.debug("%s is not a package", package.name) continue path = self.path / package.name.replace(prefix, "").replace(".", "/") missing_files = [file for file in required_files if not (path / file).exists()] if missing_files: - logging.debug("Files %s not found for %s", missing_files, package.name) + logger.debug("Files %s not found for %s", missing_files, package.name) continue paths.append((path, package.name)) diff --git a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py new file mode 100644 index 00000000000..2cd63145aa5 --- /dev/null +++ b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py @@ -0,0 +1,62 @@ +"""Introduce schema field to Boefje model + +Revision ID: 5be152459a7b +Revises: f9de6eb7824b +Create Date: 2024-08-08 14:47:12.582017 + +""" + +import logging + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.orm import sessionmaker + +from boefjes.local_repository import get_local_repository +from boefjes.sql.plugin_storage import create_plugin_storage +from boefjes.storage.interfaces import PluginNotFound + +# revision identifiers, used by Alembic. +revision = "5be152459a7b" +down_revision = "f9de6eb7824b" +branch_labels = None +depends_on = None + +logger = logging.getLogger(__name__) + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("boefje", sa.Column("schema", sa.JSON(), nullable=True)) + + local_repo = get_local_repository() + session = sessionmaker(bind=op.get_bind())() + + with create_plugin_storage(session) as storage: + plugins = local_repo.get_all() + logger.info("Found %s plugins", len(plugins)) + + for plugin in local_repo.get_all(): + schema = local_repo.schema(plugin.id) + + if schema: + try: + # This way we avoid the safeguard that updating static boefjes is not allowed + instance = storage._db_boefje_instance_by_id(plugin.id) + instance.schema = schema + storage.session.add(instance) + logger.info("Updated database entry for plugin %s", plugin.id) + except PluginNotFound: + logger.info("No database entry for plugin %s", plugin.id) + continue + else: + logger.info("No schema present for plugin %s", plugin.id) + + session.close() + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("boefje", "schema") + # ### end Alembic commands ### diff --git a/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py new file mode 100644 index 00000000000..7bdfbd9e024 --- /dev/null +++ b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py @@ -0,0 +1,37 @@ +"""Remove environment keys field + +Revision ID: 870fc302b852 +Revises: 5be152459a7b +Create Date: 2024-08-20 06:08:20.943924 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "870fc302b852" +down_revision = "5be152459a7b" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("boefje", "environment_keys") + op.drop_column("normalizer", "environment_keys") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "normalizer", + sa.Column("environment_keys", postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False), + ) + op.add_column( + "boefje", + sa.Column("environment_keys", postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False), + ) + # ### end Alembic commands ### diff --git a/boefjes/boefjes/migrations/versions/cd34fdfafdaf_json_settings_for_settings_table.py b/boefjes/boefjes/migrations/versions/cd34fdfafdaf_json_settings_for_settings_table.py index 7351d140501..f76286dee3c 100644 --- a/boefjes/boefjes/migrations/versions/cd34fdfafdaf_json_settings_for_settings_table.py +++ b/boefjes/boefjes/migrations/versions/cd34fdfafdaf_json_settings_for_settings_table.py @@ -13,8 +13,8 @@ from sqlalchemy.engine import Connection from sqlalchemy.orm import sessionmaker +from boefjes.sql.config_storage import create_encrypter from boefjes.sql.db import get_engine -from boefjes.sql.setting_storage import create_encrypter # revision identifiers, used by Alembic. revision = "cd34fdfafdaf" diff --git a/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py new file mode 100644 index 00000000000..d46f360b703 --- /dev/null +++ b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py @@ -0,0 +1,277 @@ +"""Introduce BoefjeConfig model + +Revision ID: f9de6eb7824b +Revises: 6f99834a4a5a +Create Date: 2024-05-31 10:45:16.474714 + +""" + +import logging + +import sqlalchemy as sa +from alembic import op +from psycopg2._json import Json +from psycopg2.extensions import register_adapter +from psycopg2.extras import execute_values + +from boefjes.local_repository import get_local_repository +from boefjes.models import Boefje, Normalizer + +# revision identifiers, used by Alembic. +revision = "f9de6eb7824b" +down_revision = "6f99834a4a5a" +branch_labels = None +depends_on = None + + +logger = logging.getLogger(__name__) + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "boefje_config", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("settings", sa.String(length=512), nullable=False, server_default="{}"), + sa.Column("enabled", sa.Boolean(), nullable=False, server_default="false"), + sa.Column("boefje_id", sa.Integer(), nullable=False), + sa.Column("organisation_pk", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["boefje_id"], ["boefje.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["organisation_pk"], ["organisation.pk"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("organisation_pk", "boefje_id", name="unique_boefje_config_per_organisation_per_boefje"), + ) + op.create_table( + "normalizer_config", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("enabled", sa.Boolean(), server_default="false", nullable=False), + sa.Column("normalizer_id", sa.Integer(), nullable=False), + sa.Column("organisation_pk", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["normalizer_id"], ["normalizer.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["organisation_pk"], ["organisation.pk"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "organisation_pk", "normalizer_id", name="unique_normalizer_config_per_organisation_per_normalizer" + ), + ) + + op.add_column("boefje", sa.Column("static", sa.Boolean(), server_default="false", nullable=False)) + op.add_column("normalizer", sa.Column("static", sa.Boolean(), server_default="false", nullable=False)) + + register_adapter(dict, Json) + + local_plugins = {plugin.id: plugin for plugin in get_local_repository().get_all()} + connection = op.get_bind() + + # Get unique plugin_ids from the settings table for boefjes that do not exist yet in the database + query = """ + SELECT DISTINCT s.plugin_id FROM settings s left join boefje b on b.plugin_id = s.plugin_id + where b.plugin_id IS NULL + """ # noqa: S608 + + to_insert: list[Boefje] = [] + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + raise ValueError(f"Invalid plugin id found: {plugin_id}") + + # Since settings are boefje-only at this moment + if local_plugins[plugin_id].type != "boefje": + raise ValueError(f"Settings for normalizer or bit found: {plugin_id}. Remove these entries first.") + + res = connection.execute(f"SELECT id FROM boefje where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Boefje already exists + + if local_plugins[plugin_id].type == "boefje": + to_insert.append(local_plugins[plugin_id]) + + entries = [ + ( + boefje.id, + boefje.name, + boefje.description, + str(boefje.scan_level), + list(boefje.consumes), + list(boefje.produces), + ["TEST_KEY"], + boefje.oci_image, + boefje.oci_arguments, + boefje.version, + ) + for boefje in to_insert + ] + query = """INSERT INTO boefje (plugin_id, name, description, scan_level, consumes, produces, environment_keys, + oci_image, oci_arguments, version) values %s""" + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, entries) + + to_insert = [] + + query = """ + SELECT DISTINCT p.plugin_id FROM plugin_state p left join boefje b on b.plugin_id = p.plugin_id + where b.plugin_id IS NULL + """ + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) + continue + + res = connection.execute(f"SELECT id FROM boefje where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Boefje already exists + + if local_plugins[plugin_id].type == "boefje": + to_insert.append(local_plugins[plugin_id]) + + entries = [ + ( + boefje.id, + boefje.name, + boefje.description, + str(boefje.scan_level), + list(boefje.consumes), + list(boefje.produces), + ["TEST_KEY"], + boefje.oci_image, + boefje.oci_arguments, + boefje.version, + ) + for boefje in to_insert + ] + query = """INSERT INTO boefje (plugin_id, name, description, scan_level, consumes, produces, environment_keys, + oci_image, oci_arguments, version) values %s""" # noqa: S608 + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, entries) + + normalizers_to_insert: list[Normalizer] = [] + query = """ + SELECT DISTINCT p.plugin_id FROM plugin_state p left join normalizer n on n.plugin_id = p.plugin_id + where n.plugin_id IS NULL + """ # noqa: S608 + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) + continue + + res = connection.execute(f"SELECT id FROM normalizer where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Normalizer already exists + + if local_plugins[plugin_id].type == "normalizer": + normalizers_to_insert.append(local_plugins[plugin_id]) + + normalizer_entries = [ + ( + normalizer.id, + normalizer.name, + normalizer.description, + normalizer.consumes, + normalizer.produces, + ["TEST_KEY"], + normalizer.version, + ) + for normalizer in normalizers_to_insert + ] + query = """INSERT INTO normalizer (plugin_id, name, description, consumes, produces, environment_keys, version) + values %s""" # noqa: S608 + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, normalizer_entries) + + with connection.begin(): + connection.execute(""" + INSERT INTO boefje_config (settings, boefje_id, organisation_pk) + SELECT s.values, b.id, s.organisation_pk from settings s + join boefje b on s.plugin_id = b.plugin_id + """) # Add boefjes and set the settings for boefjes + + with connection.begin(): + connection.execute(""" + INSERT INTO boefje_config (enabled, boefje_id, organisation_pk) + SELECT p.enabled, b.id, p.organisation_pk FROM plugin_state p + JOIN boefje b ON p.plugin_id = b.plugin_id + LEFT JOIN boefje_config bc ON bc.boefje_id = b.id WHERE bc.boefje_id IS NULL + """) # Add boefjes and set the enabled field for boefjes that to not exist yet + connection.execute(""" + UPDATE boefje_config bc SET enabled = p.enabled from plugin_state p + JOIN boefje b ON p.plugin_id = b.plugin_id + where b.id = bc.boefje_id and p.organisation_pk = bc.organisation_pk + """) # Set the enabled field for boefjes + connection.execute(""" + UPDATE normalizer_config nc SET enabled = p.enabled from plugin_state p + JOIN normalizer n ON p.plugin_id = n.plugin_id + where n.id = nc.normalizer_id and p.organisation_pk = nc.organisation_pk + """) # Set the enabled field for normalizers + + op.drop_table("settings") + op.drop_table("plugin_state") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("normalizer", "static") + op.drop_column("boefje", "static") + + op.create_table( + "settings", + sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False), + sa.Column("values", sa.VARCHAR(length=512), autoincrement=False, nullable=False), + sa.Column("plugin_id", sa.VARCHAR(length=64), autoincrement=False, nullable=False), + sa.Column("organisation_pk", sa.INTEGER(), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint( + ["organisation_pk"], ["organisation.pk"], name="settings_organisation_pk_fkey", ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id", name="settings_pkey"), + sa.UniqueConstraint("organisation_pk", "plugin_id", name="unique_settings_per_organisation_per_plugin"), + ) + op.create_table( + "plugin_state", + sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False), + sa.Column("plugin_id", sa.VARCHAR(length=64), autoincrement=False, nullable=False), + sa.Column("enabled", sa.BOOLEAN(), autoincrement=False, nullable=False), + sa.Column("organisation_pk", sa.INTEGER(), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint( + ["organisation_pk"], ["organisation.pk"], name="plugin_state_organisation_pk_fkey", ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id", name="plugin_state_pkey"), + sa.UniqueConstraint("plugin_id", "organisation_pk", name="unique_plugin_id_per_org"), + ) + + connection = op.get_bind() + with connection.begin(): + connection.execute(""" + INSERT INTO settings (values, plugin_id, organisation_pk) + SELECT bc.settings, b.plugin_id, bc.organisation_pk from boefje_config bc + join boefje b on bc.boefje_id = b.id + """) + + with connection.begin(): + connection.execute(""" + INSERT INTO plugin_state (enabled, plugin_id, organisation_pk) + SELECT bc.enabled, b.plugin_id, bc.organisation_pk from boefje_config bc + join boefje b on bc.boefje_id = b.id + """) + + with connection.begin(): + connection.execute(""" + INSERT INTO plugin_state (enabled, plugin_id, organisation_pk) + SELECT nc.enabled, n.plugin_id, nc.organisation_pk from normalizer_config nc + join normalizer n on nc.normalizer_id = n.id + """) + + op.drop_table("boefje_config") + op.drop_table("normalizer_config") + + # ### end Alembic commands ### diff --git a/boefjes/boefjes/katalogus/models.py b/boefjes/boefjes/models.py similarity index 79% rename from boefjes/boefjes/katalogus/models.py rename to boefjes/boefjes/models.py index 69d8cc2c637..58665b8588a 100644 --- a/boefjes/boefjes/katalogus/models.py +++ b/boefjes/boefjes/models.py @@ -2,7 +2,8 @@ from enum import Enum from typing import Literal -from pydantic import BaseModel, Field +from jsonschema.validators import Draft202012Validator +from pydantic import BaseModel, Field, field_validator class Organisation(BaseModel): @@ -12,11 +13,10 @@ class Organisation(BaseModel): class Plugin(BaseModel): id: str - name: str | None = None + name: str version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) enabled: bool = False static: bool = True # We need to differentiate between local and remote plugins to know which ones can be deleted @@ -29,10 +29,21 @@ class Boefje(Plugin): scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) + schema: dict | None = None runnable_hash: str | None = None oci_image: str | None = None oci_arguments: list[str] = Field(default_factory=list) + @field_validator("schema") + @classmethod + def json_schema_valid(cls, schema: dict) -> dict: + if schema is not None: + Draft202012Validator.check_schema(schema) + return schema + + class Config: + validate_assignment = True + class Normalizer(Plugin): type: Literal["normalizer"] = "normalizer" diff --git a/boefjes/boefjes/plugins/kat_adr_finding_types/boefje.json b/boefjes/boefjes/plugins/kat_adr_finding_types/boefje.json index e53d47f3982..93c2ae8ef7d 100644 --- a/boefjes/boefjes/plugins/kat_adr_finding_types/boefje.json +++ b/boefjes/boefjes/plugins/kat_adr_finding_types/boefje.json @@ -1,7 +1,7 @@ { "id": "adr-finding-types", "name": "ADR Finding Types", - "description": "Hydrate information of ADR finding types", + "description": "Hydrate information on API Design Rules (ADR) finding types for common design mistakes.", "consumes": [ "ADRFindingType" ], diff --git a/boefjes/boefjes/plugins/kat_adr_finding_types/normalizer.json b/boefjes/boefjes/plugins/kat_adr_finding_types/normalizer.json index 583b7714bed..fabda504805 100644 --- a/boefjes/boefjes/plugins/kat_adr_finding_types/normalizer.json +++ b/boefjes/boefjes/plugins/kat_adr_finding_types/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_adr_finding_types_normalize", + "name": "API Design Rules (ADR) Finding Types", + "description": "Parse API Design Rules (ADR) finding types.", "consumes": [ "boefje/adr-finding-types" ], diff --git a/boefjes/boefjes/plugins/kat_adr_validator/boefje.json b/boefjes/boefjes/plugins/kat_adr_validator/boefje.json index b782dbb5b5b..9d43ee60519 100644 --- a/boefjes/boefjes/plugins/kat_adr_validator/boefje.json +++ b/boefjes/boefjes/plugins/kat_adr_validator/boefje.json @@ -1,7 +1,7 @@ { "id": "adr-validator", "name": "API Design Rules validator", - "description": "Validate if an API conforms to the API Design Rules", + "description": "Validate if an API conforms to the API Design Rules (ADR).", "consumes": [ "RESTAPI" ], diff --git a/boefjes/boefjes/plugins/kat_adr_validator/normalizer.json b/boefjes/boefjes/plugins/kat_adr_validator/normalizer.json index 52c21e9a03e..f840cded2ad 100644 --- a/boefjes/boefjes/plugins/kat_adr_validator/normalizer.json +++ b/boefjes/boefjes/plugins/kat_adr_validator/normalizer.json @@ -1,5 +1,7 @@ { "id": "adr-validator-normalize", + "name": "API Design Rules validator", + "description": "TODO", "consumes": [ "boefje/adr-validator" ], diff --git a/boefjes/boefjes/plugins/kat_answer_parser/normalizer.json b/boefjes/boefjes/plugins/kat_answer_parser/normalizer.json index 41a89a217b0..922b333697f 100644 --- a/boefjes/boefjes/plugins/kat_answer_parser/normalizer.json +++ b/boefjes/boefjes/plugins/kat_answer_parser/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_answer_parser", + "name": "Answer Parser", + "description": "Parses the answers from Config objects.", "consumes": [ "answer" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/boefje.json b/boefjes/boefjes/plugins/kat_binaryedge/boefje.json index 004015e0570..e7d90e4ee98 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/boefje.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/boefje.json @@ -1,13 +1,10 @@ { "id": "binaryedge", "name": "BinaryEdge", - "description": "Use BinaryEdge to find open ports with vulnerabilities that are found on that port", + "description": "Use BinaryEdge to find open ports with vulnerabilities. Requires a BinaryEdge API key.", "consumes": [ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "BINARYEDGE_API" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_binaryedge/containers/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/containers/normalizer.json index 46a034d0d1c..086ce350160 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/containers/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/containers/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_containers", + "name": "BinaryEdge containers", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/databases/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/databases/normalizer.json index 22fd81eb927..2af3f47f891 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/databases/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/databases/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_databases", + "name": "BinaryEdge databases", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/http_web/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/http_web/normalizer.json index f0e5825f36d..f5cafc7560a 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/http_web/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/http_web/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_http_web", + "name": "BinaryEdge Websites", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/message_queues/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/message_queues/normalizer.json index 15ea3e250b0..caa59b56f4b 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/message_queues/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/message_queues/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_message_queues", + "name": "BinaryEdge message queues", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/protocols/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/protocols/normalizer.json index 34f17a681c1..30d0f02963e 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/protocols/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/protocols/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_protocols", + "name": "BinaryEdge protocols", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/remote_desktop/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/remote_desktop/normalizer.json index c28180a88c7..80e1837a499 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/remote_desktop/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/remote_desktop/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_remote_desktop", + "name": "Binary Edge remote desktop", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/service_identification/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/service_identification/normalizer.json index eaea2744052..d451a79b150 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/service_identification/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/service_identification/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_service_identification", + "name": "BinaryEdge service identification", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_binaryedge/services/normalizer.json b/boefjes/boefjes/plugins/kat_binaryedge/services/normalizer.json index b2671be67a1..57a0f8dac16 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/services/normalizer.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/services/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_binaryedge_services", + "name": "BinaryEdge services", "consumes": [ "boefje/binaryedge" ], diff --git a/boefjes/boefjes/plugins/kat_burpsuite/normalizer.json b/boefjes/boefjes/plugins/kat_burpsuite/normalizer.json index 44c8b40ab3e..c0b88e6a857 100644 --- a/boefjes/boefjes/plugins/kat_burpsuite/normalizer.json +++ b/boefjes/boefjes/plugins/kat_burpsuite/normalizer.json @@ -1,7 +1,7 @@ { "id": "kat_burpsuite_normalize", "name": "Burpsuite normalizer", - "description": "Parses Burpsuite XML output (reports). Check https://docs.openkat.nl on how to create the XML file.", + "description": "Parses Burpsuite XML output into findings. Check https://docs.openkat.nl/manual/normalizers.html#burp-suite on how to create the XML file.", "consumes": [ "xml/burp-export" ], diff --git a/boefjes/boefjes/plugins/kat_calvin/normalizer.json b/boefjes/boefjes/plugins/kat_calvin/normalizer.json index 601433e8681..c596dbdcd4a 100644 --- a/boefjes/boefjes/plugins/kat_calvin/normalizer.json +++ b/boefjes/boefjes/plugins/kat_calvin/normalizer.json @@ -1,5 +1,7 @@ { "id": "calvin-normalize", + "name": "Calvin", + "description": "Produces applications and incidents for Calvin.", "consumes": [ "boefje/calvin" ], diff --git a/boefjes/boefjes/plugins/kat_censys/boefje.json b/boefjes/boefjes/plugins/kat_censys/boefje.json index e8c15547c76..ef6c3ab9a67 100644 --- a/boefjes/boefjes/plugins/kat_censys/boefje.json +++ b/boefjes/boefjes/plugins/kat_censys/boefje.json @@ -1,14 +1,10 @@ { "id": "censys", "name": "Censys", - "description": "Use Censys to discover open ports, services and certificates", + "description": "Use Censys to discover open ports, services and certificates. Requires and API key.", "consumes": [ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "CENSYS_API_ID", - "CENSYS_API_SECRET" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_censys/normalizer.json b/boefjes/boefjes/plugins/kat_censys/normalizer.json index 446c55cd485..809fc7d7174 100644 --- a/boefjes/boefjes/plugins/kat_censys/normalizer.json +++ b/boefjes/boefjes/plugins/kat_censys/normalizer.json @@ -1,5 +1,6 @@ { "id": "kat_censys_normalize", + "name": "Censys", "consumes": [ "boefje/censys" ], diff --git a/boefjes/boefjes/plugins/kat_crt_sh/boefje.json b/boefjes/boefjes/plugins/kat_crt_sh/boefje.json index 72051dbb411..f9aa67e604e 100644 --- a/boefjes/boefjes/plugins/kat_crt_sh/boefje.json +++ b/boefjes/boefjes/plugins/kat_crt_sh/boefje.json @@ -1,7 +1,7 @@ { "id": "certificate-search", "name": "CRT", - "description": "Certificate search", + "description": "Searches for certificates and new hostnames in the transparency logs of crt.sh.", "consumes": [ "DNSZone" ], diff --git a/boefjes/boefjes/plugins/kat_crt_sh/normalize.py b/boefjes/boefjes/plugins/kat_crt_sh/normalize.py index aba1315df12..3c430005fb9 100644 --- a/boefjes/boefjes/plugins/kat_crt_sh/normalize.py +++ b/boefjes/boefjes/plugins/kat_crt_sh/normalize.py @@ -16,7 +16,6 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: current = fqdn.lstrip(".") network = Network(name="internet") - yield network network_reference = network.reference unique_domains = set() diff --git a/boefjes/boefjes/plugins/kat_crt_sh/normalizer.json b/boefjes/boefjes/plugins/kat_crt_sh/normalizer.json index 5fd671f9719..130bd2b8301 100644 --- a/boefjes/boefjes/plugins/kat_crt_sh/normalizer.json +++ b/boefjes/boefjes/plugins/kat_crt_sh/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_crt_sh_normalize", + "name": "Certificate Transparency logs (crt.sh)", + "description": "Parses data from certificate transparency logs (crt.sh) into hostnames and X509 certificates.", "consumes": [ "boefje/certificate-search" ], diff --git a/boefjes/boefjes/plugins/kat_cve_2023_34039/boefje.json b/boefjes/boefjes/plugins/kat_cve_2023_34039/boefje.json index 9c82a08ec66..0b0f6b6dc6e 100644 --- a/boefjes/boefjes/plugins/kat_cve_2023_34039/boefje.json +++ b/boefjes/boefjes/plugins/kat_cve_2023_34039/boefje.json @@ -1,7 +1,7 @@ { "id": "CVE-2023-34039", - "name": "CVE_2023_34039", - "description": "Check to see if known keys are usable on VMware CVE-2023-34039", + "name": "CVE-2023-34039 - VMware Aria Operations", + "description": "Checks if there are static SSH keys present that can be used for remote code execution on VWware Aria Operations (CVE-2023-34039). This vulnerability can be used to bypass SSH authentication and gain access to the Aria Operations for Networks CLI.", "consumes": [ "IPService" ], diff --git a/boefjes/boefjes/plugins/kat_cve_2023_34039/main.py b/boefjes/boefjes/plugins/kat_cve_2023_34039/main.py index f6e580e1468..d5bd7f4795a 100644 --- a/boefjes/boefjes/plugins/kat_cve_2023_34039/main.py +++ b/boefjes/boefjes/plugins/kat_cve_2023_34039/main.py @@ -57,7 +57,8 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, str | bytes]]: "\n".join( (str(coutput), f"{key_file} is allowed access to vRealize Network Insight on {ip}:{port}") ), - ) + ), + ({"openkat/finding"}, "CVE-2023-34039"), ] except Exception: # noqa: S112 diff --git a/boefjes/boefjes/plugins/kat_cve_2023_34039/normalize.py b/boefjes/boefjes/plugins/kat_cve_2023_34039/normalize.py deleted file mode 100644 index b379e8158f1..00000000000 --- a/boefjes/boefjes/plugins/kat_cve_2023_34039/normalize.py +++ /dev/null @@ -1,19 +0,0 @@ -from collections.abc import Iterable - -from boefjes.job_models import NormalizerOutput -from octopoes.models import Reference -from octopoes.models.ooi.findings import CVEFindingType, Finding - - -def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: - ooi = Reference.from_str(input_ooi["primary_key"]) - - if "is allowed access to vRealize Network Insight " in raw.decode(): - finding_type = CVEFindingType(id="CVE-2023-34039") - finding = Finding( - finding_type=finding_type.reference, - ooi=ooi, - description="Service is most likely vulnerable to CVE-2023-34039", - ) - yield finding_type - yield finding diff --git a/boefjes/boefjes/plugins/kat_cve_2023_34039/normalizer.json b/boefjes/boefjes/plugins/kat_cve_2023_34039/normalizer.json deleted file mode 100644 index 4cbb1bddda9..00000000000 --- a/boefjes/boefjes/plugins/kat_cve_2023_34039/normalizer.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "id": "kat_cve_2023_normalize", - "consumes": [ - "boefje/CVE-2023-34039" - ], - "produces": [ - "Finding", - "CVEFindingType" - ] -} diff --git a/boefjes/boefjes/plugins/kat_cve_2023_35078/boefje.json b/boefjes/boefjes/plugins/kat_cve_2023_35078/boefje.json index 07525502bf3..52c93d41450 100644 --- a/boefjes/boefjes/plugins/kat_cve_2023_35078/boefje.json +++ b/boefjes/boefjes/plugins/kat_cve_2023_35078/boefje.json @@ -1,7 +1,7 @@ { "id": "CVE_2023_35078", - "name": "CVE_2023_35078", - "description": "Use NFIR script to find CVE-2023-35078", + "name": "CVE-2023-35078 - Ivanti EPMM", + "description": "Checks websites for the presents of the Ivanti EPMM interface and whether the interface is vulnerable to the remote unauthenticated API access vulnerability (CVE-2023-35078). Script contribution by NFIR.", "consumes": [ "Website" ], diff --git a/boefjes/boefjes/plugins/kat_cve_2023_35078/normalizer.json b/boefjes/boefjes/plugins/kat_cve_2023_35078/normalizer.json index 0b7413eede9..c735560eae4 100644 --- a/boefjes/boefjes/plugins/kat_cve_2023_35078/normalizer.json +++ b/boefjes/boefjes/plugins/kat_cve_2023_35078/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_CVE_2023_35078_normalize", + "name": "CVE-2023-35078 Ivanti EPMM", + "description": "Checks if the Ivanti EPMM website is vulnerable to CVE-2023-35078. Produces a finding if it is vulnerable.", "consumes": [ "boefje/CVE_2023_35078" ], diff --git a/boefjes/boefjes/katalogus/dependencies/__init__.py b/boefjes/boefjes/plugins/kat_cve_2024_6387/__init__.py similarity index 100% rename from boefjes/boefjes/katalogus/dependencies/__init__.py rename to boefjes/boefjes/plugins/kat_cve_2024_6387/__init__.py diff --git a/boefjes/boefjes/plugins/kat_cve_2024_6387/normalize.py b/boefjes/boefjes/plugins/kat_cve_2024_6387/normalize.py new file mode 100644 index 00000000000..0948823cecc --- /dev/null +++ b/boefjes/boefjes/plugins/kat_cve_2024_6387/normalize.py @@ -0,0 +1,68 @@ +""" +CVE-2024-6387 checker +Author: Mischa van Geelen <@rickgeex> + +""" + +from collections.abc import Iterable + +from boefjes.job_models import NormalizerOutput +from octopoes.models import Reference +from octopoes.models.ooi.findings import CVEFindingType, Finding +from packaging.version import Version + +VULNERABLE_VERSIONS = [ + "SSH-2.0-OpenSSH_8.5", + "SSH-2.0-OpenSSH_8.6", + "SSH-2.0-OpenSSH_8.7", + "SSH-2.0-OpenSSH_8.8", + "SSH-2.0-OpenSSH_8.9", + "SSH-2.0-OpenSSH_9.0", + "SSH-2.0-OpenSSH_9.1", + "SSH-2.0-OpenSSH_9.2", + "SSH-2.0-OpenSSH_9.3", + "SSH-2.0-OpenSSH_9.4", + "SSH-2.0-OpenSSH_9.5", + "SSH-2.0-OpenSSH_9.6", + "SSH-2.0-OpenSSH_9.7", +] + + +def is_vulnerable(banner: str) -> bool: + if not any(version in banner for version in VULNERABLE_VERSIONS): + return False + + if banner.startswith("SSH-2.0-OpenSSH_9.2p1 Debian-2+deb12u"): + _, security_update = banner.split("deb12u") + if Version(security_update) >= Version("3"): + return False + elif banner.startswith("SSH-2.0-OpenSSH_9.6p1 Ubuntu-3ubuntu"): + _, security_update = banner.split("3ubuntu") + if Version(security_update) >= Version("13.3"): + return False + elif banner.startswith("SSH-2.0-OpenSSH_9.3p1 Ubuntu-1ubuntu"): + _, security_update = banner.split("1ubuntu") + if Version(security_update) >= Version("3.6"): + return False + elif banner.startswith("SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu"): + _, security_update = banner.split("3ubuntu") + if Version(security_update) >= Version("0.10"): + return False + + return True + + +def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: + ooi = Reference.from_str(input_ooi["primary_key"]) + + banner = raw.decode() + + if banner.startswith("SSH-2.0-OpenSSH") and is_vulnerable(banner): + finding_type = CVEFindingType(id="CVE-2024-6387") + finding = Finding( + finding_type=finding_type.reference, + ooi=ooi, + description="Service is most likely vulnerable to CVE-2024-6387", + ) + yield finding_type + yield finding diff --git a/boefjes/boefjes/plugins/kat_cve_2024_6387/normalizer.json b/boefjes/boefjes/plugins/kat_cve_2024_6387/normalizer.json new file mode 100644 index 00000000000..0e06b9d2362 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_cve_2024_6387/normalizer.json @@ -0,0 +1,12 @@ +{ + "id": "kat_cve_2024_6387_normalize", + "name": "CVE-2024-6387 OpenSSH", + "description": "Checks the service banner for a race condition in OpenSSH server which can result in an unauthenticated remote attacker to trigger that some signals are handled in an unsafe manner (CVE-2024-6387). Requires the Service-Banner-boefje to be enabled.", + "consumes": [ + "openkat/service-banner" + ], + "produces": [ + "Finding", + "CVEFindingType" + ] +} diff --git a/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json b/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json index 2b390197290..f1315d93c33 100644 --- a/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json +++ b/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json @@ -1,13 +1,10 @@ { "id": "cve-finding-types", "name": "CVE Finding Types", - "description": "Hydrate information of CVE finding types from the CVE API", + "description": "Hydrate information of Common Vulnerabilities and Exposures (CVE) finding types from the CVE API", "consumes": [ "CVEFindingType" ], - "environment_keys": [ - "CVEAPI_URL" - ], "scan_level": 0, "enabled": true } diff --git a/boefjes/boefjes/plugins/kat_cve_finding_types/normalizer.json b/boefjes/boefjes/plugins/kat_cve_finding_types/normalizer.json index 6e2d52291aa..6ae5590562d 100644 --- a/boefjes/boefjes/plugins/kat_cve_finding_types/normalizer.json +++ b/boefjes/boefjes/plugins/kat_cve_finding_types/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_cve_finding_types_normalize", + "name": "CVE finding types", + "description": "Parses CVE findings.", "consumes": [ "boefje/cve-finding-types" ], diff --git a/boefjes/boefjes/plugins/kat_cwe_finding_types/boefje.json b/boefjes/boefjes/plugins/kat_cwe_finding_types/boefje.json index a3656aa48c6..abeeaa7d9d0 100644 --- a/boefjes/boefjes/plugins/kat_cwe_finding_types/boefje.json +++ b/boefjes/boefjes/plugins/kat_cwe_finding_types/boefje.json @@ -1,7 +1,7 @@ { "id": "cwe-finding-types", "name": "CWE Finding Types", - "description": "Hydrate information of CWE finding types", + "description": "Hydrate information of Common Weakness Enumeration (CWE) finding types", "consumes": [ "CWEFindingType" ], diff --git a/boefjes/boefjes/plugins/kat_cwe_finding_types/normalizer.json b/boefjes/boefjes/plugins/kat_cwe_finding_types/normalizer.json index 7b19ddd4c99..9b939d07df5 100644 --- a/boefjes/boefjes/plugins/kat_cwe_finding_types/normalizer.json +++ b/boefjes/boefjes/plugins/kat_cwe_finding_types/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_cwe_finding_types_normalize", + "name": "CWE finding", + "description": "Parses CWE findings.", "consumes": [ "boefje/cwe-finding-types" ], diff --git a/boefjes/boefjes/plugins/kat_dicom/boefje.json b/boefjes/boefjes/plugins/kat_dicom/boefje.json index 437829787a9..6cfd4e76498 100644 --- a/boefjes/boefjes/plugins/kat_dicom/boefje.json +++ b/boefjes/boefjes/plugins/kat_dicom/boefje.json @@ -1,7 +1,7 @@ { "id": "dicom", "name": "DICOM", - "description": "Find exposed DICOM servers.", + "description": "Find exposed DICOM servers. DICOM servers are used to process medical imaging information.", "consumes": [ "IPAddressV4", "IPAddressV6" diff --git a/boefjes/boefjes/plugins/kat_dicom/normalizer.json b/boefjes/boefjes/plugins/kat_dicom/normalizer.json index b8e5f1dd49c..74519e6e96c 100644 --- a/boefjes/boefjes/plugins/kat_dicom/normalizer.json +++ b/boefjes/boefjes/plugins/kat_dicom/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_dicom_normalize", + "name": "DICOM servers", + "description": "Parses DICOM output into findings and identified software.", "consumes": [ "boefje/dicom" ], diff --git a/boefjes/boefjes/plugins/kat_dns/boefje.json b/boefjes/boefjes/plugins/kat_dns/boefje.json index 76c36ae1775..5773364b9b6 100644 --- a/boefjes/boefjes/plugins/kat_dns/boefje.json +++ b/boefjes/boefjes/plugins/kat_dns/boefje.json @@ -1,13 +1,9 @@ { "id": "dns-records", - "name": "DnsRecords", - "description": "Fetch the DNS record(s) of a hostname", + "name": "DNS records", + "description": "Fetch the DNS record(s) of a hostname.", "consumes": [ "Hostname" ], - "environment_keys": [ - "RECORD_TYPES", - "REMOTE_NS" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_dns/main.py b/boefjes/boefjes/plugins/kat_dns/main.py index 2db21a40685..ce01870911f 100644 --- a/boefjes/boefjes/plugins/kat_dns/main.py +++ b/boefjes/boefjes/plugins/kat_dns/main.py @@ -6,6 +6,7 @@ from os import getenv import dns.resolver +from dns.edns import EDEOption from dns.name import Name from dns.resolver import Answer @@ -28,6 +29,10 @@ } +class TimeoutException(Exception): + pass + + class ZoneNotFoundException(Exception): pass @@ -48,6 +53,9 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]: requested_dns_name = dns.name.from_text(hostname) resolver = dns.resolver.Resolver() + # https://dnspython.readthedocs.io/en/stable/_modules/dns/edns.html + # enable EDE to get the DNSSEC Bogus return values if the server supports it # codespell-ignore + resolver.use_edns(options=[EDEOption(15)]) nameserver = getenv("REMOTE_NS", "1.1.1.1") resolver.nameservers = [nameserver] @@ -76,6 +84,8 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]: "dmarc_response": get_email_security_records(resolver, hostname, "_dmarc"), "dkim_response": get_email_security_records(resolver, hostname, "_domainkey"), } + if not answers_formatted and results["dmarc_response"] == "Timeout" and results["dmarc_response"] == "Timeout": + raise TimeoutException("No answers from DNS-Server due to timeouts.") return [(set(), json.dumps(results))] @@ -96,6 +106,16 @@ def get_email_security_records(resolver: dns.resolver.Resolver, hostname: str, r try: answer = resolver.resolve(f"{record_subdomain}.{hostname}", "TXT", raise_on_no_answer=False) return answer.response.to_text() + except dns.resolver.NoNameservers as error: + # no servers responded happily, we'll check the response from the first + # https://dnspython.readthedocs.io/en/latest/_modules/dns/rcode.html + # https://www.rfc-editor.org/rfc/rfc8914#name-extended-dns-error-code-6-d + firsterror = error.kwargs["errors"][0] + if firsterror[3] == "SERVFAIL": + edeerror = int(firsterror[4].options[0].code) + if edeerror in (1, 2, 5, 6, 7, 8, 9, 10, 11, 12): # DNSSEC error codes defined in RFC 8914 + return "DNSSECFAIL" # returned when the resolver indicates a DNSSEC failure. + raise # Not dnssec related, unhandled, raise. except dns.resolver.NXDOMAIN: return "NXDOMAIN" except dns.resolver.Timeout: diff --git a/boefjes/boefjes/plugins/kat_dns/normalize.py b/boefjes/boefjes/plugins/kat_dns/normalize.py index ab8dda9a799..5dd3912b268 100644 --- a/boefjes/boefjes/plugins/kat_dns/normalize.py +++ b/boefjes/boefjes/plugins/kat_dns/normalize.py @@ -170,7 +170,7 @@ def register_record(record: DNSRecord) -> DNSRecord: # DKIM dkim_results = results["dkim_response"] - if dkim_results not in ["NXDOMAIN", "Timeout"] and dkim_results.split("\n")[2] == "rcode NOERROR": + if dkim_results not in ["NXDOMAIN", "Timeout", "DNSSECFAIL"] and dkim_results.split("\n")[2] == "rcode NOERROR": yield DKIMExists( hostname=input_hostname.reference, ) diff --git a/boefjes/boefjes/plugins/kat_dns/normalizer.json b/boefjes/boefjes/plugins/kat_dns/normalizer.json index e4a2316eda0..fa9c8a73fa6 100644 --- a/boefjes/boefjes/plugins/kat_dns/normalizer.json +++ b/boefjes/boefjes/plugins/kat_dns/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_dns_normalize", + "name": "DNS records", + "description": "Parses the DNS records.", "consumes": [ "boefje/dns-records" ], diff --git a/boefjes/boefjes/katalogus/storage/__init__.py b/boefjes/boefjes/plugins/kat_dns_version/__init__.py similarity index 100% rename from boefjes/boefjes/katalogus/storage/__init__.py rename to boefjes/boefjes/plugins/kat_dns_version/__init__.py diff --git a/boefjes/boefjes/plugins/kat_dns_version/boefje.json b/boefjes/boefjes/plugins/kat_dns_version/boefje.json new file mode 100644 index 00000000000..3aa66ca3cfd --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/boefje.json @@ -0,0 +1,9 @@ +{ + "id": "dns-bind-version", + "name": "DNS software version", + "description": "Uses the DNS VERSION.BIND command to attempt to learn the servers software.", + "consumes": [ + "IPService" + ], + "scan_level": 2 +} diff --git a/boefjes/boefjes/plugins/kat_dns_version/description.md b/boefjes/boefjes/plugins/kat_dns_version/description.md new file mode 100644 index 00000000000..5ac8b8ea5f2 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/description.md @@ -0,0 +1,3 @@ +# Fetch DNS Server software version + +This boefje tries to detect the DNS Server version by doing a VERSION.BIND call. diff --git a/boefjes/boefjes/plugins/kat_dns_version/main.py b/boefjes/boefjes/plugins/kat_dns_version/main.py new file mode 100644 index 00000000000..40631e61f69 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/main.py @@ -0,0 +1,42 @@ +"""Boefje script for getting namserver version""" + +import json +from os import getenv + +import dns +import dns.message +import dns.query + +from boefjes.job_models import BoefjeMeta + + +def run(boefje_meta: BoefjeMeta) -> list[tuple[set, str | bytes]]: + input_ = boefje_meta.arguments["input"] # input is IPService + ip_port = input_["ip_port"] + if input_["service"]["name"] != "domain": + return [({"boefje/error"}, "Not a DNS service")] + + ip = ip_port["address"]["address"] + port = int(ip_port["port"]) + protocol = ip_port["protocol"] + + timeout = float(getenv("TIMEOUT", 30)) + + method = dns.query.udp if protocol == "udp" else dns.query.tcp + + queries = [ + dns.message.make_query("VERSION.BIND", dns.rdatatype.TXT, dns.rdataclass.CHAOS), + dns.message.make_query("VERSION.SERVER", dns.rdatatype.TXT, dns.rdataclass.CHAOS), + ] + + results = [] + for query in queries: + response = method(query, where=ip, timeout=timeout, port=port) + + try: + answer = response.answer[0] + results.append(answer.to_rdataset().pop().strings[0].decode()) + except IndexError: + pass + + return [(set(), json.dumps(results))] diff --git a/boefjes/boefjes/plugins/kat_dns_version/normalize.py b/boefjes/boefjes/plugins/kat_dns_version/normalize.py new file mode 100644 index 00000000000..b3e805cc1c5 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/normalize.py @@ -0,0 +1,36 @@ +import json +from collections.abc import Iterable + +from boefjes.job_models import NormalizerOutput +from octopoes.models import Reference +from octopoes.models.ooi.software import Software, SoftwareInstance + + +def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: + input_ooi_reference = Reference.from_str(input_ooi["primary_key"]) + + results = json.loads(raw) + for version in results: + if version.startswith("bind"): + name = "bind" + version_number = version.split("-")[1] + elif version.startswith("9."): + name = "bind" + version_number = version + elif version.startswith("Microsoft DNS"): + name = "Microsoft DNS" + version_number = version.replace("Microsoft DNS ", "").split(" ")[0] + elif version.startswith("dnsmasq"): + name = "dnsmasq" + version_number = version.split("-")[1] + elif version.startswith("PowerDNS"): + name = "PowerDNS" + version_number = version.replace("PowerDNS Authoritative Server ", "").split(" ")[0] + else: + name = None + version_number = None + + if name and version_number: + software = Software(name=name, version=version_number) + software_instance = SoftwareInstance(ooi=input_ooi_reference, software=software.reference) + yield from [software, software_instance] diff --git a/boefjes/boefjes/plugins/kat_dns_version/normalizer.json b/boefjes/boefjes/plugins/kat_dns_version/normalizer.json new file mode 100644 index 00000000000..4bd2cad202f --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/normalizer.json @@ -0,0 +1,11 @@ +{ + "id": "dns-bind-version-normalize", + "name": "DNS bind version normalizer", + "consumes": [ + "boefje/dns-bind-version" + ], + "produces": [ + "Software", + "SoftwareInstance" + ] +} diff --git a/boefjes/boefjes/plugins/kat_dns_version/schema.json b/boefjes/boefjes/plugins/kat_dns_version/schema.json new file mode 100644 index 00000000000..6a9fbe29348 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_dns_version/schema.json @@ -0,0 +1,13 @@ +{ + "title": "Arguments", + "type": "object", + "properties": { + "TIMEOUT": { + "title": "TIMEOUT", + "type": "integer", + "description": "Timeout for requests to the targeted dns servers", + "default": 30, + "minimum": 0 + } + } +} diff --git a/boefjes/boefjes/plugins/kat_dns_zone/boefje.json b/boefjes/boefjes/plugins/kat_dns_zone/boefje.json index 25df0af7bcd..cc03e079bd1 100644 --- a/boefjes/boefjes/plugins/kat_dns_zone/boefje.json +++ b/boefjes/boefjes/plugins/kat_dns_zone/boefje.json @@ -1,6 +1,6 @@ { "id": "dns-zone", - "name": "DnsZone", + "name": "DNS zone", "description": "Fetch the parent DNS zone of a DNS zone", "consumes": [ "DNSZone" diff --git a/boefjes/boefjes/plugins/kat_dns_zone/normalizer.json b/boefjes/boefjes/plugins/kat_dns_zone/normalizer.json index c4060c833ec..e9e156f6d2c 100644 --- a/boefjes/boefjes/plugins/kat_dns_zone/normalizer.json +++ b/boefjes/boefjes/plugins/kat_dns_zone/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_dns_zone_normalize", + "name": "DNS zone", + "description": "Parses the parent DNS zone into new hostnames and DNS zones.", "consumes": [ "boefje/dns-zone" ], diff --git a/boefjes/boefjes/plugins/kat_dnssec/boefje.json b/boefjes/boefjes/plugins/kat_dnssec/boefje.json index 7b59b0fae25..8b4d156396e 100644 --- a/boefjes/boefjes/plugins/kat_dnssec/boefje.json +++ b/boefjes/boefjes/plugins/kat_dnssec/boefje.json @@ -1,6 +1,6 @@ { "id": "dns-sec", - "name": "Dnssec", + "name": "DNSSEC", "description": "Validates DNSSec of a hostname", "consumes": [ "Hostname" diff --git a/boefjes/boefjes/plugins/kat_dnssec/normalizer.json b/boefjes/boefjes/plugins/kat_dnssec/normalizer.json index 24877c2e897..670f16592f4 100644 --- a/boefjes/boefjes/plugins/kat_dnssec/normalizer.json +++ b/boefjes/boefjes/plugins/kat_dnssec/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_dnssec_normalize", + "name": "DNS records", + "description": "Parses DNSSEC data into findings.", "consumes": [ "boefje/dns-sec" ], diff --git a/boefjes/boefjes/plugins/kat_external_db/boefje.json b/boefjes/boefjes/plugins/kat_external_db/boefje.json index 1f27e7f9d2e..cbf7ee0c927 100644 --- a/boefjes/boefjes/plugins/kat_external_db/boefje.json +++ b/boefjes/boefjes/plugins/kat_external_db/boefje.json @@ -1,16 +1,9 @@ { "id": "external_db", - "name": "External Database", - "description": "Fetch hostnames and IP addresses/netblocks from an external database with API. See `description.md` for more information.", + "name": "External database host fetcher", + "description": "Fetch hostnames and IP addresses/netblocks from an external database with API. See `description.md` for more information. Useful if you have a large network.", "consumes": [ "Network" ], - "environment_keys": [ - "DB_URL", - "DB_ACCESS_TOKEN", - "DB_ORGANIZATION_IDENTIFIER", - "DB_ENDPOINT_FORMAT", - "REQUESTS_CA_BUNDLE" - ], "scan_level": 0 } diff --git a/boefjes/boefjes/plugins/kat_external_db/description.md b/boefjes/boefjes/plugins/kat_external_db/description.md index f40bffd9733..cd69fa4fb6a 100644 --- a/boefjes/boefjes/plugins/kat_external_db/description.md +++ b/boefjes/boefjes/plugins/kat_external_db/description.md @@ -33,4 +33,4 @@ For example: } ``` -The expected ip and domain (item) key lists can be configured in `normalize.py`. Ranges are expected as strings in CIDR notation. Clearance level for fetched items is set to `L0`. Reference implementation of the API server is in the works. +The expected ip and domain (item) key lists can be configured in `normalize.py`. Ranges are expected as strings in CIDR notation. Clearance level for fetched items is set to `L3` when `BOEFJES_SCAN_PROFILE_WHITELIST='{"kat_external_db_normalize": 3}'` is added to the `.env` file otherwise it is set to `L0`. Reference implementation of the API server is in the works. diff --git a/boefjes/boefjes/plugins/kat_external_db/normalize.py b/boefjes/boefjes/plugins/kat_external_db/normalize.py index e1b7fdb99c9..71595001f92 100644 --- a/boefjes/boefjes/plugins/kat_external_db/normalize.py +++ b/boefjes/boefjes/plugins/kat_external_db/normalize.py @@ -17,6 +17,8 @@ IP_ADDRESS_ITEM_PATH = ["address"] DOMAIN_LIST_PATH = ["domains"] DOMAIN_ITEM_PATH = ["name"] +INDEMNIFICATION_ITEM_PATH = ["indemnification_level"] +DEFAULT_INDEMNIFICATION_LEVEL = 3 def follow_path_in_dict(path, path_dict): @@ -29,6 +31,18 @@ def follow_path_in_dict(path, path_dict): return path_dict +def get_indemnification_level(path_dict): + """Return indemnification level from metadata or default.""" + try: + indemnification_level = int(follow_path_in_dict(path=INDEMNIFICATION_ITEM_PATH, path_dict=path_dict)) + if 0 <= indemnification_level < 5: + return indemnification_level + raise ValueError(f"Invalid indemnificationlevel {indemnification_level}, aborting.") + except KeyError: + logging.info("No integer indemnification level found, using default.") + return DEFAULT_INDEMNIFICATION_LEVEL + + def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: """Yields hostnames, IPv4/6 addresses or netblocks.""" results = json.loads(raw) @@ -37,6 +51,7 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: for address_item in follow_path_in_dict(path=IP_ADDRESS_LIST_PATH, path_dict=results): interface = ip_interface(follow_path_in_dict(path=IP_ADDRESS_ITEM_PATH, path_dict=address_item)) + indemnification_level = get_indemnification_level(path_dict=address_item) address, mask_str = interface.with_prefixlen.split("/") mask = int(mask_str) @@ -50,7 +65,7 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: ip_address = address_type(address=address, network=network.reference) yield ip_address - yield DeclaredScanProfile(reference=ip_address.reference, level=3) + yield DeclaredScanProfile(reference=ip_address.reference, level=indemnification_level) addresses_count += 1 if mask < interface.ip.max_prefixlen: @@ -60,15 +75,17 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: network=network.reference, ) yield block - yield DeclaredScanProfile(reference=block.reference, level=3) + yield DeclaredScanProfile(reference=block.reference, level=indemnification_level) blocks_count += 1 - for hostname in follow_path_in_dict(path=DOMAIN_LIST_PATH, path_dict=results): + for hostname_data in follow_path_in_dict(path=DOMAIN_LIST_PATH, path_dict=results): hostname = Hostname( - name=follow_path_in_dict(path=DOMAIN_ITEM_PATH, path_dict=hostname), network=network.reference + name=follow_path_in_dict(path=DOMAIN_ITEM_PATH, path_dict=hostname_data), network=network.reference ) yield hostname - yield DeclaredScanProfile(reference=hostname.reference, level=3) + yield DeclaredScanProfile( + reference=hostname.reference, level=get_indemnification_level(path_dict=hostname_data) + ) hostnames_count += 1 logging.info( diff --git a/boefjes/boefjes/plugins/kat_external_db/normalizer.json b/boefjes/boefjes/plugins/kat_external_db/normalizer.json index 36d425db438..2d9e72d56e9 100644 --- a/boefjes/boefjes/plugins/kat_external_db/normalizer.json +++ b/boefjes/boefjes/plugins/kat_external_db/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_external_db_normalize", + "name": "External database hosts fetcher", + "description": "Parse data the fetched host data from the external database into hostnames and IP-addresses.", "consumes": [ "boefje/external_db" ], diff --git a/boefjes/boefjes/plugins/kat_fierce/boefje.json b/boefjes/boefjes/plugins/kat_fierce/boefje.json index c198875c8a1..1f7d5c677db 100644 --- a/boefjes/boefjes/plugins/kat_fierce/boefje.json +++ b/boefjes/boefjes/plugins/kat_fierce/boefje.json @@ -1,9 +1,9 @@ { "id": "fierce", "name": "Fierce", - "description": "Use a Fierce scan to find subdomains (with their ip)", + "description": "Perform DNS reconnaissance using Fierce, to help locate non-contiguous IP space and hostnames against specified hostnames. No exploitation is performed.", "consumes": [ "Hostname" ], - "scan_level": 3 + "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_fierce/normalizer.json b/boefjes/boefjes/plugins/kat_fierce/normalizer.json index 536944b4995..82589b6565d 100644 --- a/boefjes/boefjes/plugins/kat_fierce/normalizer.json +++ b/boefjes/boefjes/plugins/kat_fierce/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_fierce_normalize", + "name": "Fierce", + "description": "Parse the DNS reconnaissance data from Fierce into hostnames and/or IP addresses.", "consumes": [ "boefje/fierce" ], diff --git a/boefjes/boefjes/katalogus/tests/__init__.py b/boefjes/boefjes/plugins/kat_finding_normalizer/__init__.py similarity index 100% rename from boefjes/boefjes/katalogus/tests/__init__.py rename to boefjes/boefjes/plugins/kat_finding_normalizer/__init__.py diff --git a/boefjes/boefjes/plugins/kat_finding_normalizer/normalize.py b/boefjes/boefjes/plugins/kat_finding_normalizer/normalize.py new file mode 100644 index 00000000000..e00cc7d08fb --- /dev/null +++ b/boefjes/boefjes/plugins/kat_finding_normalizer/normalize.py @@ -0,0 +1,38 @@ +import re +from collections.abc import Iterable + +from boefjes.job_models import NormalizerOutput +from octopoes.models import Reference +from octopoes.models.ooi.findings import CVEFindingType, Finding, KATFindingType, RetireJSFindingType, SnykFindingType + +CVE_PATTERN = re.compile(r"CVE-\d{4}-\d{4,}") + + +def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]: + ooi = Reference.from_str(input_ooi["primary_key"]) + finding_ids_str = raw.decode() + finding_ids_list = [fid.strip().upper() for fid in finding_ids_str.split(",")] + + finding_type_mapping = { + "CVE": CVEFindingType, + "KAT": KATFindingType, + "SNYK": SnykFindingType, + "RETIREJS": RetireJSFindingType, + } + + for finding_id in finding_ids_list: + parts = finding_id.split("-") + prefix = parts[0] + + if prefix in finding_type_mapping: + if prefix == "CVE" and not CVE_PATTERN.match(finding_id): + raise ValueError(f"{finding_id} is not a valid CVE ID") + + finding_type = finding_type_mapping[prefix](id=finding_id) + finding = Finding( + finding_type=finding_type.reference, + ooi=ooi, + description=f"{finding_id} is found on this OOI", + ) + yield finding_type + yield finding diff --git a/boefjes/boefjes/plugins/kat_finding_normalizer/normalizer.json b/boefjes/boefjes/plugins/kat_finding_normalizer/normalizer.json new file mode 100644 index 00000000000..70adfd46c47 --- /dev/null +++ b/boefjes/boefjes/plugins/kat_finding_normalizer/normalizer.json @@ -0,0 +1,11 @@ +{ + "id": "kat_generic_finding_normalize", + "name": "Finding types", + "consumes": [ + "openkat/finding" + ], + "produces": [ + "Finding", + "CVEFindingType" + ] +} diff --git a/boefjes/boefjes/plugins/kat_green_hosting/boefje.json b/boefjes/boefjes/plugins/kat_green_hosting/boefje.json index 9fe34d17ae8..846b05efa33 100644 --- a/boefjes/boefjes/plugins/kat_green_hosting/boefje.json +++ b/boefjes/boefjes/plugins/kat_green_hosting/boefje.json @@ -1,7 +1,7 @@ { "id": "green-hosting", "name": "GreenHosting", - "description": "Use the Green Web Foundation Partner API to check whether the website is hosted on a green server. Meaning it runs on renewable energy and/or offsets its carbon footprint", + "description": "Use the Green Web Foundation Partner API to check whether the website is hosted on a green server. Meaning it runs on renewable energy and/or offsets its carbon footprint. Does not require an API key.", "consumes": [ "Website" ], diff --git a/boefjes/boefjes/plugins/kat_green_hosting/normalizer.json b/boefjes/boefjes/plugins/kat_green_hosting/normalizer.json index 993413d85e4..714628e5587 100644 --- a/boefjes/boefjes/plugins/kat_green_hosting/normalizer.json +++ b/boefjes/boefjes/plugins/kat_green_hosting/normalizer.json @@ -1,5 +1,7 @@ { "id": "kat_green_hosting_normalize", + "description": "Parses the Green Hosting output into findings.", + "name": "Green Hosting", "consumes": [ "boefje/green-hosting" ], diff --git a/boefjes/boefjes/plugins/kat_kat_finding_types/kat_finding_types.json b/boefjes/boefjes/plugins/kat_kat_finding_types/kat_finding_types.json index 102a0868ebe..00fbbba47a4 100644 --- a/boefjes/boefjes/plugins/kat_kat_finding_types/kat_finding_types.json +++ b/boefjes/boefjes/plugins/kat_kat_finding_types/kat_finding_types.json @@ -21,14 +21,14 @@ "recommendation": "This header is not supported by default by Mozilla. If this header is required for your environment: Set the HTTP header X-Permitted-Cross- Domain-Policies: none in all HTTP responses. Use value master-only if a Flash or Acrobat cross- domain configuration file is used that is placed in the root of the web server" }, "KAT-NO-EXPLICIT-XSS-PROTECTION": { - "description": "This is a deprecated header previously used to prevent against Cross-Site-Scripting attacks. Support in modern browsers could introduce XSS attacks again.", + "description": "The 'X-XSS-Protection' header is a deprecated header previously used to prevent against Cross-Site-Scripting attacks. Support in modern browsers could introduce XSS attacks again.", "source": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection", "risk": "recommendation", "impact": "Reflected cross-site scripting attacks may not be blocked.", - "recommendation": "This header is deprecated and should not be used." + "recommendation": "Remove the deprecated header to reduce the chance of XSS attacks." }, "KAT-NO-X-FRAME-OPTIONS": { - "description": "HTTP header 'X-Frame-Options' is missing. It is possible that the website can be loaded via an