Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert better match info #315

Merged
merged 2 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions alembic/versions/587c186d91ee_better_match_information.py

This file was deleted.

23 changes: 0 additions & 23 deletions alembic/versions/a62a93704798_add_distributions.py

This file was deleted.

3 changes: 1 addition & 2 deletions docs/source/database_schema.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ Database Schema
pending_by text,
finished_by text,
commit_hash text,
fail_reason text,
files jsonb
fail_reason text
);

ALTER TABLE ONLY public.download_urls
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,3 @@ omit = [

[tool.coverage.report]
fail_under = 100
exclude_also = ["if TYPE_CHECKING:"]
1 change: 0 additions & 1 deletion src/mainframe/endpoints/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def submit_results(
scan.score = result.score
scan.finished_by = auth.subject
scan.commit_hash = result.commit
scan.distributions = result.distributions

# These are the rules that already have an entry in the database
rules = session.scalars(select(Rule).where(Rule.name.in_(result.rules_matched))).all()
Expand Down
26 changes: 0 additions & 26 deletions src/mainframe/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1 @@
"""Database models."""

from typing import Optional, Any, Type
from pydantic import BaseModel
from sqlalchemy import Dialect, TypeDecorator
from sqlalchemy.dialects.postgresql import JSONB


class Pydantic[T: BaseModel](TypeDecorator[T]):
"""TypeDecorator to convert between Pydantic models and JSONB."""

impl = JSONB
cache_ok = True

def __init__(self, pydantic_type: Type[T]):
super().__init__()
self.pydantic_type = pydantic_type

def process_bind_param(self, value: Optional[T], dialect: Dialect) -> dict[str, Any]:
if value:
return value.model_dump()
else:
return {}

def process_result_value(self, value: Any, dialect: Dialect) -> Optional[T]:
if value:
return self.pydantic_type.model_validate(value)
5 changes: 0 additions & 5 deletions src/mainframe/models/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@
relationship,
)

from mainframe.models import Pydantic
from mainframe.models.schemas import Distributions


class Base(MappedAsDataclass, DeclarativeBase, kw_only=True):
pass
Expand Down Expand Up @@ -102,8 +99,6 @@ class Scan(Base):

commit_hash: Mapped[Optional[str]] = mapped_column(default=None)

distributions: Mapped[Optional[Distributions]] = mapped_column(Pydantic(Distributions), default=None)


Index(None, Scan.status, postgresql_where=or_(Scan.status == Status.QUEUED, Scan.status == Status.PENDING))

Expand Down
62 changes: 3 additions & 59 deletions src/mainframe/models/schemas.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,10 @@
from __future__ import annotations

import datetime
from enum import Enum
from typing import TYPE_CHECKING, Annotated, Any, Optional

from pydantic import BaseModel, Field, field_serializer, ConfigDict, RootModel

if TYPE_CHECKING:
from mainframe.models.orm import Scan

type MetaValue = int | float | bool | str | bytes


class Range(BaseModel):
"""Represents the inclusive range in the source file that was matched."""

start: int
end: int


class Match(BaseModel):
"""Represents a specific match by a pattern in a rule."""

range: Range
data: list[Annotated[int, Field(ge=0, lt=256)]]


class PatternMatch(BaseModel):
"""Represents the data matched by a pattern inside a rule."""
from typing import Any, Optional

identifier: str
matches: list[Match]
from pydantic import BaseModel, Field, field_serializer, ConfigDict


class RuleMatch(BaseModel):
"""Represents the matches of a rule on a file"""

identifier: str
patterns: list[PatternMatch]
metadata: dict[str, MetaValue]


class File(BaseModel):
"""Represents a file and the rule matches for it."""

path: str
matches: list[RuleMatch]


Files = list[File]


class Distribution(BaseModel):
download_url: str
files: Files


Distributions = RootModel[list[Distribution]]
from .orm import Scan


class ServerMetadata(BaseModel):
Expand Down Expand Up @@ -96,8 +44,6 @@ class Package(BaseModel):

commit_hash: Optional[str]

distributions: Optional[Distributions]

@classmethod
def from_db(cls, scan: Scan):
return cls(
Expand All @@ -118,7 +64,6 @@ def from_db(cls, scan: Scan):
finished_at=scan.finished_at,
finished_by=scan.finished_by,
commit_hash=scan.commit_hash,
distributions=scan.distributions,
)

@field_serializer(
Expand Down Expand Up @@ -187,7 +132,6 @@ class PackageScanResult(PackageSpecifier):
score: int = 0
inspector_url: Optional[str] = None
rules_matched: list[str] = []
distributions: Optional[Distributions] = None


class PackageScanResultFail(PackageSpecifier):
Expand Down
44 changes: 0 additions & 44 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,10 @@
from mainframe.json_web_token import AuthenticationData
from mainframe.models.orm import Scan, Status
from mainframe.models.schemas import (
Distribution,
Distributions,
File,
Files,
Match,
Package,
PackageScanResult,
PackageScanResultFail,
PackageSpecifier,
PatternMatch,
Range,
RuleMatch,
)
from mainframe.rules import Rules

Expand Down Expand Up @@ -88,32 +80,6 @@ def test_package_lookup_rejects_invalid_combinations(
assert e.value.status_code == 400


def test_package_lookup_files(db_session: Session):
    """Check that `lookup_package_info` hands back the distributions stored on a scan."""

    # Assemble the nested match structure from the innermost piece outwards.
    byte_match = Match(range=Range(start=0, end=4), data=[0xDE, 0xAD, 0xBE, 0xEF])
    rule_match = RuleMatch(
        identifier="rule1",
        patterns=[PatternMatch(identifier="$pat", matches=[byte_match])],
        metadata={"author": "remmy", "score": 5},
    )
    scanned_files = Files([File(path="dist1/a/b.py", matches=[rule_match])])
    distros = Distributions([Distribution(download_url="http://example.com", files=scanned_files)])

    finished_scan = Scan(
        name="abc",
        version="1.0.0",
        status=Status.FINISHED,
        queued_by="remmy",
        distributions=distros,
    )

    # Persist the scan inside its own transaction before looking it up.
    with db_session.begin():
        db_session.add(finished_scan)

    found = lookup_package_info(db_session, name="abc", version="1.0.0")

    assert found[0].distributions == distros


def test_handle_success(db_session: Session, test_data: list[Scan], auth: AuthenticationData, rules_state: Rules):
job = get_jobs(db_session, auth, rules_state, batch=1)

Expand All @@ -122,22 +88,13 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen
name = job.name
version = job.version

range_ = Range(start=0, end=4)
match = Match(range=range_, data=[0xDE, 0xAD, 0xBE, 0xEF])
pattern = PatternMatch(identifier="$pat", matches=[match])
rule = RuleMatch(identifier="rule1", patterns=[pattern], metadata={"author": "remmy", "score": 5})
file = File(path="dist1/a/b.py", matches=[rule])
files = Files([file])
distros = Distributions([Distribution(download_url="http://example.com", files=files)])

body = PackageScanResult(
name=job.name,
version=job.version,
commit=rules_state.rules_commit,
score=2,
inspector_url="test inspector url",
rules_matched=["a", "b", "c"],
distributions=distros,
)
submit_results(body, db_session, auth)

Expand All @@ -150,7 +107,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen
assert record.score == 2
assert record.inspector_url == "test inspector url"
assert {rule.name for rule in record.rules} == {"a", "b", "c"}
assert record.distributions == distros
else:
assert all(scan.status != Status.QUEUED for scan in test_data)

Expand Down
Loading