Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed some banana seeds edge case & error catching #72

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions bananalyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
from typing import List
from urllib.parse import urlparse
from botocore.exceptions import ClientError

from bananalyzer import AgentRunner
from bananalyzer.data.banana_seeds import download_mhtml
Expand Down Expand Up @@ -322,14 +323,22 @@ def main() -> int:
print("=======================================================================")
return 0

for example in examples:
missing_mhtml = []
for i, example in enumerate(examples):
if example.mhtml_url is not None:
mhtml_path = get_examples_path() / example.id / "index.mhtml"
if not mhtml_path.exists():
mhtml_str = download_mhtml(example.mhtml_url)
mhtml_path.parent.mkdir(parents=True, exist_ok=False)
with open(mhtml_path, "w") as file:
file.write(mhtml_str)
try:
mhtml_str = download_mhtml(example.mhtml_url)
except ClientError:
missing_mhtml.append(i)
continue
else:
mhtml_path.parent.mkdir(parents=True, exist_ok=False)
with open(mhtml_path, "w") as file:
file.write(mhtml_str)

examples = [e for i, e in enumerate(examples) if i not in missing_mhtml]
Comment on lines +326 to +341
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should just delete the example IDs that are missing MHTML, or handle them directly. I don't think we need this error handling (it could also cause us to misunderstand how many tests we really have).


# Load the desired tests
generator = PytestTestGenerator()
Expand Down
13 changes: 7 additions & 6 deletions bananalyzer/data/banana_seeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def download_examples_from_s3(examples_bucket: str) -> List[Dict[str, Any]]:
if example["fetch_id"] == "":
del example["fetch_id"]

for row in example["evals"][0]["expected"]:
row = {k: v for k, v in row.items() if not k.startswith("__")}
if "context" in row:
for key, value in row["context"].items():
row[key] = value
del row["context"]
if isinstance(example["evals"][0]["expected"], list):
for row in example["evals"][0]["expected"]:
row = {k: v for k, v in row.items() if not k.startswith("__")}
if "context" in row:
for key, value in row["context"].items():
row[key] = value
del row["context"]

examples.append(example)

Expand Down
37 changes: 26 additions & 11 deletions bananalyzer/data/fetch_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,37 @@ class File(BaseModel):
class GovernmentContractSchema(BaseModel):
id: str = Field(description="Unique identifier for the contract")
title: str = Field(description="Title of the contract")
description: Optional[str] = Field(default=None,
description="Description or synopsis field. Combine the solicitation summary and additional instructuions section / process.")
location: Optional[str] = Field(default=None,
description="Location of the issuer. May be a combination of city and state")
type: Optional[str] = Field(default=None,
description="Type of contract. May be placed under `Solicitation Type`, `Opportunity Type`, `Market Type`, etc")
category: Optional[str] = Field(default=None, description="Category the contract falls under if given")
description: Optional[str] = Field(
default=None,
description="Description or synopsis field. Combine the solicitation summary and additional instructuions section / process.",
)
location: Optional[str] = Field(
default=None,
description="Location of the issuer. May be a combination of city and state",
)
type: Optional[str] = Field(
default=None,
description="Type of contract. May be placed under `Solicitation Type`, `Opportunity Type`, `Market Type`, etc",
)
category: Optional[str] = Field(
default=None, description="Category the contract falls under if given"
)

posted_date: Optional[datetime] = Field(default=None)
due_date: Optional[datetime] = Field(default=None)

buyer_name: str = Field(description="Name of the company, organization, or agency that issued the contract")
buyer_name: str = Field(
description="Name of the company, organization, or agency that issued the contract"
)
buyer_contact_name: str = Field(
description="Name of the specific individual that is championing the contract, if available")
buyer_contact_number: Optional[str] = Field(default=None, description="Contact number of the issuer")
buyer_contact_email: Optional[str] = Field(default=None, description="Contact email of the issuer")
description="Name of the specific individual that is championing the contract, if available"
)
buyer_contact_number: Optional[str] = Field(
default=None, description="Contact number of the issuer"
)
buyer_contact_email: Optional[str] = Field(
default=None, description="Contact email of the issuer"
)

attachments: List[File] = Field(
default_factory=list,
Expand Down
Loading