Issue #49: Use the pydantic model type in the dspy Signature #51

Open
wants to merge 4 commits into main
4 changes: 4 additions & 0 deletions .gitignore
@@ -4,7 +4,11 @@ __pycache__/
*$py.class

# tests
logs/
test_logs/
reports/
test_data/
performance_assessment.py
Contributor:

I don't think adding test_data/ and performance_assessment.py into the gitignore was intended (nor desirable).


# VS Code
.vscode
189 changes: 189 additions & 0 deletions performance_assessment.py
Contributor:

We forgot to remove this from this PR.

@@ -0,0 +1,189 @@
import os
import time
import json
import shutil
import datetime
import csv
import tempfile
from typing import Any

from dotenv import load_dotenv
from pipeline import analyze, LabelStorage, OCR, GPT
from tests import levenshtein_similarity

ACCURACY_THRESHOLD = 80.0


def extract_leaf_fields(
    data: dict | list, parent_key: str = ''
) -> dict[str, str | int | float | bool | None]:
    leaves: dict[str, str | int | float | bool | None] = {}

    if isinstance(data, dict):
        for key, value in data.items():
            new_key = f"{parent_key}.{key}" if parent_key else key
            if isinstance(value, (dict, list)):
                leaves.update(extract_leaf_fields(value, new_key))
            else:
                leaves[new_key] = value
    elif isinstance(data, list):
        for index, item in enumerate(data):
            new_key = f"{parent_key}[{index}]" if parent_key else f"[{index}]"
            if isinstance(item, (dict, list)):
                leaves.update(extract_leaf_fields(item, new_key))
            else:
                leaves[new_key] = item

    return leaves

# Example (illustrative):
#   extract_leaf_fields({"weight": [{"value": 10.2, "unit": "kg"}], "npk": None})
#   -> {"weight[0].value": 10.2, "weight[0].unit": "kg", "npk": None}


def find_test_cases(labels_folder: str) -> list[tuple[list[str], str]]:
    test_cases = []
    label_directories = sorted(
        os.path.join(labels_folder, directory)
        for directory in os.listdir(labels_folder)
        if os.path.isdir(os.path.join(labels_folder, directory)) and directory.startswith("label_")
    )
    if len(label_directories) == 0:
        raise FileNotFoundError(f"No label directories found in {labels_folder}")

    for label_directory in label_directories:
        files = os.listdir(label_directory)
        image_paths = [
            os.path.join(label_directory, file)
            for file in files
            if file.lower().endswith((".png", ".jpg"))
        ]
        expected_json_path = os.path.join(label_directory, "expected_output.json")

        if not image_paths:
            raise FileNotFoundError(f"No image files found in {label_directory}")
        if not os.path.exists(expected_json_path):
            raise FileNotFoundError(f"Expected output JSON not found in {label_directory}")
        test_cases.append((image_paths, expected_json_path))

    return test_cases


def calculate_accuracy(
    expected_fields: dict[str, str],
    actual_fields: dict[str, str]
) -> dict[str, dict[str, str | float]]:
    accuracy_results = {}
    for field_name, expected_value in expected_fields.items():
        actual_value = actual_fields.get(field_name, "FIELD_NOT_FOUND")
        if actual_value == "FIELD_NOT_FOUND":
            score = 0.0
        else:
            # levenshtein_similarity returns a 0..1 ratio (the test suite compares it
            # against 0.90/0.95), so scale to a percentage to match ACCURACY_THRESHOLD.
            score = levenshtein_similarity(str(expected_value), str(actual_value)) * 100
        pass_fail = "Pass" if score >= ACCURACY_THRESHOLD else "Fail"
        accuracy_results[field_name] = {
            'score': score,
            'expected_value': expected_value,
            'actual_value': actual_value,
            'pass_fail': pass_fail,
        }
    return accuracy_results


def run_test_case(
    test_case_number: int, image_paths: list[str], expected_json_path: str
) -> dict[str, Any]:
    # Copy images to temporary files to prevent deletion due to LabelStorage behavior
    copied_image_paths = []
    for image_path in image_paths:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(image_path)[1])
        shutil.copy2(image_path, temp_file.name)
        copied_image_paths.append(temp_file.name)

    # Initialize LabelStorage, OCR, GPT
    storage = LabelStorage()
    for image_path in copied_image_paths:
        storage.add_image(image_path)

    ocr = OCR(os.getenv("AZURE_API_ENDPOINT"), os.getenv("AZURE_API_KEY"))
    gpt = GPT(
        os.getenv("AZURE_OPENAI_ENDPOINT"),
        os.getenv("AZURE_OPENAI_KEY"),
        os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    )

    # Run performance test
    start_time = time.time()
    actual_output = analyze(storage, ocr, gpt)  # the `analyze` function deletes the images it processes, so we don't need to clean up our image copies
    performance = time.time() - start_time

    # Process actual output
    actual_fields = extract_leaf_fields(json.loads(actual_output.model_dump_json()))

    # Load expected output
    with open(expected_json_path, 'r') as file:
        expected_fields = extract_leaf_fields(json.load(file))

    # Calculate accuracy
    accuracy_results = calculate_accuracy(expected_fields, actual_fields)

    # Return results
    return {
        'test_case_number': test_case_number,
        'performance': performance,
        'accuracy_results': accuracy_results,
    }


def generate_csv_report(results: list[dict[str, Any]]) -> None:
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
    os.makedirs("reports", exist_ok=True)
    report_path = os.path.join("reports", f"test_results_{timestamp}.csv")

    with open(report_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "Test Case",
            "Field Name",
            "Pass/Fail",
            "Accuracy Score",
            "Pipeline Speed (seconds)",
            "Expected Value",
            "Actual Value",
        ])

        for result in results:
            test_case_number = result['test_case_number']
            performance = result['performance']
            for field_name, data in result['accuracy_results'].items():
                writer.writerow([
                    test_case_number,
                    field_name,
                    data['pass_fail'],
                    f"{data['score']:.2f}",
                    f"{performance:.4f}",
                    data['expected_value'],
                    data['actual_value'],
                ])
    print(f"CSV report generated and saved to: {report_path}")


def main() -> None:
    load_dotenv()

    # Validate required environment variables
    required_vars = [
        "AZURE_API_ENDPOINT",
        "AZURE_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_KEY",
        "AZURE_OPENAI_DEPLOYMENT",
    ]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        raise RuntimeError(f"Missing required environment variables: {', '.join(missing_vars)}")

    test_cases = find_test_cases("test_data/labels")
    results = []
    for idx, (image_paths, expected_json_path) in enumerate(test_cases, 1):
        result = run_test_case(idx, image_paths, expected_json_path)
        results.append(result)
    generate_csv_report(results)


if __name__ == "__main__":
    main()
13 changes: 5 additions & 8 deletions pipeline/__init__.py
@@ -4,7 +4,7 @@
from .gpt import GPT # noqa: F401

import os
import json

Check failure on line 7 in pipeline/__init__.py (GitHub Actions / lint-test): Ruff (F401) pipeline/__init__.py:7:8: `json` imported but unused
from datetime import datetime

def save_text_to_file(text: str, output_path: str): # pragma: no cover
@@ -40,15 +40,12 @@
    # Generate inspection from extracted text
    prediction = gpt.create_inspection(result.content)

    # Logs the results from GPT
    save_text_to_file(prediction.inspection, f"{log_dir_path}/{now}.json")
    save_text_to_file(prediction.rationale, f"{log_dir_path}/{now}.txt")

    # Load a JSON from the text
    raw_json = json.loads(prediction.inspection)

    # Check the conformity of the JSON
    inspection = FertilizerInspection(**raw_json)
    inspection = prediction.inspection

    # Logs the results from GPT
    save_text_to_file(prediction.reasoning, f"{log_dir_path}/{now}.txt")
    save_text_to_file(inspection.model_dump_json(indent=2), f"{log_dir_path}/{now}.json")

    # Clear the label cache
    label_storage.clear()
40 changes: 23 additions & 17 deletions pipeline/gpt.py
@@ -5,14 +5,24 @@

from pipeline.inspection import FertilizerInspection

from phoenix.otel import register
from openinference.instrumentation.dspy import DSPyInstrumentor

tracer_provider = register(
    project_name="gpt-ferti",  # Default is 'default'
    endpoint="http://0.0.0.0:4317",  # gRPC endpoint given by Phoenix when starting the server (default is "http://localhost:4317")
)

DSPyInstrumentor().instrument(tracer_provider=tracer_provider)

Contributor:
I recommend exercising caution when adding lines 8-17 to the main branch. If the endpoint isn’t valid (i.e., Phoenix isn’t set up), it WILL cause the entire pipeline to stop functioning (I had that happen to me this morning).

To mitigate this, I suggest either removing the observability code for now or commenting it out by default. We can uncomment it when we need to use traces for debugging.

Additionally, I’m unsure if this feature should have its own issue and PR, or if it’s acceptable to include it in this PR.
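
A minimal sketch of that guard, assuming a hypothetical PHOENIX_ENDPOINT environment variable (not part of this PR): tracing stays off unless explicitly configured, and a failed registration degrades to a warning instead of killing the pipeline.

import os

# Opt-in tracing: only register Phoenix when an endpoint is explicitly configured.
phoenix_endpoint = os.getenv("PHOENIX_ENDPOINT")
if phoenix_endpoint:
    try:
        from phoenix.otel import register
        from openinference.instrumentation.dspy import DSPyInstrumentor

        tracer_provider = register(
            project_name="gpt-ferti",
            endpoint=phoenix_endpoint,  # e.g. "http://localhost:4317"
        )
        DSPyInstrumentor().instrument(tracer_provider=tracer_provider)
    except Exception as exc:
        # Observability must never take the pipeline down with it.
        print(f"Phoenix tracing disabled: {exc}")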

SUPPORTED_MODELS = {
    "gpt-3.5-turbo": {
        "max_token": 12000,
        "max_tokens": 12000,
        "api_version": "2024-02-01",
        "response_format": { "type": "json_object" },
    },
    "gpt-4o": {
        "max_token": None,
        "max_tokens": None,
        "api_version": "2024-02-15-preview",
        "response_format": { "type": "json_object" },
    }
@@ -33,10 +43,9 @@ class ProduceLabelForm(dspy.Signature):
    Your response should be accurate, intelligible information in JSON, and contain all the text from the provided text.
    """

    text = dspy.InputField(desc="The text of the fertilizer label extracted using OCR.")
    json_schema = dspy.InputField(desc="The JSON schema of the object to be returned.")
    requirements = dspy.InputField(desc="The instructions and guidelines to follow.")
    inspection = dspy.OutputField(desc="Only a complete JSON.")
    text: str = dspy.InputField(desc="The text of the fertilizer label extracted using OCR.")
    requirements: str = dspy.InputField(desc="The instructions and guidelines to follow.")
    inspection: FertilizerInspection = dspy.OutputField(desc="The inspection results.")

class GPT:
    def __init__(self, api_endpoint, api_key, deployment_id):
@@ -47,21 +56,18 @@ def __init__(self, api_endpoint, api_key, deployment_id):
        if not config:
            raise ValueError(f"The deployment_id {deployment_id} is not supported.")

        self.dspy_client = dspy.AzureOpenAI(
            user="fertiscan",
        self.lm = dspy.LM(
            model=f"azure/{deployment_id}",
            api_base=api_endpoint,
            api_key=api_key,
            deployment_id=deployment_id,
            # model_type='text',
            api_version=config.get("api_version"),
            max_tokens=config.get("max_token"),
            response_format=config.get("response_format"),
            max_tokens=config["max_tokens"],
            api_version=config["api_version"],
            # response_format=config["response_format"]
        )

    def create_inspection(self, text) -> Prediction:
        with dspy.context(lm=self.dspy_client, experimental=True):
            json_schema = FertilizerInspection.model_json_schema()
            signature = dspy.ChainOfThought(ProduceLabelForm)
            prediction = signature(text=text, json_schema=json_schema, requirements=REQUIREMENTS)
        with dspy.context(lm=self.lm, experimental=True):
            predictor = dspy.TypedChainOfThought(ProduceLabelForm)
            prediction = predictor(text=text, requirements=REQUIREMENTS)

        return prediction
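
For context, a self-contained sketch of the typed-signature pattern this diff adopts; the model, field names, and endpoint values below are illustrative, not part of the PR:

import dspy
from pydantic import BaseModel

class Nutrient(BaseModel):
    nutrient: str
    value: float | None = None
    unit: str | None = None

class ExtractNutrient(dspy.Signature):
    """Extract a single nutrient entry from OCR text."""
    text: str = dspy.InputField(desc="OCR text to parse.")
    result: Nutrient = dspy.OutputField(desc="The parsed nutrient.")

lm = dspy.LM(
    model="azure/gpt-4o",  # hypothetical deployment
    api_base="https://example.openai.azure.com",  # hypothetical endpoint
    api_key="...",
    api_version="2024-02-15-preview",
)
with dspy.context(lm=lm, experimental=True):
    predictor = dspy.TypedChainOfThought(ExtractNutrient)
    prediction = predictor(text="Humic acid ......... 0.6 %")
    print(prediction.result)  # e.g. Nutrient(nutrient='Humic acid', value=0.6, unit='%')

Because the output field is annotated with a pydantic model, dspy parses and validates the LM's JSON into that model, which is what lets pipeline/__init__.py drop its manual json.loads / FertilizerInspection(**raw_json) step.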
78 changes: 78 additions & 0 deletions test_data/labels/label_001/expected_output.json
Contributor:

Same here, test_data stuff should not be merged into main yet.

@@ -0,0 +1,78 @@
{
  "company_name": "Nature's aid",
  "company_address": null,
  "company_website": "http://www.SOIL-AID.com",
  "company_phone_number": null,
  "manufacturer_name": "Diamond Fertilizers Inc.",
  "manufacturer_address": "PO Box 5508 stn Main Hight River, AB CANADA T1V 1M6",
  "manufacturer_website": null,
  "manufacturer_phone_number": "1 (866) 337-2943",
  "fertiliser_name": "Super Crop®",
  "registration_number": "2018007A",
  "lot_number": null,
  "weight": [
    {
      "value": 10.2,
      "unit": "kg"
    },
    {
      "value": 22.5,
      "unit": "lb"
    }
  ],
  "density": {
    "value": null,
    "unit": null
  },
  "volume": {
    "value": 10,
    "unit": "liter"
  },
  "npk": null,
  "guaranteed_analysis_en": {
    "title": "Guaranteed Analysis",
    "nutrients": [
      {
        "nutrient": "Seaweed Extract (Ascophyllum nodosum)",
        "value": 8.5,
        "unit": "%"
      },
      {
        "nutrient": "Humic acid",
        "value": 0.6,
        "unit": "%"
      }
    ]
  },
  "guaranteed_analysis_fr": {
    "title": "Analyse Garantie",
    "nutrients": [
      {
        "nutrient": "extraits d'algues (ascophylle noueuse)",
        "value": 8.5,
        "unit": "%"
      },
      {
        "nutrient": "acide humique",
        "value": 0.6,
        "unit": "%"
      }
    ]
  },
  "ingredients_en": null,
  "ingredients_fr": null,
  "cautions_en": null,
  "cautions_fr": null,
  "instructions_en": [
    "Apply to soil by sprayer or inject into irrigation. May be tank mixed with most all AG-Chemicals. Tank mixing will accelerate the activity and uptake of most AG-Chemicals and micro-nutrients (a jar test is recommended to verify compatibility).",
    "NORMAL RATE: 310 ml per ha (125 mL/ac) Case treats approx. 120 ac. Apply with 20 to 100 Liters water per hectare (2-10 gal/ac) Apply 1-3 times to FOLIAGE of all crops (grains, oilseeds, legumes, etc.) during growth stage of plant (2-leaf to flower).",
    "HIGH RATE: 410 ml per ha (165 mL/ac) Case treats approx. 160 ac. Apply with 20 to 100 Liters water per hectare (2-10 gal/ac) Apply 1-3 times to FOLIAGE of all crops (grains, oilseeds, legumes, etc.) during growth stage of plant (2-leaf to flower).",
    "NOTE: Applying too late in growth stages (filling and maturity) could trigger plant to ripen prematurely."
  ],
  "instructions_fr": [
    "Épandez sur le sol par vaporisation ou injectez au système d'irrigation. Peut être mélangé en réservoir avec la plupart des produits agrochimiques. Le mélange en réservoir favorisera l'activité et l'absorption de la plupart des produits agrochimiques et des oligo-éléments (un essai de floculation est recommandé pour s'assurer de la compatibilité du produit).",
    "DOSE NORMALE : 310 ml par ha (125 ml/ac) Avec une caisse, on traite environ 80 acres. Épandez le produit mélangé à 20 à 100 litres d'eau par hectare (2 à 10 gal/acre) Répandez sur le FEUILLAGE de toutes vos cultures (céréales, oléagineux, légumineuses, etc.), de une à trois fois, durant la période de croissance de la plante (de l'apparition des 2 feuilles jusqu'à la floraison).",
    "DOSE ÉLEVÉE : 410 ml par ha (165 ml/acre) Avec une caisse, on traite environ 40 acres. Épandez le produit mélangé à 20 à 100 litres d'eau par hectare (2 à 10 gal/acre) Répandez sur le FEUILLAGE de toutes vos cultures (céréales, oléagineux, légumineuses, etc.), de une à trois fois, durant la période de croissance de la plante (de l'apparition des 2 feuilles jusqu'à la floraison).",
    "NOTE : une application trop tardive lors des étapes de croissance (remplissage et maturation) pourrait déclencher un murissement prématuré de la plante."
  ]
}
Binary file added test_data/labels/label_001/img_001.png
Contributor:

idem - should not be merged into main yet.

5 changes: 2 additions & 3 deletions tests/test_gpt.py
@@ -109,9 +109,8 @@ def check_json(self, extracted_info):

    def test_generate_form_gpt(self):
        prediction = self.gpt.create_inspection(self.prompt)
        result_json = json.loads(prediction.inspection)
        # print(json.dumps(result_json, indent=2))
        self.check_json(result_json)
        self.assertIsNotNone(prediction)
        # self.check_json(prediction.inspection)

if __name__ == '__main__':
    unittest.main()
5 changes: 4 additions & 1 deletion tests/test_pipeline.py
@@ -53,7 +53,10 @@ def test_analyze(self):
        # Perform assertions
        self.assertIsInstance(inspection, FertilizerInspection, inspection)
        self.assertIn(Value(value='25', unit='kg'), inspection.weight, inspection)
        self.assertGreater(levenshtein_similarity(inspection.manufacturer_name, "TerraLink"), 0.95, inspection)
        try:
            self.assertGreater(levenshtein_similarity(inspection.company_name, "TerraLink"), 0.95, inspection)
        except AssertionError:
            self.assertGreater(levenshtein_similarity(inspection.manufacturer_name, "TerraLink"), 0.95, inspection)
        self.assertGreater(levenshtein_similarity(inspection.npk, "10-52-0"), 0.90, inspection)

        # Ensure logs are created and then deleted