Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #12: Datetime logging format #13

Merged
merged 3 commits on Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def analyze(label_storage: LabelStorage, ocr: OCR, gpt: GPT, log_dir_path: str =
result = ocr.extract_text(document=document)

# Logs the results from document intelligence
now = datetime.now()
now = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
save_text_to_file(result.content, f"{log_dir_path}/{now}.md")

# Generate form from extracted text
Expand Down
24 changes: 12 additions & 12 deletions pipeline/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,23 @@ def convert_specification_values(cls, v):
return v

class FertiliserForm(BaseModel):
company_name: Optional[str] = ""
company_address: Optional[str] = ""
company_website: Optional[str] = ""
company_phone_number: Optional[str] = ""
manufacturer_name: Optional[str] = ""
manufacturer_address: Optional[str] = ""
manufacturer_website: Optional[str] = ""
manufacturer_phone_number: Optional[str] = ""
fertiliser_name: Optional[str] = ""
registration_number: Optional[str] = ""
lot_number: Optional[str] = ""
company_name: Optional[str] = None
company_address: Optional[str] = None
company_website: Optional[str] = None
company_phone_number: Optional[str] = None
manufacturer_name: Optional[str] = None
manufacturer_address: Optional[str] = None
manufacturer_website: Optional[str] = None
manufacturer_phone_number: Optional[str] = None
fertiliser_name: Optional[str] = None
registration_number: Optional[str] = None
lot_number: Optional[str] = None
weight: List[Value] = []
density: Optional[Value] = None
volume: Optional[Value] = None
npk: Optional[str] = Field(None)
guaranteed_analysis: List[NutrientValue] = []
warranty: Optional[str] = ""
warranty: Optional[str] = None
cautions_en: List[str] = None
instructions_en: List[str] = []
micronutrients_en: List[NutrientValue] = []
Expand Down
56 changes: 41 additions & 15 deletions pipeline/gpt.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,52 @@
import os
import dspy
from dspy import Prediction
from openai.types.chat.completion_create_params import ResponseFormat

# Constants
# Names of the models that support the response_format option.
MODELS_WITH_RESPONSE_FORMAT = ["ailab-llm", "ailab-llm-gpt-4o"]

# Output specification given to the model: the JSON keys the generated form
# may contain, the expected shape of each value, and the language/format
# requirements. generate_form() passes this string as the `specification`
# input of the ProduceLabelForm signature.
# The text below is runtime prompt content; edit it only to change the prompt.
SPECIFICATION = """
Keys:
"company_name"
"company_address"
"company_website"
"company_phone_number"
"manufacturer_name"
"manufacturer_address"
"manufacturer_website"
"manufacturer_phone_number"
"fertiliser_name"
"registration_number" (a series of letters and numbers)
"lot_number"
"weight" (array of objects with "value", and "unit")
"density" (an object with "value", and "unit")
"volume" (an object with "value", and "unit")
"npk" (format: "number-number-number") **important
"guaranteed_analysis" (array of objects with "nutrient", "value", and "unit") **important
"warranty"
"cautions_en" (array of strings)
"instructions_en" (array of strings)
"micronutrients_en" (array of objects with "nutrient", "value", and "unit")
"ingredients_en" (array of objects with "nutrient", "value", and "unit")
"specifications_en" (array of objects with "humidity", "ph", and "solubility")
"first_aid_en" (array of strings)
"cautions_fr" (array of strings)
"instructions_fr" (array of strings)
"micronutrients_fr" (array of objects with "nutrient", "value", and "unit")
"ingredients_fr" (array of objects with "nutrient", "value", and "unit")
"specifications_fr" (array of objects with "humidity", "ph", and "solubility")
"first_aid_fr" (array of strings)

Requirements:
The content of keys with the suffix _en must be in English.
The content of keys with the suffix _fr must be in French.
Translation of the text is prohibited.
You are prohibited from generating any text that is not part of the JSON.
The JSON must contain exclusively keys specified in "keys".
"""

class ProduceLabelForm(dspy.Signature):
"""
You are a fertilizer label inspector working for the Canadian Food Inspection Agency.
Expand Down Expand Up @@ -48,20 +86,8 @@ def __init__(self, api_endpoint, api_key, deployment):
)

def generate_form(self, prompt) -> Prediction:
prompt_path = os.getenv("PROMPT_PATH")
if not prompt_path:
raise EnvironmentError("PROMPT_PATH environment variable is not set.")

try:
with open(prompt_path, 'r') as prompt_file:
system_prompt = prompt_file.read()
except FileNotFoundError:
raise FileNotFoundError(f"Prompt file not found at {prompt_path}")
except Exception as e:
raise IOError(f"An error occurred while reading the prompt file: {e}")

with dspy.context(lm=self.dspy_client, experimental=True):
signature = dspy.ChainOfThought(ProduceLabelForm)
prediction = signature(specification=system_prompt, text=prompt)
prediction = signature(specification=SPECIFICATION, text=prompt)

return prediction
37 changes: 0 additions & 37 deletions prompt.txt

This file was deleted.

Loading