Skip to content

Commit

Permalink
use type instead of isinstance
Browse files Browse the repository at this point in the history
  • Loading branch information
k-allagbe committed Sep 10, 2024
1 parent bed5896 commit a23ca31
Show file tree
Hide file tree
Showing 10 changed files with 739 additions and 35 deletions.
Binary file added .DS_Store
Binary file not shown.
76 changes: 41 additions & 35 deletions pipeline/inspection.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,58 @@
import re
from typing import List, Optional

from pydantic import BaseModel, Field, field_validator, model_validator


class npkError(ValueError):
    """ValueError subclass reserved for NPK-related failures."""


def extract_first_number(string: Optional[str]) -> Optional[str]:
    """Return the first decimal number found in ``string``, as text.

    A number is an unsigned run of digits with an optional fractional part
    (``"12"`` or ``"12.5"``); signs and exponents are not recognised.

    Args:
        string: Text to scan. ``None`` is accepted and yields ``None``.

    Returns:
        The matched digits as a ``str`` (no numeric conversion is done here),
        or ``None`` when ``string`` is ``None`` or contains no digits.
    """
    if string is None:
        return None
    # Search once; the pattern is the same one used before and after the
    # quote-style change.
    match = re.search(r"\d+(\.\d+)?", string)
    return match.group() if match else None


class NutrientValue(BaseModel):
    """A named nutrient with an optional numeric value and unit."""

    nutrient: str
    value: Optional[float] = None
    unit: Optional[str] = None

    @field_validator("value", mode="before", check_fields=False)
    @classmethod
    def convert_value(cls, v):
        """Normalise the raw ``value`` input before float validation.

        Args:
            v: The raw input supplied for ``value``.

        Returns:
            ``float(v)`` for an ``int`` or ``float``; the first number found
            in the text (as a string, left for pydantic to coerce) for a
            ``str``; ``None`` for anything else, including ``bool``.
        """
        # Exact type comparison is deliberate: bool is a subclass of int, so
        # an isinstance() check would accept True/False as numbers.
        if type(v) in (int, float):
            return float(v)
        if type(v) is str:
            return extract_first_number(v)
        return None


class Value(BaseModel):
    """A numeric value paired with its unit; both fields are required but nullable."""

    value: Optional[float]
    unit: Optional[str]

    @field_validator("value", mode="before", check_fields=False)
    @classmethod
    def convert_value(cls, v):
        """Normalise the raw ``value`` input before float validation.

        Args:
            v: The raw input supplied for ``value``.

        Returns:
            ``float(v)`` for an ``int`` or ``float``; the first number found
            in the text (as a string, left for pydantic to coerce) for a
            ``str``; ``None`` for anything else, including ``bool``.
        """
        # Exact type comparison is deliberate: bool is a subclass of int, so
        # an isinstance() check would accept True/False as numbers.
        if type(v) in (int, float):
            return float(v)
        if type(v) is str:
            return extract_first_number(v)
        return None


class Specification(BaseModel):
    """Humidity, pH and solubility readings; each is required but nullable."""

    humidity: Optional[float] = Field(..., alias="humidity")
    ph: Optional[float] = Field(..., alias="ph")
    solubility: Optional[float]

    @field_validator("humidity", "ph", "solubility", mode="before", check_fields=False)
    @classmethod
    def convert_value(cls, v):
        """Normalise a raw specification reading before float validation.

        Applied to ``humidity``, ``ph`` and ``solubility``.

        Args:
            v: The raw input supplied for the field.

        Returns:
            ``float(v)`` for an ``int`` or ``float``; the first number found
            in the text (as a string, left for pydantic to coerce) for a
            ``str``; ``None`` for anything else, including ``bool``.
        """
        # Exact type comparison is deliberate: bool is a subclass of int, so
        # an isinstance() check would accept True/False as numbers.
        if type(v) in (int, float):
            return float(v)
        if type(v) is str:
            return extract_first_number(v)
        return None


class FertilizerInspection(BaseModel):
company_name: Optional[str] = None
Expand Down Expand Up @@ -86,23 +84,31 @@ class FertilizerInspection(BaseModel):
ingredients_fr: List[NutrientValue] = []
specifications_fr: List[Specification] = []
first_aid_fr: List[str] = None
@field_validator('npk', mode='before')

@field_validator("npk", mode="before")
def validate_npk(cls, v):
if v is not None:
pattern = re.compile(r'^\d+(\.\d+)?-\d+(\.\d+)?-\d+(\.\d+)?$')
pattern = re.compile(r"^\d+(\.\d+)?-\d+(\.\d+)?-\d+(\.\d+)?$")
if not pattern.match(v):
return None
return v

@model_validator(mode='before')
@model_validator(mode="before")
def replace_none_with_empty_list(cls, values):
fields_to_check = [
'cautions_en', 'first_aid_en', 'cautions_fr', 'first_aid_fr',
'instructions_en', 'micronutrients_en', 'ingredients_en',
'specifications_en', 'instructions_fr',
'micronutrients_fr', 'ingredients_fr',
'specifications_fr', 'guaranteed_analysis'
"cautions_en",
"first_aid_en",
"cautions_fr",
"first_aid_fr",
"instructions_en",
"micronutrients_en",
"ingredients_en",
"specifications_en",
"instructions_fr",
"micronutrients_fr",
"ingredients_fr",
"specifications_fr",
"guaranteed_analysis",
]
for field in fields_to_check:
if values.get(field) is None:
Expand Down
7 changes: 7 additions & 0 deletions script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import json

from pydantic import BaseModel
from pipeline.inspection import FertilizerInspection


# NOTE(review): ``model_dump`` is an instance method, but here it receives the
# ``FertilizerInspection`` class itself as ``self``. If the goal is to print
# the model's schema, ``FertilizerInspection.model_json_schema()`` is probably
# what was intended — confirm before relying on this output.
print(json.dumps(BaseModel.model_dump(FertilizerInspection)))
39 changes: 39 additions & 0 deletions script2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
from pprint import pprint
from tests import curl_file
from dotenv import load_dotenv
# from pipeline.inspection import FertilizerInspection
from pipeline import LabelStorage, OCR, GPT, analyze

# Load environment variables (python-dotenv)
load_dotenv()

# Inputs for this manual run
log_dir_path = 'test_logs'
image_path = 'granulaine.png'  # Path to your test image

# Ensure the log directory exists. makedirs(exist_ok=True) avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(log_dir_path, exist_ok=True)

# Download the test image
# curl_file(url='https://tlhort.com/cdn/shop/products/10-52-0MAP.jpg', path=image_path)

# Read the OCR and GPT settings from the environment; os.getenv returns None
# for any variable that is not set.
api_endpoint_ocr = os.getenv('AZURE_API_ENDPOINT')
api_key_ocr = os.getenv('AZURE_API_KEY')
api_endpoint_gpt = os.getenv('AZURE_OPENAI_ENDPOINT')
api_key_gpt = os.getenv('AZURE_OPENAI_KEY')
api_deployment_gpt = os.getenv('AZURE_OPENAI_DEPLOYMENT')

# Initialize the objects
label_storage = LabelStorage()
label_storage.add_image(image_path)
ocr = OCR(api_endpoint=api_endpoint_ocr, api_key=api_key_ocr)
gpt = GPT(api_endpoint=api_endpoint_gpt, api_key=api_key_gpt, deployment_id=api_deployment_gpt)

# Run the analyze function
form = analyze(label_storage, ocr, gpt, log_dir_path=log_dir_path)

# Print the resulting form as a JSON string
print(form.model_dump_json())
68 changes: 68 additions & 0 deletions script3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import re
from pydantic import BaseModel, ValidationError, field_validator
from typing import Optional

# Helper function to extract first number
def extract_first_number(string: Optional[str]) -> Optional[float]:
    """Return the first decimal number found in ``string`` as a float.

    A number is an unsigned run of digits with an optional fractional part
    (``"12"`` or ``"12.5"``); signs and exponents are not recognised.

    Args:
        string: Text to scan. ``None`` is accepted and yields ``None``.

    Returns:
        The first match converted with ``float``, or ``None`` when ``string``
        is ``None`` or contains no digits.
    """
    if string is None:
        return None
    match = re.search(r"\d+(\.\d+)?", string)
    return float(match.group()) if match else None

# NutrientValue model with updated field type
class NutrientValue(BaseModel):
    """A named nutrient with an optional numeric value and unit."""

    nutrient: str
    value: float | None = None
    unit: Optional[str] = None

    @field_validator("value", mode="before")
    @classmethod
    def convert_value(cls, v):
        """Normalise the raw ``value`` input before float validation.

        Args:
            v: The raw input supplied for ``value``.

        Returns:
            ``float(v)`` for an ``int`` or ``float``; the first number found
            in the text for a ``str`` (``None`` if it has none); ``None`` for
            anything else, including ``bool``.
        """
        # Exact type comparison is deliberate: bool is a subclass of int, so
        # an isinstance() check would accept True/False as numbers.
        if type(v) in (int, float):
            return float(v)
        if type(v) is str:
            return extract_first_number(v)
        # Previously an implicit fall-through; spelled out for readers.
        return None


# Test cases
def test_nutrient_value():
    """Check how ``NutrientValue`` converts each kind of ``value`` input.

    Raises:
        AssertionError: If any case produces an unexpected ``value``.
    """
    # Case 1: Integer value
    nv1 = NutrientValue(nutrient="Protein", value=15, unit="g")
    print(nv1.model_dump_json())
    assert nv1.value == 15, f"Expected 15, but got {nv1.value} (type: {type(nv1.value)})"

    # Case 2: Float value
    nv2 = NutrientValue(nutrient="Fat", value=10.5, unit="g")
    assert nv2.value == 10.5, f"Expected 10.5, but got {nv2.value} (type: {type(nv2.value)})"

    # Case 3: String containing a number
    nv3 = NutrientValue(nutrient="Carbohydrates", value="20g", unit="g")
    assert nv3.value == 20.0, f"Expected 20.0, but got {nv3.value} (type: {type(nv3.value)})"

    # Case 4: String without a number
    nv4 = NutrientValue(nutrient="Fiber", value="N/A", unit="g")
    assert nv4.value is None, f"Expected None, but got {nv4.value} (type: {type(nv4.value)})"

    # Case 5: None value
    nv5 = NutrientValue(nutrient="Sugar", value=None, unit="g")
    assert nv5.value is None, f"Expected None, but got {nv5.value} (type: {type(nv5.value)})"

    # Cases 6 and 7: Boolean values. The validator matches on exact type, and
    # type(True) is bool, so neither branch applies and the value becomes
    # None. No ValidationError is raised, so assert the result directly
    # instead of waiting for an exception that never comes.
    for flag in (True, False):
        nv_bool = NutrientValue(nutrient="Vitamins", value=flag, unit="mg")
        assert nv_bool.value is None, (
            f"Expected None for boolean input {flag}, "
            f"but got {nv_bool.value} (type: {type(nv_bool.value)})"
        )

# Script entry point: run the checks and report the outcome on stdout.
if __name__ == "__main__":
    try:
        test_nutrient_value()
    except AssertionError as failure:
        print(f"Assertion failed: {failure}")
    else:
        print("All tests passed!")
Loading

0 comments on commit a23ca31

Please sign in to comment.