use type instead of isinstance

ai-cfia · Sep 10, 2024 · a23ca31 · a23ca31
1 parent bed5896
commit a23ca31
Show file tree

Hide file tree

Showing 10 changed files with 739 additions and 35 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/pipeline/inspection.py b/pipeline/inspection.py
@@ -1,60 +1,58 @@
 import re
 from typing import List, Optional
+
 from pydantic import BaseModel, Field, field_validator, model_validator
 
+
 class npkError(ValueError):
     pass
 
+
 def extract_first_number(string: str) -> Optional[str]:
     if string is not None:
-        match = re.search(r'\d+(\.\d+)?', string)
+        match = re.search(r"\d+(\.\d+)?", string)
         if match:
             return match.group()
     return None
 
+
 class NutrientValue(BaseModel):
     nutrient: str
     value: Optional[float] = None
     unit: Optional[str] = None
 
-    @field_validator('value', mode='before', check_fields=False)
+    @field_validator("value", mode="before", check_fields=False)
     def convert_value(cls, v):
-        if isinstance(v, bool):
-            return None
-        elif isinstance(v, (int, float)):
-            return str(v)
-        elif isinstance(v, (str)):
+        if type(v) in (int, float):
+            return float(v)
+        if type(v) is str:
             return extract_first_number(v)
-        return None
-    
+
+
 class Value(BaseModel):
     value: Optional[float]
     unit: Optional[str]
 
-    @field_validator('value', mode='before', check_fields=False)
+    @field_validator("value", mode="before", check_fields=False)
     def convert_value(cls, v):
-        if isinstance(v, bool):
-            return None
-        elif isinstance(v, (int, float)):
-            return str(v)
-        elif isinstance(v, (str)):
+        if type(v) in (int, float):
+            return float(v)
+        if type(v) is str:
             return extract_first_number(v)
-        return None
+
 
 class Specification(BaseModel):
-    humidity: Optional[float] = Field(..., alias='humidity')
-    ph: Optional[float] = Field(..., alias='ph')
+    humidity: Optional[float] = Field(..., alias="humidity")
+    ph: Optional[float] = Field(..., alias="ph")
     solubility: Optional[float]
 
-    @field_validator('humidity', 'ph', 'solubility', mode='before', check_fields=False)
-    def convert_specification_values(cls, v):
-        if isinstance(v, bool):
-            return None
-        elif isinstance(v, (int, float)):
-            return str(v)
-        elif isinstance(v, (str)):
+    @field_validator("humidity", "ph", "solubility", mode="before", check_fields=False)
+    def convert_value(cls, v):
+        if type(v) in (int, float):
+            return float(v)
+        if type(v) is str:
             return extract_first_number(v)
-        return None
+
 
 class FertilizerInspection(BaseModel):
     company_name: Optional[str] = None
@@ -86,23 +84,31 @@ class FertilizerInspection(BaseModel):
     ingredients_fr: List[NutrientValue] = []
     specifications_fr: List[Specification] = []
     first_aid_fr: List[str] = None
-    
-    @field_validator('npk', mode='before')
+
+    @field_validator("npk", mode="before")
     def validate_npk(cls, v):
         if v is not None:
-            pattern = re.compile(r'^\d+(\.\d+)?-\d+(\.\d+)?-\d+(\.\d+)?$')
+            pattern = re.compile(r"^\d+(\.\d+)?-\d+(\.\d+)?-\d+(\.\d+)?$")
             if not pattern.match(v):
                 return None
         return v
 
-    @model_validator(mode='before')
+    @model_validator(mode="before")
     def replace_none_with_empty_list(cls, values):
         fields_to_check = [
-            'cautions_en', 'first_aid_en', 'cautions_fr', 'first_aid_fr',
-            'instructions_en', 'micronutrients_en', 'ingredients_en',
-            'specifications_en', 'instructions_fr',
-            'micronutrients_fr', 'ingredients_fr',
-            'specifications_fr', 'guaranteed_analysis'
+            "cautions_en",
+            "first_aid_en",
+            "cautions_fr",
+            "first_aid_fr",
+            "instructions_en",
+            "micronutrients_en",
+            "ingredients_en",
+            "specifications_en",
+            "instructions_fr",
+            "micronutrients_fr",
+            "ingredients_fr",
+            "specifications_fr",
+            "guaranteed_analysis",
         ]
         for field in fields_to_check:
             if values.get(field) is None:

diff --git a/script.py b/script.py
@@ -0,0 +1,7 @@
+import json
+
+from pydantic import BaseModel
+from pipeline.inspection import FertilizerInspection
+
+
+print(json.dumps(BaseModel.model_dump(FertilizerInspection)))
diff --git a/script2.py b/script2.py
@@ -0,0 +1,39 @@
+import os
+from pprint import pprint
+from tests import curl_file
+from dotenv import load_dotenv
+# from pipeline.inspection import FertilizerInspection
+from pipeline import LabelStorage, OCR, GPT, analyze
+
+# Load environment variables
+load_dotenv()
+
+# Set up the required objects
+log_dir_path = 'test_logs'
+image_path = 'granulaine.png'  # Path to your test image
+
+# Ensure the log directory exists
+if not os.path.exists(log_dir_path):
+    os.mkdir(log_dir_path)
+
+# Download the test image
+# curl_file(url='https://tlhort.com/cdn/shop/products/10-52-0MAP.jpg', path=image_path)
+
+# Read the OCR and GPT endpoints, keys and deployment name from environment variables
+api_endpoint_ocr = os.getenv('AZURE_API_ENDPOINT')
+api_key_ocr = os.getenv('AZURE_API_KEY')
+api_endpoint_gpt = os.getenv('AZURE_OPENAI_ENDPOINT')
+api_key_gpt = os.getenv('AZURE_OPENAI_KEY')
+api_deployment_gpt = os.getenv('AZURE_OPENAI_DEPLOYMENT')
+
+# Initialize the objects
+label_storage = LabelStorage()
+label_storage.add_image(image_path)
+ocr = OCR(api_endpoint=api_endpoint_ocr, api_key=api_key_ocr)
+gpt = GPT(api_endpoint=api_endpoint_gpt, api_key=api_key_gpt, deployment_id=api_deployment_gpt)
+
+# Run the analyze function
+form = analyze(label_storage, ocr, gpt, log_dir_path=log_dir_path)
+
+# Print the form serialized as JSON
+print(form.model_dump_json())
diff --git a/script3.py b/script3.py
@@ -0,0 +1,68 @@
+import re
+from pydantic import BaseModel, ValidationError, field_validator
+from typing import Optional
+
+# Helper function: return the first number found in the string as a float, or None if there is none
+def extract_first_number(string: str) -> Optional[float]:
+    if string is not None:
+        match = re.search(r"\d+(\.\d+)?", string)
+        if match:
+            return float(match.group())
+    return None
+
+# NutrientValue model with updated field type
+class NutrientValue(BaseModel):
+    nutrient: str
+    value: float | None = None
+    unit: Optional[str] = None
+
+    @field_validator("value", mode="before")
+    def convert_value(cls, v):
+        if type(v) in (int, float):
+            return float(v)
+        if type(v) is str:
+            return extract_first_number(v)
+
+
+# Test cases
+def test_nutrient_value():
+    # Case 1: Integer value
+    nv1 = NutrientValue(nutrient="Protein", value=15, unit="g")
+    print(nv1.model_dump_json())
+    assert nv1.value == 15, f"Expected 15, but got {nv1.value} (type: {type(nv1.value)})"
+
+    # Case 2: Float value
+    nv2 = NutrientValue(nutrient="Fat", value=10.5, unit="g")
+    assert nv2.value == 10.5, f"Expected 10.5, but got {nv2.value} (type: {type(nv2.value)})"
+
+    # Case 3: String containing a number
+    nv3 = NutrientValue(nutrient="Carbohydrates", value="20g", unit="g")
+    assert nv3.value == 20.0, f"Expected 20.0, but got {nv3.value} (type: {type(nv3.value)})"
+
+    # Case 4: String without a number
+    nv4 = NutrientValue(nutrient="Fiber", value="N/A", unit="g")
+    assert nv4.value is None, f"Expected None, but got {nv4.value} (type: {type(nv4.value)})"
+
+    # Case 5: None value
+    nv5 = NutrientValue(nutrient="Sugar", value=None, unit="g")
+    assert nv5.value is None, f"Expected None, but got {nv5.value} (type: {type(nv5.value)})"
+
+    # Case 6: Boolean True (convert_value returns None for bool, so value becomes None; no ValidationError is expected)
+    try:
+        NutrientValue(nutrient="Vitamins", value=True, unit="mg")
+    except ValidationError as e:
+        print(f"Expected ValidationError, got: {e}")
+
+    # Case 7: Boolean False (same as Case 6: convert_value returns None for bool, so no ValidationError is expected)
+    try:
+        nv6 = NutrientValue(nutrient="Vitamins", value=False, unit="mg")
+        print(nv6)
+    except ValidationError as e:
+        print(f"Expected ValidationError for boolean input, got: {e}")
+
+if __name__ == "__main__":
+    try:
+        test_nutrient_value()
+        print("All tests passed!")
+    except AssertionError as e:
+        print(f"Assertion failed: {e}")