From 0da17dcf4a6a4660ff4fafd4f5c50bd4fc7d62e0 Mon Sep 17 00:00:00 2001
From: joocer <justin.joyce@joocer.com>
Date: Tue, 5 Sep 2023 22:26:11 +0100
Subject: [PATCH] 1.6.0

---
 README.md                                     | 13 +--
 data_expectations/__init__.py                 | 22 ++++-
 data_expectations/internals/__init__.py       |  1 -
 data_expectations/internals/evaluate.py       |  7 +-
 data_expectations/internals/expectations.py   | 36 +--------
 data_expectations/internals/models.py         | 81 ++++---------------
 data_expectations/version.py                  |  2 +-
 tests/test_documentation.py                   | 23 +++++-
 .../test_expect_column_names_to_match_set.py  | 31 -------
 tests/test_expectation_datamodel.py           | 53 ++++--------
 tests/test_load_expectations.py               | 20 ++---
 11 files changed, 94 insertions(+), 195 deletions(-)
 delete mode 100644 tests/test_expect_column_names_to_match_set.py

diff --git a/README.md b/README.md
index 166aab5..c7ac68f 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,6 @@ Expectations can be used alongside, or in place of a schema validator, however E
 ## Provided Expectations
 
 - **expect_column_to_exist** (column)
-- **expect_column_names_to_match_set** (columns, ignore_excess:true)
 - **expect_column_values_to_not_be_null** (column)
 - **expect_column_values_to_be_of_type** (column, expected_type, ignore_nulls:true)
 - **expect_column_values_to_be_in_type_list** (column, type_list, ignore_nulls:true)
@@ -50,18 +49,20 @@ Data Expectations has no external dependencies, can be used ad hoc and in-the-mo
 
 ~~~python
 import data_expectations as de
+from data_expectations import Expectation
+from data_expectations import Behaviors
 
-TEST_DATA = {"name":"charles","age":12}
+TEST_DATA = {"name": "charles", "age": 12}
 
 set_of_expectations = [
-    {"expectation": "expect_column_to_exist", "column": "name"},
-    {"expectation": "expect_column_to_exist", "column": "age"},
-    {"expectation": "expect_column_values_to_be_between", "column": "age", "minimum": 0, "maximum": 120},
+    Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="name"),
+    Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="age"),
+    Expectation(Behaviors.EXPECT_COLUMN_VALUES_TO_BE_BETWEEN, column="age", config={"minimum": 0, "maximum": 120}),
 ]
 
 expectations = de.Expectations(set_of_expectations)
 try:
     de.evaluate_record(expectations, TEST_DATA)
-except de.errors.ExpectationNotMetError:
+except de.errors.ExpectationNotMetError:  # pragma: no cover
     print("Data Didn't Meet Expectations")
 ~~~
diff --git a/data_expectations/__init__.py b/data_expectations/__init__.py
index 5bedc7f..a7c9445 100644
--- a/data_expectations/__init__.py
+++ b/data_expectations/__init__.py
@@ -1,5 +1,25 @@
+from enum import Enum
+
+
+class Behaviors(str, Enum):  # NOTE(review): *_LENGTH_TO_BE_BE looked like a typo; kept as alias of *_LENGTH_TO_BE - confirm
+    EXPECT_COLUMN_TO_EXIST = "expect_column_to_exist"
+    EXPECT_COLUMN_VALUES_TO_NOT_BE_NULL = "expect_column_values_to_not_be_null"
+    EXPECT_COLUMN_VALUES_TO_BE_OF_TYPE = "expect_column_values_to_be_of_type"
+    EXPECT_COLUMN_VALUES_TO_BE_IN_TYPE_LIST = "expect_column_values_to_be_in_type_list"
+    EXPECT_COLUMN_VALUES_TO_BE_MORE_THAN = "expect_column_values_to_be_more_than"
+    EXPECT_COLUMN_VALUES_TO_BE_LESS_THAN = "expect_column_values_to_be_less_than"
+    EXPECT_COLUMN_VALUES_TO_BE_BETWEEN = "expect_column_values_to_be_between"
+    EXPECT_COLUMN_VALUES_TO_BE_INCREASING = "expect_column_values_to_be_increasing"
+    EXPECT_COLUMN_VALUES_TO_BE_DECREASING = "expect_column_values_to_be_decreasing"
+    EXPECT_COLUMN_VALUES_TO_BE_IN_SET = "expect_column_values_to_be_in_set"
+    EXPECT_COLUMN_VALUES_TO_MATCH_REGEX = "expect_column_values_to_match_regex"
+    EXPECT_COLUMN_VALUES_TO_MATCH_LIKE = "expect_column_values_to_match_like"
+    EXPECT_COLUMN_VALUES_LENGTH_TO_BE = EXPECT_COLUMN_VALUES_LENGTH_TO_BE_BE = "expect_column_values_length_to_be"
+    EXPECT_COLUMN_VALUES_LENGTH_TO_BE_BETWEEN = "expect_column_values_length_to_be_between"
+
+
 from data_expectations.internals.expectations import Expectations
-from data_expectations.internals.models import Expectation, ColumnExpectation
+from data_expectations.internals.models import Expectation
 
 from data_expectations.internals.evaluate import evaluate_list
 from data_expectations.internals.evaluate import evaluate_record
diff --git a/data_expectations/internals/__init__.py b/data_expectations/internals/__init__.py
index 8b13789..e69de29 100644
--- a/data_expectations/internals/__init__.py
+++ b/data_expectations/internals/__init__.py
@@ -1 +0,0 @@
-
diff --git a/data_expectations/internals/evaluate.py b/data_expectations/internals/evaluate.py
index 1d5f484..20bdf5a 100644
--- a/data_expectations/internals/evaluate.py
+++ b/data_expectations/internals/evaluate.py
@@ -12,7 +12,6 @@
 
 import typing
 
-from data_expectations import ColumnExpectation
 from data_expectations import Expectations
 from data_expectations.errors import ExpectationNotMetError
 from data_expectations.errors import ExpectationNotUnderstoodError
@@ -40,11 +39,7 @@ def evaluate_record(expectations: Expectations, record: dict, suppress_errors: b
         if expectation not in ALL_EXPECTATIONS:
             raise ExpectationNotUnderstoodError(expectation=expectation)
 
-        base_config = {"row": record, **expectation_definition.config}
-
-        # Conditionally include the 'column' parameter
-        if isinstance(expectation_definition, ColumnExpectation):
-            base_config["column"] = expectation_definition.column
+        base_config = {"row": record, "column": expectation_definition.column, **expectation_definition.config}
 
         if not ALL_EXPECTATIONS[expectation](**base_config):
             if not suppress_errors:
diff --git a/data_expectations/internals/expectations.py b/data_expectations/internals/expectations.py
index 7845d8f..f1f85bc 100644
--- a/data_expectations/internals/expectations.py
+++ b/data_expectations/internals/expectations.py
@@ -41,7 +41,6 @@
 from typing import List
 from typing import Union
 
-from data_expectations.internals.models import ColumnExpectation
 from data_expectations.internals.models import Expectation
 from data_expectations.internals.text import sql_like_to_regex
 
@@ -70,12 +69,9 @@ def __init__(self, set_of_expectations: Iterable[Union[str, dict, Expectation]])
             if isinstance(exp, str):  # Parse JSON string
                 exp = json.loads(exp)
 
-            if isinstance(exp, dict):  # Convert dict to Expectation or ColumnExpectation
-                if "column" in exp:
-                    self.set_of_expectations.append(ColumnExpectation.load(exp))
-                else:
-                    self.set_of_expectations.append(Expectation.load(exp))
-            elif is_dataclass(exp) and (isinstance(exp, Expectation) or isinstance(exp, ColumnExpectation)):
+            if isinstance(exp, dict):  # Convert dict to Expectation
+                self.set_of_expectations.append(Expectation.load(exp))
+            elif is_dataclass(exp) and isinstance(exp, Expectation):
                 self.set_of_expectations.append(exp)
 
     @classmethod
@@ -100,32 +96,6 @@ def reset():
     # COLUMN EXPECTATIONS
     ###################################################################################
 
-    @staticmethod
-    def expect_column_names_to_match_set(
-        *,
-        row: dict,
-        columns: list,
-        ignore_excess: bool = True,
-        **kwargs,
-    ):
-        """
-        Confirms that the columns in a record match the given set.
-
-        Parameters:
-            row: dict
-                The record to be checked.
-            columns: list
-                List of expected column names.
-            ignore_excess: bool
-                If True, ignores columns not in the list. If False, ensures columns match the list exactly.
-
-        Returns: bool
-            True if expectation is met, False otherwise.
-        """
-        if ignore_excess:
-            return all(key in columns for key in row.keys())
-        return sorted(columns) == sorted(list(row.keys()))
-
     @staticmethod
     def expect_column_to_exist(
         *,
diff --git a/data_expectations/internals/models.py b/data_expectations/internals/models.py
index ad7f70a..8c59431 100644
--- a/data_expectations/internals/models.py
+++ b/data_expectations/internals/models.py
@@ -18,6 +18,8 @@
 from typing import Type
 from typing import Union
 
+from data_expectations import Behaviors
+
 
 @dataclass
 class Expectation:
@@ -25,94 +27,45 @@ class Expectation:
     Represents a general Data Expectation.
     """
 
-    expectation: str
+    expectation: Behaviors
+    column: str
     config: Dict[str, Any] = field(default_factory=dict)
+    ignore_nulls: bool = True
 
-    def to_dict(self) -> Dict[str, Any]:
+    def dump(self) -> Dict[str, Any]:
         """
         Converts the Expectation instance to a dictionary representation.
 
         Returns:
             A dictionary containing the expectation and its configuration.
         """
-        return {"expectation": self.expectation, **self.config}
+        return {
+            "expectation": self.expectation,
+            "column": self.column,
+            "ignore_nulls": self.ignore_nulls,
+            **self.config,
+        }
 
     @classmethod
-    def load_base(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> Dict[str, Any]:
+    def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
         """
-        Loads a serialized Expectation and returns it as a dictionary.
+        Loads a serialized Expectation and returns it as an instance.
 
         Parameters:
             serialized: Serialized Expectation as a dictionary or JSON string.
 
         Returns:
-            A dictionary representation of the serialized Expectation.
+            An Expectation instance populated with the serialized data.
         """
         if isinstance(serialized, str):
             serialized = dict(json.loads(serialized))
         serialized_copy: dict = deepcopy(serialized)
         if "expectation" not in serialized_copy:
             raise ValueError("Missing 'expectation' key in Expectation.")
-        return serialized_copy
-
-    @classmethod
-    def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
-        """
-        Loads a serialized Expectation and returns it as an instance.
-
-        Parameters:
-            serialized: Serialized Expectation as a dictionary or JSON string.
-
-        Returns:
-            An Expectation instance populated with the serialized data.
-        """
-        serialized_copy = cls.load_base(serialized)
-        expectation = serialized_copy.pop("expectation")
-        config = serialized_copy
-        return cls(expectation=expectation, config=config)
-
-
-class ColumnExpectation(Expectation):
-    """
-    Represents a Data Expectation related to a specific column.
-    """
-
-    def __init__(self, expectation: str, column: str, config: Dict[str, Any] = None):
-        """
-        Initializes a ColumnExpectation instance.
-
-        Parameters:
-            expectation: The expectation type as a string.
-            column: The column the expectation applies to.
-            config: Additional configuration as a dictionary.
-        """
-        super().__init__(expectation, config or {})
-        self.column = column
-
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Converts the ColumnExpectation instance to a dictionary representation.
-
-        Returns:
-            A dictionary containing the expectation, column, and its configuration.
-        """
-        return {"expectation": self.expectation, "column": self.column, **self.config}
-
-    @classmethod
-    def load(cls: Type["ColumnExpectation"], serialized: Union[Dict[str, Any], str]) -> "ColumnExpectation":
-        """
-        Loads a serialized ColumnExpectation and returns it as an instance.
-
-        Parameters:
-            serialized: Serialized ColumnExpectation as a dictionary or JSON string.
-
-        Returns:
-            A ColumnExpectation instance populated with the serialized data.
-        """
-        serialized_copy = cls.load_base(serialized)
         if "column" not in serialized_copy:
             raise ValueError("Missing 'column' key in Expectation.")
         expectation = serialized_copy.pop("expectation")
         column = serialized_copy.pop("column")
+        ignore_nulls = serialized_copy.pop("ignore_nulls", True)
         config = serialized_copy
-        return cls(expectation=expectation, column=column, config=config)
+        return cls(expectation=expectation, column=column, ignore_nulls=ignore_nulls, config=config)
diff --git a/data_expectations/version.py b/data_expectations/version.py
index 431daee..5be518b 100644
--- a/data_expectations/version.py
+++ b/data_expectations/version.py
@@ -13,6 +13,6 @@
 # Store the version here so:
 # 1) we don't load dependencies by storing it in __init__.py
 # 2) we can import it in setup.py for the same reason
-__version__ = "1.5.0"
+__version__ = "1.6.0"
 
 # nodoc - don't add to the documentation wiki
diff --git a/tests/test_documentation.py b/tests/test_documentation.py
index 5493ff2..c52650f 100644
--- a/tests/test_documentation.py
+++ b/tests/test_documentation.py
@@ -7,7 +7,7 @@
 sys.path.insert(1, os.path.join(sys.path[0], ".."))
 
 
-def test_example():
+def test_example_legacy():
     import data_expectations as de
 
     TEST_DATA = {"name": "charles", "age": 12}
@@ -30,6 +30,27 @@ def test_example():
         print("Data Didn't Meet Expectations")
 
 
+def test_example():  # same scenario as the README example, built from Expectation/Behaviors objects
+    import data_expectations as de
+    from data_expectations import Expectation
+    from data_expectations import Behaviors
+
+    TEST_DATA = {"name": "charles", "age": 12}
+
+    set_of_expectations = [  # Expectation instances rather than plain dicts
+        Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="name"),
+        Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="age"),
+        Expectation(Behaviors.EXPECT_COLUMN_VALUES_TO_BE_BETWEEN, column="age", config={"minimum": 0, "maximum": 120}),
+    ]
+
+    expectations = de.Expectations(set_of_expectations)
+    try:
+        de.evaluate_record(expectations, TEST_DATA)
+    except de.errors.ExpectationNotMetError:  # pragma: no cover
+        print("Data Didn't Meet Expectations")
+
+
 if __name__ == "__main__":  # pragma: no cover
     test_example()
+    test_example_legacy()
     print("✅ okay")
diff --git a/tests/test_expect_column_names_to_match_set.py b/tests/test_expect_column_names_to_match_set.py
deleted file mode 100644
index f6ad899..0000000
--- a/tests/test_expect_column_names_to_match_set.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-import sys
-
-sys.path.insert(1, os.path.join(sys.path[0], ".."))
-
-import data_expectations as de
-
-
-def test_expect_column_names_to_match_set():
-    test_func = de.Expectations.expect_column_names_to_match_set
-
-    assert test_func(row={"number": 7, "string": "d"}, columns=("number", "string"))
-    assert test_func(
-        row={"number": 7, "string": "d"},
-        columns=("number", "string"),
-        ignore_excess=True,
-    )
-    assert test_func(
-        row={"number": 7, "string": "d"},
-        columns=("number", "string"),
-        ignore_excess=False,
-    )
-
-    assert test_func(row={"number": 7}, columns=("number", "string"))
-    assert test_func(row={"number": 7}, columns=("number", "string"), ignore_excess=True)
-    assert not test_func(row={"number": 7}, columns=("number", "string"), ignore_excess=False)
-
-
-if __name__ == "__main__":  # pragma: no cover
-    test_expect_column_names_to_match_set()
-    print("✅ okay")
diff --git a/tests/test_expectation_datamodel.py b/tests/test_expectation_datamodel.py
index ea4576d..6701c15 100644
--- a/tests/test_expectation_datamodel.py
+++ b/tests/test_expectation_datamodel.py
@@ -6,67 +6,46 @@
 
 import json
 import pytest
-from data_expectations import Expectation, ColumnExpectation
-
-
-def test_expectation_to_dict():
-    exp = Expectation("test_expectation", {"some_key": "some_value"})
-    assert exp.to_dict() == {"expectation": "test_expectation", "some_key": "some_value"}
-
-
-def test_expectation_load_from_dict():
-    serialized = {"expectation": "test_expectation", "some_key": "some_value"}
-    exp = Expectation.load(serialized)
-    assert exp.expectation == "test_expectation"
-    assert exp.config == {"some_key": "some_value"}
-
-
-def test_expectation_load_from_json_str():
-    serialized = json.dumps({"expectation": "test_expectation", "some_key": "some_value"})
-    exp = Expectation.load(serialized)
-    assert exp.expectation == "test_expectation"
-    assert exp.config == {"some_key": "some_value"}
-
-
-def test_expectation_load_missing_key():
-    serialized = {"some_key": "some_value"}
-    with pytest.raises(ValueError):
-        Expectation.load(serialized)
+from data_expectations import Expectation
 
 
 def test_column_expectation_to_dict():
-    exp = ColumnExpectation("test_expectation", "test_column", {"some_key": "some_value"})
-    assert exp.to_dict() == {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
+    exp = Expectation("test_expectation", "test_column", {"some_key": "some_value"})
+    assert exp.dump() == {
+        "expectation": "test_expectation",
+        "column": "test_column",
+        "some_key": "some_value",
+        "ignore_nulls": True,
+    }
 
 
 def test_column_expectation_load_from_dict():
     serialized = {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
-    exp = ColumnExpectation.load(serialized)
+    exp = Expectation.load(serialized)
     assert exp.expectation == "test_expectation"
     assert exp.column == "test_column"
     assert exp.config == {"some_key": "some_value"}
+    assert exp.ignore_nulls == True
 
 
 def test_column_expectation_load_from_json_str():
-    serialized = json.dumps({"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"})
-    exp = ColumnExpectation.load(serialized)
+    serialized = json.dumps(
+        {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value", "ignore_nulls": False}
+    )
+    exp = Expectation.load(serialized)
     assert exp.expectation == "test_expectation"
     assert exp.column == "test_column"
     assert exp.config == {"some_key": "some_value"}
+    assert exp.ignore_nulls == False
 
 
 def test_column_expectation_load_missing_key():
     serialized = {"expectation": "test_expectation", "some_key": "some_value"}
     with pytest.raises(ValueError):
-        ColumnExpectation.load(serialized)
+        Expectation.load(serialized)
 
 
 if __name__ == "__main__":  # pragma: no cover
-    test_expectation_to_dict()
-    test_expectation_load_from_dict()
-    test_expectation_load_from_json_str()
-    test_expectation_load_missing_key()
-
     test_column_expectation_to_dict()
     test_column_expectation_load_from_dict()
     test_column_expectation_load_from_json_str()
diff --git a/tests/test_load_expectations.py b/tests/test_load_expectations.py
index a9e3d50..68d7329 100644
--- a/tests/test_load_expectations.py
+++ b/tests/test_load_expectations.py
@@ -5,35 +5,27 @@
 
 sys.path.insert(1, os.path.join(sys.path[0], ".."))
 
-from data_expectations import Expectations, Expectation, ColumnExpectation
+from data_expectations import Expectations, Expectation
 
 
 def test_expectations_initializer():
     # Create expectations as different types
-    exp1 = Expectation("expect_test", {"key1": "value1"})
-    exp2_dict = {"expectation": "expect_test2", "key2": "value2"}
-    exp2 = Expectation.load(exp2_dict)
-    exp3_json = json.dumps({"expectation": "expect_test3", "key3": "value3"})
-    exp3 = Expectation.load(json.loads(exp3_json))
 
-    col_exp1 = ColumnExpectation("expect_test_col", "col1", {"key1": "value1"})
+    col_exp1 = Expectation("expect_test_col", "col1", {"key1": "value1"})
     col_exp2_dict = {"expectation": "expect_test_col2", "column": "col2", "key2": "value2"}
-    col_exp2 = ColumnExpectation.load(col_exp2_dict)
+    col_exp2 = Expectation.load(col_exp2_dict)
     col_exp3_json = json.dumps({"expectation": "expect_test_col3", "column": "col3", "key3": "value3"})
-    col_exp3 = ColumnExpectation.load(json.loads(col_exp3_json))
+    col_exp3 = Expectation.load(json.loads(col_exp3_json))
 
     # Initialize Expectations class
-    expectations = Expectations([exp1, exp2_dict, exp3_json, col_exp1, col_exp2_dict, col_exp3_json])
+    expectations = Expectations([col_exp1, col_exp2_dict, col_exp3_json])
 
     # Validate
-    assert len(expectations.set_of_expectations) == 6
+    assert len(expectations.set_of_expectations) == 3
 
     assert isinstance(expectations.set_of_expectations[0], Expectation)
     assert isinstance(expectations.set_of_expectations[1], Expectation)
     assert isinstance(expectations.set_of_expectations[2], Expectation)
-    assert isinstance(expectations.set_of_expectations[3], ColumnExpectation)
-    assert isinstance(expectations.set_of_expectations[4], ColumnExpectation)
-    assert isinstance(expectations.set_of_expectations[5], ColumnExpectation)
 
 
 if __name__ == "__main__":  # pragma: no cover