Skip to content

Commit

Permalink
1.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Sep 5, 2023
1 parent 3d3ca92 commit b77f7da
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/regression_suite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
pip install -r $GITHUB_WORKSPACE/tests/requirements.txt
- name: Run Regression Tests
run: coverage run -m pytest
run: coverage run -m pytest --color=yes

- name: Check Coverage
run: coverage report --include=data_expectations/** --fail-under=95 -m
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/secrets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,5 @@ jobs:
name: Fides Secrets Scanner
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: fides-scanner
uses: joocer/fides@master
uses: joocer/fides@main
7 changes: 4 additions & 3 deletions data_expectations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .internals.expectations import Expectations
from data_expectations.internals.expectations import Expectations
from data_expectations.internals.models import Expectation, ColumnExpectation

from .internals.evaluate import evaluate_list
from .internals.evaluate import evaluate_record
from data_expectations.internals.evaluate import evaluate_list
from data_expectations.internals.evaluate import evaluate_record
10 changes: 6 additions & 4 deletions data_expectations/internals/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
ALL_EXPECTATIONS = Expectations.all_expectations()


def evaluate_record(expectations, record: dict, suppress_errors: bool = False):
def evaluate_record(expectations: Expectations, record: dict, suppress_errors: bool = False) -> bool:
"""
Test a single record against a defined set of expectations.
Expand All @@ -34,20 +34,22 @@ def evaluate_record(expectations, record: dict, suppress_errors: bool = False):
"""
for expectation_definition in expectations.set_of_expectations:
# get the name of the expectation
expectation = expectation_definition["expectation"]
expectation = expectation_definition.expectation

if expectation not in ALL_EXPECTATIONS:
raise ExpectationNotUnderstoodError(expectation=expectation)

if not ALL_EXPECTATIONS[expectation](row=record, **expectation_definition):
if not ALL_EXPECTATIONS[expectation](
row=record, column=expectation_definition.column, **expectation_definition.config
):
if not suppress_errors:
raise ExpectationNotMetError(expectation, record)
return False # data failed to meet expectation

return True


def evaluate_list(expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False):
def evaluate_list(expectations: Expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False) -> bool:
"""
Evaluate a set of records against a defined set of Expectations.
Expand Down
21 changes: 19 additions & 2 deletions data_expectations/internals/expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,18 @@
- if data doesn't match, I'm not cross, I'm just disappointed.
"""
import json
import re
from dataclasses import is_dataclass
from inspect import getmembers
from typing import Any
from typing import Dict
from typing import Iterable
from typing import List
from typing import Union

from data_expectations.internals.models import ColumnExpectation
from data_expectations.internals.models import Expectation
from data_expectations.internals.text import sql_like_to_regex

GLOBAL_TRACKER: Dict[str, Any] = {}
Expand All @@ -58,8 +64,19 @@ def wrapper(*args, **kwargs):


class Expectations:
def __init__(self, set_of_expectations: Iterable[dict]):
self.set_of_expectations = set_of_expectations
def __init__(self, set_of_expectations: Iterable[Union[str, dict, Expectation]]):
    """
    Build the set of expectations from a mix of input representations.

    Parameters:
        set_of_expectations: Iterable of str, dict or Expectation
            Each entry may be a JSON string describing one expectation, an
            already-decoded dict, or an Expectation/ColumnExpectation instance.
            Dicts carrying a "column" key are loaded as ColumnExpectation, all
            other dicts as Expectation.

    Raises:
        TypeError: an entry (after JSON decoding) is neither a dict nor an
            Expectation. Dropping it silently would mean that expectation is
            never enforced.
    """
    self.set_of_expectations: List[Expectation] = []
    for exp in set_of_expectations:
        if isinstance(exp, str):  # JSON text is decoded first, then handled like a dict
            exp = json.loads(exp)

        if isinstance(exp, Expectation):
            # ColumnExpectation subclasses Expectation, so one check covers both
            self.set_of_expectations.append(exp)
        elif isinstance(exp, dict):
            model = ColumnExpectation if "column" in exp else Expectation
            self.set_of_expectations.append(model.load(exp))
        else:
            raise TypeError(
                f"Unsupported expectation definition of type {type(exp).__name__!r}; "
                "expected a JSON string, dict or Expectation"
            )

@classmethod
def all_expectations(cls):
Expand Down
58 changes: 58 additions & 0 deletions data_expectations/internals/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import json
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import Type
from typing import Union


def _load_mapping(serialized: Union[Dict[str, Any], str]) -> Dict[str, Any]:
    """Return an independent dict copy of *serialized*, decoding JSON text first."""
    if isinstance(serialized, str):
        serialized = json.loads(serialized)
    if not isinstance(serialized, dict):
        # e.g. the JSON text "3" or "[1, 2]" - report it the same way as other malformed definitions
        raise ValueError(f"Expectation definition must be a dict/JSON object, got {type(serialized).__name__}")
    # deep copy so popping keys (and later edits to config) never touch the caller's object
    return deepcopy(serialized)


@dataclass
class Expectation:
    """
    A single expectation: its name plus any keyword configuration for it.
    """

    # name of the expectation
    expectation: str
    # remaining keys of the definition, kept as a plain dict
    config: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the flat dict form: the "expectation" key followed by the config entries."""
        return {"expectation": self.expectation, **self.config}

    @classmethod
    def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
        """
        Create an Expectation from a dict or a JSON string.

        Parameters:
            serialized: dict or str
                The flat definition; a str is decoded with json.loads.

        Returns:
            Expectation whose config holds every key other than "expectation".

        Raises:
            ValueError: the definition is not a dict/JSON object or has no
                "expectation" key.
        """
        definition = _load_mapping(serialized)

        if "expectation" not in definition:
            raise ValueError(f"Missing 'expectation' key in Expectation: {definition}")

        expectation = definition.pop("expectation")
        return cls(expectation=expectation, config=definition)


class ColumnExpectation(Expectation):
    """
    An Expectation that is bound to a named column.
    """

    def __init__(self, expectation: str, column: str, config: Union[Dict[str, Any], None] = None):
        """
        Parameters:
            expectation: str
                Name of the expectation.
            column: str
                Column the expectation is bound to.
            config: dict (optional)
                Additional configuration; a fresh empty dict is used when omitted.
        """
        super().__init__(expectation, config if config is not None else {})
        self.column = column

    # The dataclass-generated __eq__/__repr__ inherited from Expectation only know about
    # `expectation` and `config`; without these overrides, expectations on different
    # columns compare equal and the column never shows up in the repr.
    # Defining __eq__ leaves the class unhashable, as the base dataclass already is.
    def __eq__(self, other: object) -> bool:
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.expectation, self.column, self.config) == (
            other.expectation,  # type: ignore[attr-defined]
            other.column,  # type: ignore[attr-defined]
            other.config,  # type: ignore[attr-defined]
        )

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(expectation={self.expectation!r}, "
            f"column={self.column!r}, config={self.config!r})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return the flat dict form: "expectation", "column", then the config entries."""
        return {"expectation": self.expectation, "column": self.column, **self.config}

    @classmethod
    def load(cls: Type["ColumnExpectation"], serialized: Union[Dict[str, Any], str]) -> "ColumnExpectation":
        """
        Create a ColumnExpectation from a dict or a JSON string.

        Parameters:
            serialized: dict or str
                The flat definition; a str is decoded with json.loads.

        Returns:
            ColumnExpectation whose config holds every key other than
            "expectation" and "column".

        Raises:
            ValueError: the definition is not a dict/JSON object, or lacks the
                "expectation" or "column" key.
        """
        definition = _load_mapping(serialized)

        if "expectation" not in definition:
            raise ValueError("Missing 'expectation' key in Expectation")
        if "column" not in definition:
            raise ValueError("Missing 'column' key in Expectation")

        expectation = definition.pop("expectation")
        column = definition.pop("column")
        return cls(expectation=expectation, column=column, config=definition)
2 changes: 1 addition & 1 deletion data_expectations/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# Store the version here so:
# 1) we don't load dependencies by storing it in __init__.py
# 2) we can import it in setup.py for the same reason
__version__ = "1.4.0"
__version__ = "1.5.0"

# nodoc - don't add to the documentation wiki
75 changes: 75 additions & 0 deletions tests/test_expectation_datamodel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], ".."))


import json
import pytest
from data_expectations import Expectation, ColumnExpectation


def test_expectation_to_dict():
    """to_dict() places the config entries alongside the expectation name."""
    expected = {"expectation": "test_expectation", "some_key": "some_value"}
    subject = Expectation("test_expectation", {"some_key": "some_value"})
    assert subject.to_dict() == expected


def test_expectation_load_from_dict():
    """load() splits a dict into the expectation name and the remaining config."""
    loaded = Expectation.load({"expectation": "test_expectation", "some_key": "some_value"})
    assert loaded.expectation == "test_expectation"
    assert loaded.config == {"some_key": "some_value"}


def test_expectation_load_from_json_str():
    """load() accepts the JSON text form of a definition."""
    payload = {"expectation": "test_expectation", "some_key": "some_value"}
    loaded = Expectation.load(json.dumps(payload))
    assert loaded.expectation == "test_expectation"
    assert loaded.config == {"some_key": "some_value"}


def test_expectation_load_missing_key():
    """load() rejects a definition that has no "expectation" key."""
    incomplete = {"some_key": "some_value"}
    with pytest.raises(ValueError):
        Expectation.load(incomplete)


def test_column_expectation_to_dict():
    """to_dict() includes the column next to the expectation name and config."""
    expected = {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    subject = ColumnExpectation("test_expectation", "test_column", {"some_key": "some_value"})
    assert subject.to_dict() == expected


def test_column_expectation_load_from_dict():
    """load() separates expectation name, column and remaining config from a dict."""
    loaded = ColumnExpectation.load(
        {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    )
    assert loaded.expectation == "test_expectation"
    assert loaded.column == "test_column"
    assert loaded.config == {"some_key": "some_value"}


def test_column_expectation_load_from_json_str():
    """load() accepts the JSON text form of a column definition."""
    payload = {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    loaded = ColumnExpectation.load(json.dumps(payload))
    assert loaded.expectation == "test_expectation"
    assert loaded.column == "test_column"
    assert loaded.config == {"some_key": "some_value"}


def test_column_expectation_load_missing_key():
    """load() rejects a column definition that has no "column" key."""
    incomplete = {"expectation": "test_expectation", "some_key": "some_value"}
    with pytest.raises(ValueError):
        ColumnExpectation.load(incomplete)


if __name__ == "__main__":  # pragma: no cover
    # Allow the module to be run directly: execute every test in file order.
    for check in (
        test_expectation_to_dict,
        test_expectation_load_from_dict,
        test_expectation_load_from_json_str,
        test_expectation_load_missing_key,
        test_column_expectation_to_dict,
        test_column_expectation_load_from_dict,
        test_column_expectation_load_from_json_str,
        test_column_expectation_load_missing_key,
    ):
        check()

    print("✅ okay")
42 changes: 42 additions & 0 deletions tests/test_load_expectations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import sys
import json


sys.path.insert(1, os.path.join(sys.path[0], ".."))

from data_expectations import Expectations, Expectation, ColumnExpectation


def test_expectations_initializer():
    """
    Expectations accepts model instances, dicts and JSON strings in one list.

    Checks that every entry is loaded, that each one ends up as the right model
    type, and that the loaded content round-trips to the supplied definition.
    """
    # The same kind of definition in each of the three supported forms
    plain_obj = Expectation("expect_test", {"key1": "value1"})
    plain_dict = {"expectation": "expect_test2", "key2": "value2"}
    plain_json = json.dumps({"expectation": "expect_test3", "key3": "value3"})

    column_obj = ColumnExpectation("expect_test_col", "col1", {"key1": "value1"})
    column_dict = {"expectation": "expect_test_col2", "column": "col2", "key2": "value2"}
    column_json = json.dumps({"expectation": "expect_test_col3", "column": "col3", "key3": "value3"})

    # Initialize Expectations class
    expectations = Expectations([plain_obj, plain_dict, plain_json, column_obj, column_dict, column_json])
    loaded = expectations.set_of_expectations

    # Validate count and model types
    assert len(loaded) == 6
    expected_types = [Expectation] * 3 + [ColumnExpectation] * 3
    for item, expected_type in zip(loaded, expected_types):
        assert isinstance(item, expected_type)

    # Validate content: a type-only check would not notice a lost column or config key
    expected_dicts = [
        plain_obj.to_dict(),
        plain_dict,
        json.loads(plain_json),
        column_obj.to_dict(),
        column_dict,
        json.loads(column_json),
    ]
    assert [item.to_dict() for item in loaded] == expected_dicts


if __name__ == "__main__":  # pragma: no cover
    # Allow the module to be run directly without pytest.
    for check in (test_expectations_initializer,):
        check()

    print("✅ okay")

0 comments on commit b77f7da

Please sign in to comment.