Skip to content

Commit

Permalink
Merge pull request #12 from joocer/1.5.0
Browse files Browse the repository at this point in the history
1.5.0
  • Loading branch information
joocer committed Sep 5, 2023
2 parents 96ae19c + 3cd2d6a commit 95052a3
Show file tree
Hide file tree
Showing 9 changed files with 272 additions and 35 deletions.
22 changes: 1 addition & 21 deletions .github/workflows/regression_suite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
pip install -r $GITHUB_WORKSPACE/tests/requirements.txt
- name: Run Regression Tests
run: coverage run -m pytest
run: coverage run -m pytest --color=yes

- name: Check Coverage
run: coverage report --include=data_expectations/** --fail-under=95 -m
Expand All @@ -31,23 +31,3 @@ jobs:
uses: codecov/codecov-action@v1
with:
fail_ci_if_error: false

mypy:
name: Type Checks
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Set Up Environment
uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Install Requirements
run: |
python -m pip install --upgrade pip mypy
pip install -r $GITHUB_WORKSPACE/tests/requirements.txt
- name: Execute Test
run: mypy data_expectations
5 changes: 1 addition & 4 deletions .github/workflows/secrets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,5 @@ jobs:
name: Fides Secrets Scanner
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: fides-scanner
uses: joocer/fides@master
uses: joocer/fides@main
7 changes: 4 additions & 3 deletions data_expectations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .internals.expectations import Expectations
from data_expectations.internals.expectations import Expectations
from data_expectations.internals.models import Expectation, ColumnExpectation

from .internals.evaluate import evaluate_list
from .internals.evaluate import evaluate_record
from data_expectations.internals.evaluate import evaluate_list
from data_expectations.internals.evaluate import evaluate_record
15 changes: 11 additions & 4 deletions data_expectations/internals/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@

import typing

from data_expectations import ColumnExpectation
from data_expectations import Expectations
from data_expectations.errors import ExpectationNotMetError
from data_expectations.errors import ExpectationNotUnderstoodError

ALL_EXPECTATIONS = Expectations.all_expectations()


def evaluate_record(expectations, record: dict, suppress_errors: bool = False):
def evaluate_record(expectations: Expectations, record: dict, suppress_errors: bool = False) -> bool:
"""
Test a single record against a defined set of expectations.
Expand All @@ -34,20 +35,26 @@ def evaluate_record(expectations, record: dict, suppress_errors: bool = False):
"""
for expectation_definition in expectations.set_of_expectations:
# get the name of the expectation
expectation = expectation_definition["expectation"]
expectation = expectation_definition.expectation

if expectation not in ALL_EXPECTATIONS:
raise ExpectationNotUnderstoodError(expectation=expectation)

if not ALL_EXPECTATIONS[expectation](row=record, **expectation_definition):
base_config = {"row": record, **expectation_definition.config}

# Conditionally include the 'column' parameter
if isinstance(expectation_definition, ColumnExpectation):
base_config["column"] = expectation_definition.column

if not ALL_EXPECTATIONS[expectation](**base_config):
if not suppress_errors:
raise ExpectationNotMetError(expectation, record)
return False # data failed to meet expectation

return True


def evaluate_list(expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False):
def evaluate_list(expectations: Expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False) -> bool:
"""
Evaluate a set of records against a defined set of Expectations.
Expand Down
21 changes: 19 additions & 2 deletions data_expectations/internals/expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,18 @@
- if data doesn't match, I'm not cross, I'm just disappointed.
"""
import json
import re
from dataclasses import is_dataclass
from inspect import getmembers
from typing import Any
from typing import Dict
from typing import Iterable
from typing import List
from typing import Union

from data_expectations.internals.models import ColumnExpectation
from data_expectations.internals.models import Expectation
from data_expectations.internals.text import sql_like_to_regex

GLOBAL_TRACKER: Dict[str, Any] = {}
Expand All @@ -58,8 +64,19 @@ def wrapper(*args, **kwargs):


class Expectations:
def __init__(self, set_of_expectations: Iterable[dict]):
self.set_of_expectations = set_of_expectations
def __init__(self, set_of_expectations: Iterable[Union[str, dict, Expectation]]):
self.set_of_expectations: List[Expectation] = []
for exp in set_of_expectations:
if isinstance(exp, str): # Parse JSON string
exp = json.loads(exp)

if isinstance(exp, dict): # Convert dict to Expectation or ColumnExpectation
if "column" in exp:
self.set_of_expectations.append(ColumnExpectation.load(exp))
else:
self.set_of_expectations.append(Expectation.load(exp))
elif is_dataclass(exp) and (isinstance(exp, Expectation) or isinstance(exp, ColumnExpectation)):
self.set_of_expectations.append(exp)

@classmethod
def all_expectations(cls):
Expand Down
118 changes: 118 additions & 0 deletions data_expectations/internals/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import Type
from typing import Union


@dataclass
class Expectation:
    """
    A generic Data Expectation: the name of the expectation plus its settings.
    """

    # Name of the expectation to evaluate
    expectation: str
    # Every other key of the serialized form, kept as free-form configuration
    config: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Flatten this Expectation into a single dictionary.

        Returns:
            A dictionary with the "expectation" name and every configuration entry.
        """
        return {"expectation": self.expectation, **self.config}

    @classmethod
    def load_base(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> Dict[str, Any]:
        """
        Normalise a serialized Expectation into a fresh dictionary.

        Parameters:
            serialized: The Expectation as a dictionary or as a JSON string.

        Returns:
            A deep copy of the serialized data, so the caller's input is never mutated.

        Raises:
            ValueError: If the data has no 'expectation' key.
        """
        # JSON text is decoded first; dictionaries are used as provided
        raw = dict(json.loads(serialized)) if isinstance(serialized, str) else serialized
        detached: dict = deepcopy(raw)
        if "expectation" in detached:
            return detached
        raise ValueError("Missing 'expectation' key in Expectation.")

    @classmethod
    def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
        """
        Build an Expectation instance from its serialized form.

        Parameters:
            serialized: The Expectation as a dictionary or as a JSON string.

        Returns:
            An Expectation whose config holds every key other than 'expectation'.
        """
        remaining = cls.load_base(serialized)
        name = remaining.pop("expectation")
        # whatever is left after removing the name is the configuration
        return cls(expectation=name, config=remaining)


class ColumnExpectation(Expectation):
    """
    Represents a Data Expectation related to a specific column.

    The base class is a dataclass whose generated ``__eq__`` and ``__repr__`` only
    cover ``expectation`` and ``config``. This class is not itself a dataclass (it
    keeps a hand-written ``__init__`` so ``column`` stays the second positional
    argument), so both methods are overridden here to take ``column`` into account.
    """

    def __init__(self, expectation: str, column: str, config: Union[Dict[str, Any], None] = None):
        """
        Initializes a ColumnExpectation instance.

        Parameters:
            expectation: The expectation type as a string.
            column: The column the expectation applies to.
            config: Additional configuration as a dictionary; defaults to an empty one.
        """
        super().__init__(expectation, config or {})
        self.column = column

    def __repr__(self) -> str:
        """
        Returns a debug representation that includes the column.
        """
        return (
            f"{type(self).__name__}(expectation={self.expectation!r}, "
            f"column={self.column!r}, config={self.config!r})"
        )

    def __eq__(self, other: object) -> bool:
        """
        Compares expectation, column and config; only instances of the exact same
        class can be equal (the same rule the dataclass-generated method applies).
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.expectation, self.column, self.config) == (
            other.expectation,  # type: ignore[attr-defined]
            other.column,  # type: ignore[attr-defined]
            other.config,  # type: ignore[attr-defined]
        )

    # Defining __eq__ without __hash__ leaves instances unhashable, which matches
    # the (mutable, eq=True) dataclass this class derives from.

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the ColumnExpectation instance to a dictionary representation.

        Returns:
            A dictionary containing the expectation, column, and its configuration.
        """
        return {"expectation": self.expectation, "column": self.column, **self.config}

    @classmethod
    def load(cls: Type["ColumnExpectation"], serialized: Union[Dict[str, Any], str]) -> "ColumnExpectation":
        """
        Loads a serialized ColumnExpectation and returns it as an instance.

        Parameters:
            serialized: Serialized ColumnExpectation as a dictionary or JSON string.

        Returns:
            A ColumnExpectation instance populated with the serialized data.

        Raises:
            ValueError: If the 'expectation' or 'column' key is missing.
        """
        serialized_copy = cls.load_base(serialized)
        if "column" not in serialized_copy:
            raise ValueError("Missing 'column' key in Expectation.")
        expectation = serialized_copy.pop("expectation")
        column = serialized_copy.pop("column")
        # everything that is neither the name nor the column is configuration
        return cls(expectation=expectation, column=column, config=serialized_copy)
2 changes: 1 addition & 1 deletion data_expectations/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# Store the version here so:
# 1) we don't load dependencies by storing it in __init__.py
# 2) we can import it in setup.py for the same reason
__version__ = "1.4.0"
__version__ = "1.5.0"

# nodoc - don't add to the documentation wiki
75 changes: 75 additions & 0 deletions tests/test_expectation_datamodel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], ".."))


import json
import pytest
from data_expectations import Expectation, ColumnExpectation


def test_expectation_to_dict():
    """An Expectation flattens to its name plus its config entries."""
    wanted = {"expectation": "test_expectation", "some_key": "some_value"}
    subject = Expectation("test_expectation", {"some_key": "some_value"})
    assert subject.to_dict() == wanted


def test_expectation_load_from_dict():
    """Loading from a dictionary splits the name from the remaining config."""
    loaded = Expectation.load({"expectation": "test_expectation", "some_key": "some_value"})
    assert loaded.expectation == "test_expectation"
    assert loaded.config == {"some_key": "some_value"}


def test_expectation_load_from_json_str():
    """Loading from a JSON string gives the same result as loading from a dictionary."""
    as_json = json.dumps({"expectation": "test_expectation", "some_key": "some_value"})
    loaded = Expectation.load(as_json)
    assert loaded.expectation == "test_expectation"
    assert loaded.config == {"some_key": "some_value"}


def test_expectation_load_missing_key():
    """A definition without an 'expectation' key is rejected with ValueError."""
    without_name = {"some_key": "some_value"}
    with pytest.raises(ValueError):
        Expectation.load(without_name)


def test_column_expectation_to_dict():
    """A ColumnExpectation flattens to its name, column and config entries."""
    wanted = {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    subject = ColumnExpectation("test_expectation", "test_column", {"some_key": "some_value"})
    assert subject.to_dict() == wanted


def test_column_expectation_load_from_dict():
    """Loading from a dictionary separates the name, the column and the config."""
    loaded = ColumnExpectation.load(
        {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    )
    assert loaded.expectation == "test_expectation"
    assert loaded.column == "test_column"
    assert loaded.config == {"some_key": "some_value"}


def test_column_expectation_load_from_json_str():
    """Loading from a JSON string separates the name, the column and the config."""
    as_json = json.dumps(
        {"expectation": "test_expectation", "column": "test_column", "some_key": "some_value"}
    )
    loaded = ColumnExpectation.load(as_json)
    assert loaded.expectation == "test_expectation"
    assert loaded.column == "test_column"
    assert loaded.config == {"some_key": "some_value"}


def test_column_expectation_load_missing_key():
    """A column definition without a 'column' key is rejected with ValueError."""
    without_column = {"expectation": "test_expectation", "some_key": "some_value"}
    with pytest.raises(ValueError):
        ColumnExpectation.load(without_column)


if __name__ == "__main__":  # pragma: no cover
    # Lets the file be run directly as a script; the cases run in declaration order.
    for case in (
        test_expectation_to_dict,
        test_expectation_load_from_dict,
        test_expectation_load_from_json_str,
        test_expectation_load_missing_key,
        test_column_expectation_to_dict,
        test_column_expectation_load_from_dict,
        test_column_expectation_load_from_json_str,
        test_column_expectation_load_missing_key,
    ):
        case()

    print("✅ okay")
42 changes: 42 additions & 0 deletions tests/test_load_expectations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import sys
import json


sys.path.insert(1, os.path.join(sys.path[0], ".."))

from data_expectations import Expectations, Expectation, ColumnExpectation


def test_expectations_initializer():
    """
    Expectations accepts instances, dictionaries and JSON strings, and loads each
    entry as the right type with the right contents.
    """
    # Plain expectations as an instance, a dictionary and a JSON string
    exp1 = Expectation("expect_test", {"key1": "value1"})
    exp2_dict = {"expectation": "expect_test2", "key2": "value2"}
    exp2 = Expectation.load(exp2_dict)
    exp3_json = json.dumps({"expectation": "expect_test3", "key3": "value3"})
    exp3 = Expectation.load(json.loads(exp3_json))

    # Column expectations in the same three forms
    col_exp1 = ColumnExpectation("expect_test_col", "col1", {"key1": "value1"})
    col_exp2_dict = {"expectation": "expect_test_col2", "column": "col2", "key2": "value2"}
    col_exp2 = ColumnExpectation.load(col_exp2_dict)
    col_exp3_json = json.dumps({"expectation": "expect_test_col3", "column": "col3", "key3": "value3"})
    col_exp3 = ColumnExpectation.load(json.loads(col_exp3_json))

    # Initialize Expectations class
    expectations = Expectations([exp1, exp2_dict, exp3_json, col_exp1, col_exp2_dict, col_exp3_json])
    loaded = expectations.set_of_expectations

    # Validate
    assert len(loaded) == 6

    # ColumnExpectation subclasses Expectation, so the plain entries must also be
    # checked for NOT being column expectations.
    for plain in loaded[:3]:
        assert isinstance(plain, Expectation)
        assert not isinstance(plain, ColumnExpectation)
    for with_column in loaded[3:]:
        assert isinstance(with_column, ColumnExpectation)

    # Compare contents via to_dict so the column is part of the check
    reference = [exp1, exp2, exp3, col_exp1, col_exp2, col_exp3]
    assert [item.to_dict() for item in loaded] == [item.to_dict() for item in reference]


if __name__ == "__main__":  # pragma: no cover
    # Lets the file be run directly as a script, without a test runner.
    test_expectations_initializer()

    print("✅ okay")

0 comments on commit 95052a3

Please sign in to comment.