diff --git a/README.md b/README.md index 42c7e3b..32d1056 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Data Expectations is a Python library which takes a delarative approach to asser Expectations can be used alongside, or in place of a schema validator, however Expectations is intended to perform validation of the data in a dataset, not just the structure of a table. Records should be a Python dictionary (or dictionary-like object) and can be processed one-by-one, or against an entire list of dictionaries. -[Data Expectations](https://github.com/joocer/data_expectations) was inspired by the great [Great Expectations](https://github.com/great-expectations/great_expectations) library, but I wanted something which was easier to quickly set up and run. Data Expectations can do less, but it does it with a fraction of the effort and has zero dependencies. Data Expectations was written to run as a step in data processing pipelines, testing the data before it is committed to the warehouse. +[Data Expectations](https://github.com/joocer/data_expectations) was inspired by the great [Great Expectations](https://github.com/great-expectations/great_expectations) library, but we wanted something lighter and easier to quickly set up and run. Data Expectations can do less, but it does it with a fraction of the effort and has zero dependencies. Data Expectations was written to run as a step in data processing pipelines, testing the data before it is committed to the warehouse. ## Provided Expectations diff --git a/data_expectations/__init__.py b/data_expectations/__init__.py index 0370f98..ac82773 100644 --- a/data_expectations/__init__.py +++ b/data_expectations/__init__.py @@ -1,3 +1,4 @@ -from .internals import Expectations +from .internals.expectations import Expectations + from .internals.evaluate import evaluate_list from .internals.evaluate import evaluate_record diff --git a/data_expectations/internals/__init__.py b/data_expectations/internals/__init__.py index 5e68c7e..8b13789 100644 --- a/data_expectations/internals/__init__.py +++ b/data_expectations/internals/__init__.py @@ -1,2 +1 @@ -from .evaluate import evaluate_record -from .expectations import Expectations + diff --git a/data_expectations/internals/evaluate.py b/data_expectations/internals/evaluate.py index 33e936e..088d5b6 100644 --- a/data_expectations/internals/evaluate.py +++ b/data_expectations/internals/evaluate.py @@ -12,16 +12,25 @@ import typing +from data_expectations import Expectations from data_expectations.errors import ExpectationNotMetError from data_expectations.errors import ExpectationNotUnderstoodError -from data_expectations.internals.expectations import all_expectations -ALL_EXPECTATIONS = all_expectations() +ALL_EXPECTATIONS = Expectations.all_expectations() def evaluate_record(expectations, record: dict, suppress_errors: bool = False): """ - Test 'record' against a defined set of 'expectations'. + Test a single record against a defined set of expectations. + + Args: + expectations: The Expectations instance. + record: The dictionary record to be tested. + all_expectations: The dictionary of all available expectations. + suppress_errors: Whether to suppress expectation errors and return False instead. + + Returns: + True if all expectations are met, False otherwise. """ for expectation_definition in expectations.set_of_expectations: # get the name of the expectation @@ -40,10 +49,14 @@ def evaluate_record(expectations, record: dict, suppress_errors: bool = False): def evaluate_list(expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False): """ - Execute the expectation test against an iterable of dictionaries + Evaluate a set of records against a defined set of Expectations. + + Args: + expectations: The Expectations instance. + dictset: The iterable set of dictionary records to be tested. + suppress_errors: Whether to suppress expectation errors and return False for the entire set. + + Returns: + True if all records meet all Expectations, False otherwise. """ - for record in dictset: - result = evaluate_record(expectations, record, suppress_errors) - if not result: - return False - return True + return all(evaluate_record(expectations, record, suppress_errors) for record in dictset) diff --git a/data_expectations/internals/expectations.py b/data_expectations/internals/expectations.py index 26f0b64..9874541 100644 --- a/data_expectations/internals/expectations.py +++ b/data_expectations/internals/expectations.py @@ -33,44 +33,58 @@ """ import re from inspect import getmembers +from typing import Any +from typing import Dict from typing import Iterable from data_expectations.internals.text import sql_like_to_regex -try: - # added 3.9 - from functools import cache -except ImportError: # pragma: no cover - from functools import lru_cache +GLOBAL_TRACKER: Dict[str, Any] = {} - cache = lru_cache(1) +def track_previous(func): + def wrapper(*args, **kwargs): + column = kwargs.get("column") + key = f"{func.__name__}/{str(column)}" + if "previous_value" in kwargs: + previous_value = kwargs.pop("previous_value") + else: + previous_value = GLOBAL_TRACKER.get(key) + result = func(previous_value=previous_value, *args, **kwargs) + GLOBAL_TRACKER[key] = kwargs.get("row", {}).get(column) or previous_value + return result -@cache -def all_expectations(): - """ - Programatically get the list of expectations and build them into a dictionary. - We then use this dictionary to look up the methods to test the expectations in - the set of expectations for a dataset. - """ - expectations = {} - for handle, member in getmembers(Expectations(None)): - if callable(member) and handle.startswith("expect_"): - expectations[handle] = member - return expectations + return wrapper class Expectations: def __init__(self, set_of_expectations: Iterable[dict]): self.set_of_expectations = set_of_expectations - self._tracker: dict = {} + + @classmethod + def all_expectations(cls): + """ + Programmatically get the list of expectations and build them into a dictionary. + We then use this dictionary to look up the methods to test the expectations in + the set of expectations for a dataset. + """ + expectations = {} + for handle, member in getmembers(cls): + if callable(member) and handle.startswith("expect_"): + expectations[handle] = member + return expectations + + @staticmethod + def reset(): + global GLOBAL_TRACKER + GLOBAL_TRACKER = {} ################################################################################### # COLUMN EXPECTATIONS ################################################################################### + @staticmethod def expect_column_names_to_match_set( - self, *, row: dict, columns: list, @@ -78,53 +92,69 @@ def expect_column_names_to_match_set( **kwargs, ): """ - Confirms that the columns in a record matches a given set. - - Ignore_excess, ignore columns not on the list, set to False to test against a - fixed set. + Confirms that the columns in a record match the given set. + + Parameters: + row: dict + The record to be checked. + columns: list + List of expected column names. + ignore_excess: bool + If True, ignores columns not in the list. If False, ensures columns match the list exactly. + + Returns: bool + True if expectation is met, False otherwise. """ if ignore_excess: return all(key in columns for key in row.keys()) return sorted(columns) == sorted(list(row.keys())) + @staticmethod def expect_column_to_exist( - self, *, row: dict, column: str, **kwargs, ): """ - Confirms that a named column exists. + Confirms that a specified column exists in the record. - Paramters: - row: dictionary - The dictionary to be tested - column: string - The name of the column we expect to exist + Parameters: + row: dict + The record to be checked. + column: str + Name of the column to check for existence. - Returns: - True if the expectation is met + Returns: bool + True if column exists, False otherwise. """ if isinstance(row, dict): return column in row.keys() return False + @staticmethod def expect_column_values_to_not_be_null( - self, *, row: dict, column: str, **kwargs, ): """ - Confirms the value in a column is not null, note that non-existant values - are considered to be null. + Confirms that the value in a column is not null. Non-existent values are considered null. + + Parameters: + row: dict + The record containing the column. + column: str + The column's name whose value should not be null. + + Returns: bool + True if the value in the column is not null, False otherwise. """ return row.get(column) is not None + @staticmethod def expect_column_values_to_be_of_type( - self, *, row: dict, column: str, @@ -132,13 +162,29 @@ def expect_column_values_to_be_of_type( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column is of the expected type. + + Parameters: + row: dict + The record to be checked. + column: str + The column's name to validate the type of its value. + expected_type: + Expected type of the column value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the type matches or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return type(value).__name__ == expected_type return ignore_nulls + @staticmethod def expect_column_values_to_be_in_type_list( - self, *, row: dict, column: str, @@ -146,13 +192,29 @@ def expect_column_values_to_be_in_type_list( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the type of value in a specific column is one of the specified types. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate the type of its value. + type_list: Iterable + List of expected types for the column value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the type is in the type list or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return type(value).__name__ in type_list return ignore_nulls + @staticmethod def expect_column_values_to_be_between( - self, *, row: dict, column: str, @@ -161,45 +223,93 @@ def expect_column_values_to_be_between( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column is between two values. + + Parameters: + row: dict + The record to check. + column: str + The column's name to validate its value. + minimum: + Lower bound of the value. + maximum: + Upper bound of the value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is between the two bounds or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return value >= minimum and value <= maximum return ignore_nulls + @staticmethod + @track_previous def expect_column_values_to_be_increasing( - self, *, row: dict, column: str, ignore_nulls: bool = True, + previous_value=None, **kwargs, ): + """ + Confirms that the values in a specific column are in an increasing order. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + previous_value: [type] + The value of the column from the previous record. + + Returns: bool + True if the current value is greater than or equal to the previous value or if the value is null and ignore_nulls is True. False otherwise. + """ value = row.get(column) if value: - key = f"expect_column_values_to_be_increasing/{str(column)}" - last_value = self._tracker.get(key) - self._tracker[key] = value - return last_value is None or last_value <= value + return previous_value is None or previous_value <= value return ignore_nulls + @staticmethod + @track_previous def expect_column_values_to_be_decreasing( - self, *, row: dict, column: str, ignore_nulls: bool = True, + previous_value=None, **kwargs, ): + """ + Confirms that the values in a specific column are in a decreasing order. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + previous_value: [type] + The value of the column from the previous record. + + Returns: bool + True if the current value is less than or equal to the previous value or if the value is null and ignore_nulls is True. False otherwise. + """ value = row.get(column) if value: - key = f"expect_column_values_to_be_decreasing/{str(column)}" - last_value = self._tracker.get(key) - self._tracker[key] = value - return last_value is None or last_value >= value + return previous_value is None or previous_value >= value return ignore_nulls + @staticmethod def expect_column_values_to_be_in_set( - self, *, row: dict, column: str, @@ -207,13 +317,29 @@ def expect_column_values_to_be_in_set( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column is within a predefined set. + + Parameters: + row: dict + The record to check. + column: str + The column's name to validate its value. + symbols: Iterable + The set of allowed values for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is in the provided set or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return value in symbols return ignore_nulls + @staticmethod def expect_column_values_to_match_regex( - self, *, row: dict, column: str, @@ -221,13 +347,29 @@ def expect_column_values_to_match_regex( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column matches a given regular expression pattern. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + regex: str + The regular expression pattern to match against the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value matches the regex or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return re.compile(regex).match(str(value)) is not None return ignore_nulls + @staticmethod def expect_column_values_to_match_like( - self, *, row: dict, column: str, @@ -235,13 +377,29 @@ def expect_column_values_to_match_like( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column matches a given SQL-like pattern. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + like: str + The SQL-like pattern to match against the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value matches the pattern or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return sql_like_to_regex(like).match(str(value)) is not None return ignore_nulls + @staticmethod def expect_column_values_length_to_be( - self, *, row: dict, column: str, @@ -249,7 +407,22 @@ def expect_column_values_length_to_be( ignore_nulls: bool = True, **kwargs, ): - """Confirms the string length of the value in a column is a given length""" + """ + Confirms that the length of the value in a specific column is equal to a specified length. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value length. + length: int + The expected length for the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the length of the value matches the specified length or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: if not hasattr(value, "__len__"): @@ -257,8 +430,8 @@ def expect_column_values_length_to_be( return len(value) == length return ignore_nulls + @staticmethod def expect_column_values_length_to_be_between( - self, *, row: dict, column: str, @@ -267,6 +440,24 @@ def expect_column_values_length_to_be_between( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the length of the value in a specific column falls within a specified range. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value length. + minimum: int + The minimum acceptable length for the column's value. + maximum: int + The maximum acceptable length for the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the length of the value is within the specified range or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: if not hasattr(value, "__len__"): @@ -274,8 +465,8 @@ def expect_column_values_length_to_be_between( return len(value) >= minimum and len(value) <= maximum return ignore_nulls + @staticmethod def expect_column_values_to_be_more_than( - self, *, row: dict, column: str, @@ -283,13 +474,29 @@ def expect_column_values_to_be_more_than( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column is greater than a given threshold. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + threshold: [type] + The minimum acceptable value for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is greater than the threshold or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return value > threshold return ignore_nulls + @staticmethod def expect_column_values_to_be_less_than( - self, *, row: dict, column: str, @@ -297,6 +504,22 @@ def expect_column_values_to_be_less_than( ignore_nulls: bool = True, **kwargs, ): + """ + Confirms that the value in a specific column is less than a given threshold. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + threshold: [type] + The maximum acceptable value for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is less than the threshold or if the value is null and ignore_nulls is True, False otherwise. + """ value = row.get(column) if value: return value < threshold diff --git a/data_expectations/version.py b/data_expectations/version.py index 241f9c5..77ad9dc 100644 --- a/data_expectations/version.py +++ b/data_expectations/version.py @@ -13,6 +13,6 @@ # Store the version here so: # 1) we don't load dependencies by storing it in __init__.py # 2) we can import it in setup.py for the same reason -__version__ = "1.3.0" +__version__ = "1.4.0" # nodoc - don't add to the documentation wiki diff --git a/pyproject.toml b/pyproject.toml index c7a524e..f2e68d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -line-length = 100 +line-length = 120 target-version = ['py310'] fast = true jobs = 4 @@ -7,9 +7,9 @@ cache = true [tool.isort] profile = "black" -extend_skip_glob = ["tests/**", "*.pyx", "testdata/**"] +extend_skip_glob = ["tests/**", "*/__init__.py"] skip_gitignore = true -line_length = 100 +line_length = 120 multi_line_output = 9 force_single_line = true float_to_top = true diff --git a/tests/requirements.txt b/tests/requirements.txt index 20af081..e36ca7a 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,2 @@ -rich coverage pytest \ No newline at end of file diff --git a/tests/test_data_expectations.py b/tests/test_data_expectations.py index 40fb993..0bfe069 100644 --- a/tests/test_data_expectations.py +++ b/tests/test_data_expectations.py @@ -1,17 +1,15 @@ import datetime import os import sys + import pytest sys.path.insert(1, os.path.join(sys.path[0], "..")) + import data_expectations as de -from data_expectations.errors import ( - ExpectationNotMetError, - ExpectationNotUnderstoodError, -) -from rich import traceback +from data_expectations.errors import ExpectationNotMetError +from data_expectations.errors import ExpectationNotUnderstoodError -traceback.install() # fmt:off set_of_expectations = [ diff --git a/tests/test_expect_column_names_to_match_set.py b/tests/test_expect_column_names_to_match_set.py index 7712c1e..f6ad899 100644 --- a/tests/test_expect_column_names_to_match_set.py +++ b/tests/test_expect_column_names_to_match_set.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_names_to_match_set(): - test_func = de.Expectations([]).expect_column_names_to_match_set + test_func = de.Expectations.expect_column_names_to_match_set assert test_func(row={"number": 7, "string": "d"}, columns=("number", "string")) assert test_func( diff --git a/tests/test_expect_column_to_exist.py b/tests/test_expect_column_to_exist.py index 48f318e..507e0d0 100644 --- a/tests/test_expect_column_to_exist.py +++ b/tests/test_expect_column_to_exist.py @@ -5,10 +5,8 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de # fmt:off @@ -22,7 +20,7 @@ def test_expect_column_to_exist(): - test_func = de.Expectations([]).expect_column_to_exist + test_func = de.Expectations.expect_column_to_exist assert not test_func(row='{"number":1}', column="number") diff --git a/tests/test_expect_column_values_length_to_be.py b/tests/test_expect_column_values_length_to_be.py index 9506b34..a0dcda4 100644 --- a/tests/test_expect_column_values_length_to_be.py +++ b/tests/test_expect_column_values_length_to_be.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_length_to_be(): - test_func = de.Expectations([]).expect_column_values_length_to_be + test_func = de.Expectations.expect_column_values_length_to_be assert test_func(row={"string": "value"}, column="string", length=5) assert not test_func(row={"string": "main"}, column="string", length=5) diff --git a/tests/test_expect_column_values_length_to_be_between.py b/tests/test_expect_column_values_length_to_be_between.py index 8413910..48d4bf0 100644 --- a/tests/test_expect_column_values_length_to_be_between.py +++ b/tests/test_expect_column_values_length_to_be_between.py @@ -2,21 +2,17 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_length_to_be_between(): - test_func = de.Expectations([]).expect_column_values_length_to_be_between + test_func = de.Expectations.expect_column_values_length_to_be_between assert test_func(row={"string": "value"}, column="string", minimum=3, maximum=7) assert not test_func(row={"string": "main"}, column="string", minimum=5, maximum=7) assert test_func(row={"string": None}, column="string", minimum=5, maximum=7) - assert not test_func( - row={"string": None}, column="string", minimum=5, maximum=7, ignore_nulls=False - ) + assert not test_func(row={"string": None}, column="string", minimum=5, maximum=7, ignore_nulls=False) assert test_func(row={"list": ["a", "b", "c"]}, column="list", minimum=1, maximum=5) assert test_func(row={"num": 100}, column="num", minimum=1, maximum=7) diff --git a/tests/test_expect_column_values_to_be_between.py b/tests/test_expect_column_values_to_be_between.py index b1a8725..4d03a48 100644 --- a/tests/test_expect_column_values_to_be_between.py +++ b/tests/test_expect_column_values_to_be_between.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_between(): - test_func = de.Expectations([]).expect_column_values_to_be_between + test_func = de.Expectations.expect_column_values_to_be_between assert test_func(row={"key": "b"}, column="key", minimum="a", maximum="c") assert test_func(row={"key": "b"}, column="key", minimum="a", maximum="b") diff --git a/tests/test_expect_column_values_to_be_decreasing.py b/tests/test_expect_column_values_to_be_decreasing.py index 616b197..f111250 100644 --- a/tests/test_expect_column_values_to_be_decreasing.py +++ b/tests/test_expect_column_values_to_be_decreasing.py @@ -2,10 +2,8 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de # fmt:off @@ -40,7 +38,8 @@ def test_expect_column_values_to_be_decreasing_valid(): - test_func = de.Expectations([]).expect_column_values_to_be_decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + de.Expectations.reset() # valid data is always valid for row in VALID_DATA: @@ -49,7 +48,8 @@ def test_expect_column_values_to_be_decreasing_valid(): def test_expect_column_values_to_be_decreasing_valid_with_nulls(): - test_func = de.Expectations([]).expect_column_values_to_be_decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + de.Expectations.reset() # valid data is always valid for row in VALID_DATA_SPARSE: @@ -58,7 +58,8 @@ def test_expect_column_values_to_be_decreasing_valid_with_nulls(): def test_expect_column_values_to_be_decreasing_valid_with_nulls_which_arent_ignored(): - test_func = de.Expectations([]).expect_column_values_to_be_decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + de.Expectations.reset() for i, row in enumerate(VALID_DATA_SPARSE): if i in (2,): @@ -70,7 +71,8 @@ def test_expect_column_values_to_be_decreasing_valid_with_nulls_which_arent_igno def test_expect_column_values_to_be_decreasing_invalid(): - test_func = de.Expectations([]).expect_column_values_to_be_decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + de.Expectations.reset() # invalid data is valid the first cycle for i, row in enumerate(INVALID_DATA): @@ -83,7 +85,8 @@ def test_expect_column_values_to_be_decreasing_invalid(): def test_expect_column_values_to_be_decreasing_invalid_with_nulls(): - test_func = de.Expectations([]).expect_column_values_to_be_decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + de.Expectations.reset() # invalid data is valid the first cycle for i, row in enumerate(INVALID_DATA_SPARSE): @@ -95,10 +98,60 @@ def test_expect_column_values_to_be_decreasing_invalid_with_nulls(): assert not test_func(row=row, column="string") +def test_expect_column_values_to_be_decreasing_true(): + # Scenario where the column value is decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + record1 = {"column_name": 10} + record2 = {"column_name": 5} + + assert test_func(row=record1, column="column_name", previous_value=15) + assert test_func(row=record2, column="column_name", previous_value=10) + + +def test_expect_column_values_to_be_decreasing_false(): + # Scenario where the column value is not decreasing + test_func = de.Expectations.expect_column_values_to_be_decreasing + record = {"column_name": 15} + + assert not test_func(row=record, column="column_name", previous_value=10) + + +def test_expect_column_values_to_be_decreasing_with_null(): + test_func = de.Expectations.expect_column_values_to_be_decreasing + # Scenario where the column value is null + record = {"column_name": None} + + assert test_func(row=record, column="column_name", ignore_nulls=True, previous_value=10) + assert not test_func(row=record, column="column_name", ignore_nulls=False, previous_value=10) + + +def test_expect_column_values_to_be_decreasing_no_previous_value(): + # Scenario where there's no previous value (first row in a dataset, for example) + test_func = de.Expectations.expect_column_values_to_be_decreasing + record = {"column_name": 10} + + assert test_func(row=record, column="column_name", previous_value=None) + + +def test_expect_column_values_to_be_decreasing_column_missing(): + # Scenario where the column is missing + test_func = de.Expectations.expect_column_values_to_be_decreasing + record = {"other_column": 5} + + assert test_func(row=record, column="column_name", previous_value=10) + + if __name__ == "__main__": # pragma: no cover test_expect_column_values_to_be_decreasing_valid() test_expect_column_values_to_be_decreasing_valid_with_nulls() test_expect_column_values_to_be_decreasing_valid_with_nulls_which_arent_ignored() test_expect_column_values_to_be_decreasing_invalid() test_expect_column_values_to_be_decreasing_invalid_with_nulls() + + test_expect_column_values_to_be_decreasing_column_missing() + test_expect_column_values_to_be_decreasing_false() + test_expect_column_values_to_be_decreasing_true() + test_expect_column_values_to_be_decreasing_no_previous_value() + test_expect_column_values_to_be_decreasing_with_null() + print("✅ okay") diff --git a/tests/test_expect_column_values_to_be_in_set.py b/tests/test_expect_column_values_to_be_in_set.py index bfdf86a..eb5659a 100644 --- a/tests/test_expect_column_values_to_be_in_set.py +++ b/tests/test_expect_column_values_to_be_in_set.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_in_set(): - test_func = de.Expectations([]).expect_column_values_to_be_in_set + test_func = de.Expectations.expect_column_values_to_be_in_set assert test_func(row={"key": "a"}, column="key", symbols=("a", "b", "c")) assert test_func(row={"key": None}, column="key", symbols=("a", "b", "c")) diff --git a/tests/test_expect_column_values_to_be_in_type_list.py b/tests/test_expect_column_values_to_be_in_type_list.py index c436007..2ac1585 100644 --- a/tests/test_expect_column_values_to_be_in_type_list.py +++ b/tests/test_expect_column_values_to_be_in_type_list.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_in_type_list(): - test_func = de.Expectations([]).expect_column_values_to_be_in_type_list + test_func = de.Expectations.expect_column_values_to_be_in_type_list assert test_func(row={"key": "value"}, column="key", type_list=["str", "int"]) assert test_func(row={"key": 10}, column="key", type_list=["str", "int"]) diff --git a/tests/test_expect_column_values_to_be_increasing.py b/tests/test_expect_column_values_to_be_increasing.py index 4fe76cc..cd8eee6 100644 --- a/tests/test_expect_column_values_to_be_increasing.py +++ b/tests/test_expect_column_values_to_be_increasing.py @@ -5,10 +5,8 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de # fmt:off @@ -43,7 +41,8 @@ def test_expect_column_values_to_be_increasing_valid(): - test_func = de.Expectations([]).expect_column_values_to_be_increasing + test_func = de.Expectations.expect_column_values_to_be_increasing + de.Expectations.reset() # valid data is always valid for row in VALID_DATA: @@ -52,7 +51,8 @@ def test_expect_column_values_to_be_increasing_valid(): def test_expect_column_values_to_be_increasing_valid_with_nulls(): - test_func = de.Expectations([]).expect_column_values_to_be_increasing + test_func = de.Expectations.expect_column_values_to_be_increasing + de.Expectations.reset() # valid data is always valid for row in VALID_DATA_SPARSE: @@ -61,7 +61,8 @@ def test_expect_column_values_to_be_increasing_valid_with_nulls(): def test_expect_column_values_to_be_increasing_valid_with_nulls_which_arent_ignored(): - test_func = de.Expectations([]).expect_column_values_to_be_increasing + test_func = de.Expectations.expect_column_values_to_be_increasing + de.Expectations.reset() for i, row in enumerate(VALID_DATA_SPARSE): if i in (1,): @@ -73,7 +74,8 @@ def test_expect_column_values_to_be_increasing_valid_with_nulls_which_arent_igno def test_expect_column_values_to_be_increasing_invalid(): - test_func = de.Expectations([]).expect_column_values_to_be_increasing + test_func = de.Expectations.expect_column_values_to_be_increasing + de.Expectations.reset() # invalid data is valid the first cycle for i, row in enumerate(INVALID_DATA): @@ -86,7 +88,8 @@ def test_expect_column_values_to_be_increasing_invalid(): def test_expect_column_values_to_be_increasing_invalid_with_nulls(): - test_func = de.Expectations([]).expect_column_values_to_be_increasing + test_func = de.Expectations.expect_column_values_to_be_increasing + de.Expectations.reset() # invalid data is valid the first cycle for i, row in enumerate(INVALID_DATA_SPARSE): @@ -98,10 +101,65 @@ def test_expect_column_values_to_be_increasing_invalid_with_nulls(): assert not test_func(row=row, column="string") +def test_expect_column_values_to_be_increasing_true(): + test_func = de.Expectations.expect_column_values_to_be_increasing + + # Scenario where the column value is increasing + record1 = {"column_name": 5} + record2 = {"column_name": 10} + + assert test_func(row=record1, column="column_name", previous_value=3) + assert test_func(row=record2, column="column_name", previous_value=5) + + +def test_expect_column_values_to_be_increasing_false(): + test_func = de.Expectations.expect_column_values_to_be_increasing + + # Scenario where the column value is not increasing + record = {"column_name": 5} + + assert not test_func(row=record, column="column_name", previous_value=10) + + +def test_expect_column_values_to_be_increasing_with_null(): + test_func = de.Expectations.expect_column_values_to_be_increasing + + # Scenario where the column value is null + record = {"column_name": None} + + assert test_func(row=record, column="column_name", ignore_nulls=True, previous_value=5) + assert not test_func(row=record, column="column_name", ignore_nulls=False, previous_value=5) + + +def test_expect_column_values_to_be_increasing_no_previous_value(): + test_func = de.Expectations.expect_column_values_to_be_increasing + + # Scenario where there's no previous value (first row in a dataset, for example) + record = {"column_name": 5} + + assert test_func(row=record, column="column_name", previous_value=None) + + +def test_expect_column_values_to_be_increasing_column_missing(): + test_func = de.Expectations.expect_column_values_to_be_increasing + + # Scenario where the column is missing + record = {"other_column": 10} + + assert test_func(row=record, column="column_name", previous_value=5) + + if __name__ == "__main__": # pragma: no cover test_expect_column_values_to_be_increasing_valid() test_expect_column_values_to_be_increasing_valid_with_nulls() test_expect_column_values_to_be_increasing_valid_with_nulls_which_arent_ignored() test_expect_column_values_to_be_increasing_invalid() test_expect_column_values_to_be_increasing_invalid_with_nulls() + + test_expect_column_values_to_be_increasing_column_missing() + test_expect_column_values_to_be_increasing_false() + test_expect_column_values_to_be_increasing_true() + test_expect_column_values_to_be_increasing_no_previous_value() + test_expect_column_values_to_be_increasing_with_null() + print("✅ okay") diff --git a/tests/test_expect_column_values_to_be_less_than.py b/tests/test_expect_column_values_to_be_less_than.py index 1902e6d..a8a9275 100644 --- a/tests/test_expect_column_values_to_be_less_than.py +++ b/tests/test_expect_column_values_to_be_less_than.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_less_than(): - test_func = de.Expectations([]).expect_column_values_to_be_less_than + test_func = de.Expectations.expect_column_values_to_be_less_than assert test_func(row={"key": "b"}, column="key", threshold="c") assert test_func(row={"key": "b"}, column="key", threshold="f") diff --git a/tests/test_expect_column_values_to_be_more_than.py b/tests/test_expect_column_values_to_be_more_than.py index aece3c2..c87857d 100644 --- a/tests/test_expect_column_values_to_be_more_than.py +++ b/tests/test_expect_column_values_to_be_more_than.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_more_than(): - test_func = de.Expectations([]).expect_column_values_to_be_more_than + test_func = de.Expectations.expect_column_values_to_be_more_than assert test_func(row={"key": "b"}, column="key", threshold="a") assert test_func(row={"key": "z"}, column="key", threshold="f") diff --git a/tests/test_expect_column_values_to_be_of_type.py b/tests/test_expect_column_values_to_be_of_type.py index f89bccc..e69ca9a 100644 --- a/tests/test_expect_column_values_to_be_of_type.py +++ b/tests/test_expect_column_values_to_be_of_type.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_be_of_type(): - test_func = de.Expectations([]).expect_column_values_to_be_of_type + test_func = de.Expectations.expect_column_values_to_be_of_type assert test_func(row={"key": "value"}, column="key", expected_type="str") assert not test_func(row={"key": 10}, column="key", expected_type="str") diff --git a/tests/test_expect_column_values_to_match_like.py b/tests/test_expect_column_values_to_match_like.py index b1783bb..2ccfed7 100644 --- a/tests/test_expect_column_values_to_match_like.py +++ b/tests/test_expect_column_values_to_match_like.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_match_like(): - test_func = de.Expectations([]).expect_column_values_to_match_like + test_func = de.Expectations.expect_column_values_to_match_like assert test_func(row={"string": "surname"}, column="string", like="%name") assert not test_func(row={"string": "main"}, column="string", like="%name") diff --git a/tests/test_expect_column_values_to_match_regex.py b/tests/test_expect_column_values_to_match_regex.py index 9d675f4..80fb976 100644 --- a/tests/test_expect_column_values_to_match_regex.py +++ b/tests/test_expect_column_values_to_match_regex.py @@ -2,14 +2,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_match_regex(): - test_func = de.Expectations([]).expect_column_values_to_match_regex + test_func = de.Expectations.expect_column_values_to_match_regex assert test_func(row={"string": "test"}, column="string", regex="^test$") assert not test_func(row={"string": "main"}, column="string", regex="^test$") diff --git a/tests/test_expect_column_values_to_not_be_null.py b/tests/test_expect_column_values_to_not_be_null.py index 32233b6..e14d48d 100644 --- a/tests/test_expect_column_values_to_not_be_null.py +++ b/tests/test_expect_column_values_to_not_be_null.py @@ -5,14 +5,12 @@ import sys sys.path.insert(1, os.path.join(sys.path[0], "..")) -import data_expectations as de -from rich import traceback -traceback.install() +import data_expectations as de def test_expect_column_values_to_not_be_null(): - test_func = de.Expectations([]).expect_column_values_to_not_be_null + test_func = de.Expectations.expect_column_values_to_not_be_null assert test_func(row={"key": "value"}, column="key") assert not test_func(row={"key": None}, column="key")