Enable setting datetime value for dbt_valid_to when the record is current (#10780)
gshank authored Oct 10, 2024
1 parent 6743e32 commit c7d8693
Showing 9 changed files with 213 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240925-120855.yaml
@@ -0,0 +1,6 @@
kind: Features
body: Enable specification of dbt_valid_to for current records
time: 2024-09-25T12:08:55.926848-04:00
custom:
Author: gshank
Issue: "10187"
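
For orientation, a minimal sketch of how a user might set the new option in a snapshot YAML (the snapshot name, unique_key, and the exact date expression here are illustrative assumptions, not taken from this commit). When set, the value is passed through as a SQL expression and written to dbt_valid_to for current records instead of NULL:

snapshots:
  - name: orders_snapshot          # hypothetical snapshot name
    config:
      strategy: timestamp
      updated_at: updated_at
      unique_key: id
      # current records get this literal date in dbt_valid_to instead of NULL
      dbt_valid_to_current: "date('9999-12-31')"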
1 change: 1 addition & 0 deletions core/dbt/artifacts/resources/v1/snapshot.py
@@ -28,6 +28,7 @@ class SnapshotConfig(NodeConfig):
snapshot_meta_column_names: SnapshotMetaColumnNames = field(
default_factory=SnapshotMetaColumnNames
)
dbt_valid_to_current: Optional[str] = None

@property
def snapshot_table_column_names(self):
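As a quick, self-contained sketch of the shape of the new field — using a stand-in dataclass rather than dbt's actual SnapshotConfig, which carries many more fields:

from dataclasses import dataclass
from typing import Optional

@dataclass
class SnapshotConfigSketch:
    # stand-in for the dbt_valid_to_current attribute added to SnapshotConfig above
    dbt_valid_to_current: Optional[str] = None

# when set, the option holds a raw SQL expression as a string ...
cfg = SnapshotConfigSketch(dbt_valid_to_current="date('2099-12-31')")
assert cfg.dbt_valid_to_current == "date('2099-12-31')"
# ... and it defaults to None, preserving the historical NULL dbt_valid_to behavior
assert SnapshotConfigSketch().dbt_valid_to_current is None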
24 changes: 23 additions & 1 deletion schemas/dbt/manifest/v12.json
@@ -6685,6 +6685,17 @@
}
},
"additionalProperties": false
},
"dbt_valid_to_current": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": true
@@ -16511,6 +16522,17 @@
}
},
"additionalProperties": false
},
"dbt_valid_to_current": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": true
@@ -22476,4 +22498,4 @@
"unit_tests"
],
"$id": "https://schemas.getdbt.com/dbt/manifest/v12.json"
}
}
1 change: 1 addition & 0 deletions tests/functional/artifacts/expected_manifest.py
@@ -111,6 +111,7 @@ def get_rendered_snapshot_config(**updates):
"dbt_updated_at": None,
"dbt_scd_id": None,
},
"dbt_valid_to_current": None,
"tags": [],
"persist_docs": {},
"full_refresh": None,
1 change: 1 addition & 0 deletions tests/functional/list/test_list.py
@@ -63,6 +63,7 @@ def expect_snapshot_output(self, happy_path_project): # noqa: F811
"persist_docs": {},
"target_database": happy_path_project.database,
"target_schema": happy_path_project.test_schema,
"dbt_valid_to_current": None,
"snapshot_meta_column_names": {
"dbt_scd_id": None,
"dbt_updated_at": None,
2 changes: 1 addition & 1 deletion tests/functional/logging/test_logging.py
@@ -101,7 +101,7 @@ def test_invalid_event_value(project, logs_dir):
with pytest.raises(Exception) as excinfo:
fire_event(InvalidOptionYAML(option_name=1))

assert str(excinfo.value) == "[InvalidOptionYAML]: Unable to parse dict {'option_name': 1}"
assert "[InvalidOptionYAML]: Unable to parse logging event dictionary." in str(excinfo.value)


groups_yml = """
82 changes: 82 additions & 0 deletions tests/functional/snapshots/data/seed_dbt_valid_to.sql
@@ -0,0 +1,82 @@
create table {database}.{schema}.seed (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),
updated_at TIMESTAMP WITHOUT TIME ZONE
);

create table {database}.{schema}.snapshot_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- snapshotting fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
test_valid_from TIMESTAMP WITHOUT TIME ZONE,
test_valid_to TIMESTAMP WITHOUT TIME ZONE,
test_scd_id TEXT,
test_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- seed inserts
-- use the same email for two users to verify that duplicated check_cols values
-- are handled appropriately
insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values
(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
(3, 'Rachel', 'Moreno', '[email protected]', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
(4, 'Ralph', 'Turner', '[email protected]', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
(5, 'Laura', 'Gonzales', '[email protected]', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
(6, 'Katherine', 'Lopez', '[email protected]', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
(7, 'Jeremy', 'Hamilton', '[email protected]', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
(8, 'Heather', 'Rose', '[email protected]', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
(9, 'Gregory', 'Kelly', '[email protected]', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
(10, 'Rachel', 'Lopez', '[email protected]', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
(11, 'Donna', 'Welch', '[email protected]', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
(12, 'Russell', 'Lawrence', '[email protected]', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
(13, 'Michelle', 'Montgomery', '[email protected]', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
(14, 'Walter', 'Castillo', '[email protected]', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
(15, 'Robin', 'Mills', '[email protected]', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
(16, 'Raymond', 'Holmes', '[email protected]', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
(17, 'Gary', 'Bishop', '[email protected]', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
(18, 'Anna', 'Riley', '[email protected]', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
(19, 'Sarah', 'Knight', '[email protected]', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19');


-- populate snapshot table
insert into {database}.{schema}.snapshot_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
test_valid_from,
test_valid_to,
test_updated_at,
test_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by snapshotting
updated_at as test_valid_from,
date('2099-12-31') as test_valid_to,
updated_at as test_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed;
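
To make the intent of the 2099-12-31 sentinel concrete, an illustrative query showing how a consumer would filter for current records against this expected table (table and column names follow the seed script above):

-- current records carry the configured sentinel date rather than NULL
select *
from {database}.{schema}.snapshot_expected
where test_valid_to = date('2099-12-31');

-- without dbt_valid_to_current, the equivalent filter would be:
-- where test_valid_to is null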
97 changes: 97 additions & 0 deletions tests/functional/snapshots/test_snapshot_column_names.py
@@ -1,3 +1,4 @@
import datetime
import os

import pytest
@@ -7,6 +8,7 @@
get_manifest,
run_dbt,
run_dbt_and_capture,
run_sql_with_adapter,
update_config_file,
)

@@ -232,3 +234,98 @@ def test_snapshot_invalid_column_names(self, project):
assert len(results) == 1
assert "Compilation Error in snapshot snapshot_actual" in log_output
assert "Snapshot target is missing configured columns" in log_output


snapshots_valid_to_current_yml = """
snapshots:
- name: snapshot_actual
config:
strategy: timestamp
updated_at: updated_at
dbt_valid_to_current: "date('2099-12-31')"
snapshot_meta_column_names:
dbt_valid_to: test_valid_to
dbt_valid_from: test_valid_from
dbt_scd_id: test_scd_id
dbt_updated_at: test_updated_at
"""

update_with_current_sql = """
-- insert v2 of the 10 - 20 records
insert into {database}.{schema}.snapshot_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
test_valid_from,
test_valid_to,
test_updated_at,
test_scd_id
)
select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by snapshotting
updated_at as test_valid_from,
date('2099-12-31') as test_valid_to,
updated_at as test_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed
where id >= 10 and id <= 20;
"""


class TestSnapshotDbtValidToCurrent:
@pytest.fixture(scope="class")
def snapshots(self):
return {"snapshot.sql": snapshot_actual_sql}

@pytest.fixture(scope="class")
def models(self):
return {
"snapshots.yml": snapshots_valid_to_current_yml,
"ref_snapshot.sql": ref_snapshot_sql,
}

def test_valid_to_current(self, project):
path = os.path.join(project.test_data_dir, "seed_dbt_valid_to.sql")
project.run_sql_file(path)
results = run_dbt(["snapshot"])
assert len(results) == 1

original_snapshot = run_sql_with_adapter(
project.adapter,
"select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual",
"all",
)
assert original_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0)
assert original_snapshot[9][2] == datetime.datetime(2099, 12, 31, 0, 0)

project.run_sql(invalidate_sql)
project.run_sql(update_with_current_sql)

results = run_dbt(["snapshot"])
assert len(results) == 1

updated_snapshot = run_sql_with_adapter(
project.adapter,
"select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual",
"all",
)
assert updated_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0)
# Original version of the updated row no longer has the current (2099-12-31) date
assert updated_snapshot[9][2] == datetime.datetime(2016, 8, 20, 16, 44, 49)
# New version of the updated row carries the current (2099-12-31) date
assert updated_snapshot[20][2] == datetime.datetime(2099, 12, 31, 0, 0)

check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])
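
For readers following the index arithmetic above, a rough illustration of the two versions of id = 10 that the assertions check; the 16:44:49 timestamp comes from the assertion itself, and the seed's original 15:44:49 value is advanced by invalidate_sql (referenced above but defined outside this diff) before the second snapshot run:

-- illustrative query against snapshot_actual after the second run
select id, test_valid_from, test_valid_to
from {database}.{schema}.snapshot_actual
where id = 10
order by test_valid_from;

-- roughly expected result:
--  id | test_valid_from      | test_valid_to
-- ----+----------------------+----------------------
--  10 | 2016-08-20 15:44:49  | 2016-08-20 16:44:49   (superseded version, closed out)
--  10 | 2016-08-20 16:44:49  | 2099-12-31 00:00:00   (current version, sentinel date)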
5 changes: 1 addition & 4 deletions tests/unit/test_events.py
@@ -537,10 +537,7 @@ def test_bad_serialization():
with pytest.raises(Exception) as excinfo:
types.Note(param_event_doesnt_have="This should break")

assert (
str(excinfo.value)
== "[Note]: Unable to parse dict {'param_event_doesnt_have': 'This should break'}"
)
assert 'has no field named "param_event_doesnt_have" at "Note"' in str(excinfo.value)


def test_single_run_error():
