diff --git a/.changes/unreleased/Fixes-20230626-105156.yaml b/.changes/unreleased/Fixes-20230626-105156.yaml new file mode 100644 index 000000000..d1c6b9e25 --- /dev/null +++ b/.changes/unreleased/Fixes-20230626-105156.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: remove call to dataset update if dataset has not changed +time: 2023-06-26T10:51:56.698483-07:00 +custom: + Author: colin-rogers-dbt + Issue: "770" diff --git a/dbt/adapters/bigquery/dataset.py b/dbt/adapters/bigquery/dataset.py index ebffe1072..c886637d7 100644 --- a/dbt/adapters/bigquery/dataset.py +++ b/dbt/adapters/bigquery/dataset.py @@ -6,15 +6,15 @@ logger = AdapterLogger("BigQuery") -def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset: - """Idempotently adds an access entry to a dataset +def is_access_entry_in_dataset(dataset: Dataset, access_entry: AccessEntry) -> bool: + """Check if the access entry already exists in the dataset. Args: dataset (Dataset): the dataset to be updated access_entry (AccessEntry): the access entry to be added to the dataset Returns: - Dataset + bool: True if entry exists in dataset, False otherwise """ access_entries: List[AccessEntry] = dataset.access_entries # we can't simply check if an access entry is in the list as the current equality check @@ -24,8 +24,22 @@ def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> entity_type_match = existing_entry.entity_type == access_entry.entity_type property_match = existing_entry._properties.items() <= access_entry._properties.items() if role_match and entity_type_match and property_match: - logger.warning(f"Access entry {access_entry} " f"already exists in dataset") - return dataset + return True + return False + + +def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset: + """Adds an access entry to a dataset, always use is_access_entry_in_dataset to check + if the access entry already exists before calling this function. 
+ + Args: + dataset (Dataset): the dataset to be updated + access_entry (AccessEntry): the access entry to be added to the dataset + + Returns: + Dataset: the updated dataset + """ + access_entries: List[AccessEntry] = dataset.access_entries access_entries.append(access_entry) dataset.access_entries = access_entries return dataset diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 7c4470b93..b3d397722 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -29,7 +29,7 @@ from dbt.adapters.bigquery.column import get_nested_column_data_types from dbt.adapters.bigquery.relation import BigQueryRelation -from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset +from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset from dbt.adapters.bigquery import BigQueryColumn from dbt.adapters.bigquery import BigQueryConnectionManager from dbt.adapters.bigquery.python_submissions import ( @@ -885,8 +885,12 @@ def grant_access_to(self, entity, entity_type, role, grant_target_dict): dataset_ref = self.connections.dataset_ref(grant_target.project, grant_target.dataset) dataset = client.get_dataset(dataset_ref) access_entry = AccessEntry(role, entity_type, entity) - dataset = add_access_entry_to_dataset(dataset, access_entry) - client.update_dataset(dataset, ["access_entries"]) + # only perform update if access entry not in dataset + if is_access_entry_in_dataset(dataset, access_entry): + logger.warning(f"Access entry {access_entry} " f"already exists in dataset") + else: + dataset = add_access_entry_to_dataset(dataset, access_entry) + client.update_dataset(dataset, ["access_entries"]) @available.parse_none def get_dataset_location(self, relation): diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 53109e5cf..6e2c44ef1 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1,10 +1,10 @@ -from dbt.adapters.bigquery.dataset import 
add_access_entry_to_dataset +from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset from dbt.adapters.bigquery import BigQueryRelation from google.cloud.bigquery import Dataset, AccessEntry, DatasetReference -def test_add_access_entry_to_dataset_idempotently_adds_entries(): +def test_add_access_entry_to_dataset_updates_dataset(): database = "someDb" dataset = "someDataset" entity = BigQueryRelation.from_dict( @@ -19,11 +19,9 @@ def test_add_access_entry_to_dataset_idempotently_adds_entries(): access_entry = AccessEntry(None, "table", entity) dataset = add_access_entry_to_dataset(dataset, access_entry) assert access_entry in dataset.access_entries - dataset = add_access_entry_to_dataset(dataset, access_entry) - assert len(dataset.access_entries) == 1 -def test_add_access_entry_to_dataset_does_not_add_with_pre_existing_entries(): +def test_add_access_entry_to_dataset_updates_with_pre_existing_entries(): database = "someOtherDb" dataset = "someOtherDataset" entity_2 = BigQueryRelation.from_dict( @@ -40,4 +38,45 @@ def test_add_access_entry_to_dataset_does_not_add_with_pre_existing_entries(): dataset.access_entries = [initial_entry] access_entry = AccessEntry(None, "view", entity_2) dataset = add_access_entry_to_dataset(dataset, access_entry) - assert len(dataset.access_entries) == 1 + assert len(dataset.access_entries) == 2 + + +def test_is_access_entry_in_dataset_returns_true_if_entry_in_dataset(): + database = "someDb" + dataset = "someDataset" + entity = BigQueryRelation.from_dict( + { + "type": None, + "path": { + "database": "test-project", + "schema": "test_schema", + "identifier": "my_table", + }, + "quote_policy": {"identifier": False}, + } + ).to_dict() + dataset_ref = DatasetReference(project=database, dataset_id=dataset) + dataset = Dataset(dataset_ref) + access_entry = AccessEntry(None, "table", entity) + dataset = add_access_entry_to_dataset(dataset, access_entry) + assert is_access_entry_in_dataset(dataset, 
access_entry) + + +def test_is_access_entry_in_dataset_returns_false_if_entry_not_in_dataset(): + database = "someDb" + dataset = "someDataset" + entity = BigQueryRelation.from_dict( + { + "type": None, + "path": { + "database": "test-project", + "schema": "test_schema", + "identifier": "my_table", + }, + "quote_policy": {"identifier": False}, + } + ).to_dict() + dataset_ref = DatasetReference(project=database, dataset_id=dataset) + dataset = Dataset(dataset_ref) + access_entry = AccessEntry(None, "table", entity) + assert not is_access_entry_in_dataset(dataset, access_entry)