Skip to content

Commit

Permalink
Avoid unnecessary grant call (#791) (#793)
Browse files Browse the repository at this point in the history
* remove call to dataset update if dataset has not changed

* add changie

* fix unit test naming

* seperate entry check from update

---------

Co-authored-by: Mike Alfare <[email protected]>
(cherry picked from commit 85efa2a)

Co-authored-by: colin-rogers-dbt <[email protected]>
  • Loading branch information
github-actions[bot] and colin-rogers-dbt authored Jun 27, 2023
1 parent 5dd9e7e commit 3251587
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 14 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230626-105156.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: remove call to dataset update if dataset has not changed
time: 2023-06-26T10:51:56.698483-07:00
custom:
Author: colin-rogers-dbt
Issue: "770"
24 changes: 19 additions & 5 deletions dbt/adapters/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
logger = AdapterLogger("BigQuery")


def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset:
"""Idempotently adds an access entry to a dataset
def is_access_entry_in_dataset(dataset: Dataset, access_entry: AccessEntry) -> bool:
"""Check if the access entry already exists in the dataset.
Args:
dataset (Dataset): the dataset to be updated
access_entry (AccessEntry): the access entry to be added to the dataset
Returns:
Dataset
bool: True if entry exists in dataset, False otherwise
"""
access_entries: List[AccessEntry] = dataset.access_entries
# we can't simply check if an access entry is in the list as the current equality check
Expand All @@ -24,8 +24,22 @@ def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) ->
entity_type_match = existing_entry.entity_type == access_entry.entity_type
property_match = existing_entry._properties.items() <= access_entry._properties.items()
if role_match and entity_type_match and property_match:
logger.warning(f"Access entry {access_entry} " f"already exists in dataset")
return dataset
return True
return False


def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset:
"""Adds an access entry to a dataset, always use access_entry_present_in_dataset to check
if the access entry already exists before calling this function.
Args:
dataset (Dataset): the dataset to be updated
access_entry (AccessEntry): the access entry to be added to the dataset
Returns:
Dataset: the updated dataset
"""
access_entries: List[AccessEntry] = dataset.access_entries
access_entries.append(access_entry)
dataset.access_entries = access_entries
return dataset
10 changes: 7 additions & 3 deletions dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

from dbt.adapters.bigquery.column import get_nested_column_data_types
from dbt.adapters.bigquery.relation import BigQueryRelation
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset
from dbt.adapters.bigquery import BigQueryColumn
from dbt.adapters.bigquery import BigQueryConnectionManager
from dbt.adapters.bigquery.python_submissions import (
Expand Down Expand Up @@ -885,8 +885,12 @@ def grant_access_to(self, entity, entity_type, role, grant_target_dict):
dataset_ref = self.connections.dataset_ref(grant_target.project, grant_target.dataset)
dataset = client.get_dataset(dataset_ref)
access_entry = AccessEntry(role, entity_type, entity)
dataset = add_access_entry_to_dataset(dataset, access_entry)
client.update_dataset(dataset, ["access_entries"])
# only perform update if access entry not in dataset
if is_access_entry_in_dataset(dataset, access_entry):
logger.warning(f"Access entry {access_entry} " f"already exists in dataset")
else:
dataset = add_access_entry_to_dataset(dataset, access_entry)
client.update_dataset(dataset, ["access_entries"])

@available.parse_none
def get_dataset_location(self, relation):
Expand Down
51 changes: 45 additions & 6 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset
from dbt.adapters.bigquery import BigQueryRelation

from google.cloud.bigquery import Dataset, AccessEntry, DatasetReference


def test_add_access_entry_to_dataset_idempotently_adds_entries():
def test_add_access_entry_to_dataset_updates_dataset():
database = "someDb"
dataset = "someDataset"
entity = BigQueryRelation.from_dict(
Expand All @@ -19,11 +19,9 @@ def test_add_access_entry_to_dataset_idempotently_adds_entries():
access_entry = AccessEntry(None, "table", entity)
dataset = add_access_entry_to_dataset(dataset, access_entry)
assert access_entry in dataset.access_entries
dataset = add_access_entry_to_dataset(dataset, access_entry)
assert len(dataset.access_entries) == 1


def test_add_access_entry_to_dataset_does_not_add_with_pre_existing_entries():
def test_add_access_entry_to_dataset_updates_with_pre_existing_entries():
database = "someOtherDb"
dataset = "someOtherDataset"
entity_2 = BigQueryRelation.from_dict(
Expand All @@ -40,4 +38,45 @@ def test_add_access_entry_to_dataset_does_not_add_with_pre_existing_entries():
dataset.access_entries = [initial_entry]
access_entry = AccessEntry(None, "view", entity_2)
dataset = add_access_entry_to_dataset(dataset, access_entry)
assert len(dataset.access_entries) == 1
assert len(dataset.access_entries) == 2


def test_is_access_entry_in_dataset_returns_true_if_entry_in_dataset():
database = "someDb"
dataset = "someDataset"
entity = BigQueryRelation.from_dict(
{
"type": None,
"path": {
"database": "test-project",
"schema": "test_schema",
"identifier": "my_table",
},
"quote_policy": {"identifier": False},
}
).to_dict()
dataset_ref = DatasetReference(project=database, dataset_id=dataset)
dataset = Dataset(dataset_ref)
access_entry = AccessEntry(None, "table", entity)
dataset = add_access_entry_to_dataset(dataset, access_entry)
assert is_access_entry_in_dataset(dataset, access_entry)


def test_is_access_entry_in_dataset_returns_false_if_entry_not_in_dataset():
database = "someDb"
dataset = "someDataset"
entity = BigQueryRelation.from_dict(
{
"type": None,
"path": {
"database": "test-project",
"schema": "test_schema",
"identifier": "my_table",
},
"quote_policy": {"identifier": False},
}
).to_dict()
dataset_ref = DatasetReference(project=database, dataset_id=dataset)
dataset = Dataset(dataset_ref)
access_entry = AccessEntry(None, "table", entity)
assert not is_access_entry_in_dataset(dataset, access_entry)

0 comments on commit 3251587

Please sign in to comment.