Skip to content

Commit

Permalink
handle missing data_type
Browse files Browse the repository at this point in the history
  • Loading branch information
MichelleArk committed Jul 10, 2023
1 parent b761089 commit d45b973
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 13 deletions.
32 changes: 20 additions & 12 deletions dbt/adapters/bigquery/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def column_to_bq_schema(self) -> SchemaField:
def get_nested_column_data_types(
columns: Dict[str, Dict[str, Any]],
constraints: Optional[Dict[str, str]] = None,
) -> Dict[str, Dict[str, str]]:
) -> Dict[str, Dict[str, Optional[str]]]:
"""
columns:
* Dictionary where keys are flat column names and values are dictionaries of column attributes
Expand Down Expand Up @@ -161,16 +161,16 @@ def get_nested_column_data_types(
"""
constraints = constraints or {}

nested_column_data_types: Dict[str, Union[str, Dict]] = {}
nested_column_data_types: Dict[str, Optional[Union[str, Dict]]] = {}
for column in columns.values():
_update_nested_column_data_types(
column["name"],
column["data_type"],
column.get("data_type"),
constraints.get(column["name"]),
nested_column_data_types,
)

formatted_nested_column_data_types: Dict[str, Dict[str, str]] = {}
formatted_nested_column_data_types: Dict[str, Dict[str, Optional[str]]] = {}
for column_name, unformatted_column_type in nested_column_data_types.items():
formatted_nested_column_data_types[column_name] = {
"name": column_name,
Expand All @@ -193,9 +193,9 @@ def get_nested_column_data_types(

def _update_nested_column_data_types(
column_name: str,
column_data_type: str,
column_data_type: Optional[str],
column_rendered_constraint: Optional[str],
nested_column_data_types: Dict[str, Union[str, Dict]],
nested_column_data_types: Dict[str, Optional[Union[str, Dict]]],
) -> None:
"""
Recursively update nested_column_data_types given a column_name, column_data_type, and optional column_rendered_constraint.
Expand All @@ -218,9 +218,13 @@ def _update_nested_column_data_types(
if len(column_name_parts) == 1:
# Base case: column is not nested - store its data_type concatenated with constraint if provided.
column_data_type_and_constraints = (
column_data_type
if column_rendered_constraint is None
else f"{column_data_type} {column_rendered_constraint}"
(
column_data_type
if column_rendered_constraint is None
else f"{column_data_type} {column_rendered_constraint}"
)
if column_data_type
else None
)

if existing_nested_column_data_type := nested_column_data_types.get(root_column_name):
Expand Down Expand Up @@ -258,7 +262,9 @@ def _update_nested_column_data_types(
)


def _format_nested_data_type(unformatted_nested_data_type: Union[str, Dict[str, Any]]) -> str:
def _format_nested_data_type(
unformatted_nested_data_type: Optional[Union[str, Dict[str, Any]]]
) -> Optional[str]:
"""
Recursively format a (STRUCT) data type given an arbitrarily nested data type structure.
Expand All @@ -270,15 +276,17 @@ def _format_nested_data_type(unformatted_nested_data_type: Union[str, Dict[str,
>>> BigQueryAdapter._format_nested_data_type({'c': 'string not_null', 'd': {'e': 'string'}})
'struct<c string not_null, d struct<e string>>'
"""
if isinstance(unformatted_nested_data_type, str):
if unformatted_nested_data_type is None:
return None
elif isinstance(unformatted_nested_data_type, str):
return unformatted_nested_data_type
else:
parent_data_type, *parent_constraints = unformatted_nested_data_type.pop(
_PARENT_DATA_TYPE_KEY, ""
).split() or [None]

formatted_nested_types = [
f"{column_name} {_format_nested_data_type(column_type)}"
f"{column_name} {_format_nested_data_type(column_type) or ''}".strip()
for column_name, column_type in unformatted_nested_data_type.items()
]

Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def nest_column_data_types(
cls,
columns: Dict[str, Dict[str, Any]],
constraints: Optional[Dict[str, str]] = None,
) -> Dict[str, Dict[str, str]]:
) -> Dict[str, Dict[str, Optional[str]]]:
return get_nested_column_data_types(columns, constraints)

def get_columns_in_relation(self, relation: BigQueryRelation) -> List[BigQueryColumn]:
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
None,
{"a": {"name": "a", "data_type": "string"}},
),
# Flat column - missing data_type
(
{"a": {"name": "a"}},
None,
{"a": {"name": "a", "data_type": None}},
),
# Flat column - with constraints
(
{"a": {"name": "a", "data_type": "string"}},
Expand All @@ -32,12 +38,24 @@
None,
{"b": {"name": "b", "data_type": "struct<nested string>"}},
),
# Single nested column, 1 level - missing data_type
(
{"b.nested": {"name": "b.nested"}},
None,
{"b": {"name": "b", "data_type": "struct<nested>"}},
),
# Single nested column, 1 level - with constraints
(
{"b.nested": {"name": "b.nested", "data_type": "string"}},
{"b.nested": "not null"},
{"b": {"name": "b", "data_type": "struct<nested string not null>"}},
),
# Single nested column, 1 level - with constraints, missing data_type (constraints not valid without data_type)
(
{"b.nested": {"name": "b.nested"}},
{"b.nested": "not null"},
{"b": {"name": "b", "data_type": "struct<nested>"}},
),
# Single nested column, 1 level - with constraints + other keys
(
{"b.nested": {"name": "b.nested", "data_type": "string", "other": "unpreserved"}},
Expand Down Expand Up @@ -151,6 +169,28 @@
},
},
),
# Nested columns, multiple levels - missing data_type
(
{
"b.user.name.first": {
"name": "b.user.name.first",
"data_type": "string",
},
"b.user.name.last": {
"name": "b.user.name.last",
"data_type": "string",
},
"b.user.id": {"name": "b.user.id", "data_type": "int64"},
"b.user.country": {"name": "b.user.country"}, # missing data_type
},
None,
{
"b": {
"name": "b",
"data_type": "struct<user struct<name struct<first string, last string>, id int64, country>>",
},
},
),
# Nested columns, multiple levels - with constraints!
(
{
Expand Down

0 comments on commit d45b973

Please sign in to comment.