Skip to content

Commit

Permalink
Feat/117 new algo module semantic (#120)
Browse files Browse the repository at this point in the history
* feat: add new algo module for looking at semantic models

* chore: add sample artifacts of SL

* feat: working semantic relationship detection

* feat: add dataclasses

* feat: implement find_related_nodes_by_id

* test: add unittest

* chore: add py3.12

* chore: add check for p3.12

* docs: update mkdocs

* chore: update docs [skip ci]
  • Loading branch information
datnguye authored Jul 28, 2024
1 parent 1855328 commit 627833d
Show file tree
Hide file tree
Showing 22 changed files with 27,084 additions and 278 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

Generate the ERD-as-a-code ([DBML](https://dbdiagram.io/d), [Mermaid](https://mermaid-js.github.io/mermaid-live-editor/), [PlantUML](https://plantuml.com/ie-diagram), [GraphViz](https://graphviz.org/), [D2](https://d2lang.com/)) from dbt artifact files (`dbt Core`) or from dbt metadata (`dbt Cloud`)

How entity relationships are detected is configurable ([docs](https://dbterd.datnguyen.de/latest/nav/guide/cli-references.html#dbterd-run-algo-a)):

- [Test Relationships](https://docs.getdbt.com/reference/resource-properties/data-tests#relationships) (default)
- [Semantic Entities](https://docs.getdbt.com/docs/build/entities) (use `-a` option)

[![PyPI version](https://badge.fury.io/py/dbterd.svg)](https://pypi.org/project/dbterd/)
![python-cli](https://img.shields.io/badge/CLI-Python-FFCE3E?labelColor=14354C&logo=python&logoColor=white)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![python](https://img.shields.io/badge/Python-3.9|3.10|3.11-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![python](https://img.shields.io/badge/Python-3.9|3.10|3.11|3.12-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![codecov](https://codecov.io/gh/datnguye/dbterd/branch/main/graph/badge.svg?token=N7DMQBLH4P)](https://codecov.io/gh/datnguye/dbterd)

```bash
Expand Down
22 changes: 22 additions & 0 deletions dbterd/adapters/algos/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click

from dbterd.adapters.filter import is_selected_table
from dbterd.adapters.meta import Column, Ref, Table
from dbterd.constants import (
DEFAULT_ALGO_RULE,
Expand Down Expand Up @@ -96,6 +97,27 @@ def get_tables(manifest: Manifest, catalog: Catalog, **kwargs) -> List[Table]:
return tables


def filter_tables_based_on_selection(tables: List[Table], **kwargs) -> List[Table]:
    """Keep only the tables that pass the Selection Rules.

    Args:
        tables (List[Table]): Parsed tables
        **kwargs: `select`, `exclude` and `resource_type` are read from here;
            a missing or falsy `select`/`exclude` is passed on as an empty list

    Returns:
        List[Table]: Tables accepted by `is_selected_table`, in input order
    """
    selected = []
    for candidate in tables:
        keep = is_selected_table(
            table=candidate,
            select_rules=kwargs.get("select") or [],
            resource_types=kwargs.get("resource_type", []),
            exclude_rules=kwargs.get("exclude") or [],
        )
        if keep:
            selected.append(candidate)
    return selected


def enrich_tables_from_relationships(
tables: List[Table], relationships: List[Ref]
) -> List[Table]:
Expand Down
189 changes: 189 additions & 0 deletions dbterd/adapters/algos/semantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from typing import List, Tuple, Union

from dbterd.adapters.algos import base
from dbterd.adapters.meta import Ref, SemanticEntity, Table
from dbterd.constants import TEST_META_RELATIONSHIP_TYPE
from dbterd.helpers.log import logger
from dbterd.types import Catalog, Manifest


def parse_metadata(data, **kwargs) -> Tuple[List[Table], List[Ref]]:
    """Placeholder for the metadata parser of this algo; not implemented.

    Args:
        data: Metadata input (unused)

    Raises:
        NotImplementedError: Always
    """
    raise NotImplementedError  # pragma: no cover


def parse(
    manifest: Manifest, catalog: Union[str, Catalog], **kwargs
) -> Tuple[List[Table], List[Ref]]:
    """Parse tables and Semantic-Entity-based relationships from dbt artifacts.

    Args:
        manifest (Manifest): Manifest data
        catalog (Union[str, Catalog]): Catalog data, or the literal "metadata"
            to delegate to `parse_metadata`
        **kwargs: Forwarded to the table/relationship helpers

    Returns:
        Tuple[List[Table], List[Ref]]: Tables sorted by `node_name` and
            relationships sorted by `name`
    """
    # Metadata input is handled by its own parser
    if catalog == "metadata":  # pragma: no cover
        return parse_metadata(data=manifest, **kwargs)

    # Tables: collect everything, then apply the selection rules
    selected_tables = base.filter_tables_based_on_selection(
        tables=base.get_tables(manifest=manifest, catalog=catalog, **kwargs),
        **kwargs,
    )

    # Relationships: extract from the semantic entities, then make them up
    # against the selected tables
    refs = base.make_up_relationships(
        relationships=_get_relationships(manifest=manifest, **kwargs),
        tables=selected_tables,
    )

    # Fulfill columns in Tables (due to `select *`)
    selected_tables = base.enrich_tables_from_relationships(
        tables=selected_tables, relationships=refs
    )

    logger.info(
        f"Collected {len(selected_tables)} table(s) and {len(refs)} relationship(s)"
    )

    ordered_tables = sorted(selected_tables, key=lambda item: item.node_name)
    ordered_refs = sorted(refs, key=lambda item: item.name)
    return (ordered_tables, ordered_refs)


def find_related_nodes_by_id(
    manifest: Union[Manifest, dict], node_unique_id: str, type: str = None, **kwargs
) -> List[str]:
    """Find FK/PK nodes which are linked to the given node

    Args:
        manifest (Union[Manifest, dict]): Manifest data
        node_unique_id (str): Manifest model node unique id
        type (str, optional): Manifest type (local file or metadata). Defaults to None.

    Returns:
        List[str]: Unique manifest node IDs, starting with `node_unique_id`
            and followed by the linked nodes in the order they were found
    """
    found_nodes = [node_unique_id]
    if type == "metadata":  # pragma: no cover
        return found_nodes  # not supported yet, returned input only

    for foreign, primary in _get_linked_semantic_entities(manifest=manifest):
        if primary.model == node_unique_id:
            found_nodes.append(foreign.model)
        if foreign.model == node_unique_id:
            found_nodes.append(primary.model)

    # De-duplicate while keeping first-seen order: a plain `set` would make
    # the output order depend on string hashing and differ between runs.
    return list(dict.fromkeys(found_nodes))


def _get_relationships(manifest: Manifest, **kwargs) -> List[Ref]:
    """Extract relationships from dbt artifacts based on Semantic Entities.

    Args:
        manifest (Manifest): Manifest data

    Returns:
        List[Ref]: List of parsed relationship, passed through
            `base.get_unique_refs`
    """
    refs = []
    for fk, pk in _get_linked_semantic_entities(manifest=manifest):
        # Each Ref is named after the primary side's semantic model and maps
        # (primary, foreign) for both tables and columns.
        refs.append(
            Ref(
                name=pk.semantic_model,
                table_map=(pk.model, fk.model),
                column_map=(pk.column_name, fk.column_name),
                type=pk.relationship_type,
            )
        )
    return base.get_unique_refs(refs=refs)


def _get_linked_semantic_entities(
    manifest: Manifest,
) -> List[Tuple[SemanticEntity, SemanticEntity]]:
    """Pair up the foreign and primary Semantic Entities sharing an entity name.

    Args:
        manifest (Manifest): Manifest data

    Returns:
        List[Tuple[SemanticEntity, SemanticEntity]]: List of (FK, PK) objects,
            one per foreign/primary combination with equal `entity_name`
    """
    fks, pks = _get_semantic_entities(manifest=manifest)
    return [
        (fk, pk)
        for fk in fks
        for pk in pks
        if fk.entity_name == pk.entity_name
    ]


def _get_semantic_entities(
    manifest: Manifest,
) -> Tuple[List[SemanticEntity], List[SemanticEntity]]:
    """Collect the foreign and primary Semantic Entities of all semantic models.

    Args:
        manifest (Manifest): Manifest data

    Returns:
        Tuple[List[SemanticEntity], List[SemanticEntity]]: FK list and PK list
    """
    FK = "foreign"
    PK = "primary"

    def _build(model_id, node, name, kind, column):
        # The linked model is the first node the semantic model depends on;
        # the relationship type comes from the semantic model's config meta.
        return SemanticEntity(
            semantic_model=model_id,
            model=node.depends_on.nodes[0],
            entity_name=name,
            entity_type=kind,
            column_name=column,
            relationship_type=node.config.meta.get(TEST_META_RELATIONSHIP_TYPE, ""),
        )

    collected = []
    for model_id in _get_semantic_nodes(manifest=manifest):
        node = manifest.semantic_models[model_id]

        # Explicitly declared entities: only primary/foreign ones are kept
        for entity in node.entities:
            if entity.type.value not in [PK, FK]:
                continue
            collected.append(
                _build(
                    model_id,
                    node,
                    entity.name,
                    entity.type.value,
                    entity.expr or entity.name,
                )
            )

        # Model-level `primary_entity` is recorded as a primary entity whose
        # column is the entity name itself
        if node.primary_entity:
            collected.append(
                _build(model_id, node, node.primary_entity, PK, node.primary_entity)
            )

    foreigns = []
    primaries = []
    for item in collected:
        if item.entity_type == FK:
            foreigns.append(item)
        if item.entity_type == PK:
            primaries.append(item)
    return (foreigns, primaries)


def _get_semantic_nodes(manifest: Manifest) -> List:
    """Extract the Semantic Models that depend on at least one node

    Args:
        manifest (Manifest): Manifest data

    Returns:
        List: Keys of `manifest.semantic_models` whose `depends_on.nodes` is
            not empty; an empty list (with a warning logged) when the
            manifest has no `semantic_models` attribute
    """
    if not hasattr(manifest, "semantic_models"):
        # Adjacent literals are concatenated as-is, so the trailing space is
        # required to keep "captured" and "since" apart in the message.
        logger.warning(
            "No relationships will be captured "
            "since dbt version is NOT supported for the Semantic Models"
        )
        return []

    return [
        x
        for x in manifest.semantic_models
        if len(manifest.semantic_models[x].depends_on.nodes)
    ]
30 changes: 2 additions & 28 deletions dbterd/adapters/algos/test_relationship.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import List, Tuple, Union

from dbterd.adapters.algos import base
from dbterd.adapters.filter import is_selected_table
from dbterd.adapters.meta import Ref, Table
from dbterd.helpers.log import logger
from dbterd.types import Catalog, Manifest
Expand All @@ -22,18 +21,7 @@ def parse_metadata(data, **kwargs) -> Tuple[List[Table], List[Ref]]:

# Parse Table
tables = base.get_tables_from_metadata(data=data, **kwargs)

# Apply selection
tables = [
table
for table in tables
if is_selected_table(
table=table,
select_rules=kwargs.get("select") or [],
resource_types=kwargs.get("resource_type", []),
exclude_rules=kwargs.get("exclude") or [],
)
]
tables = base.filter_tables_based_on_selection(tables=tables, **kwargs)

# Parse Ref
relationships = base.get_relationships_from_metadata(data=data, **kwargs)
Expand Down Expand Up @@ -68,18 +56,7 @@ def parse(

# Parse Table
tables = base.get_tables(manifest=manifest, catalog=catalog, **kwargs)

# Apply selection
tables = [
table
for table in tables
if is_selected_table(
table=table,
select_rules=kwargs.get("select") or [],
resource_types=kwargs.get("resource_type", []),
exclude_rules=kwargs.get("exclude") or [],
)
]
tables = base.filter_tables_based_on_selection(tables=tables, **kwargs)

# Parse Ref
relationships = base.get_relationships(manifest=manifest, **kwargs)
Expand Down Expand Up @@ -113,9 +90,6 @@ def find_related_nodes_by_id(
node_unique_id (str): Manifest node unique ID
type (str, optional): Manifest type (local file or metadata). Defaults to None.
Raises:
click.BadParameter: Not Supported manifest type
Returns:
List[str]: Manifest nodes' unique ID
"""
Expand Down
12 changes: 12 additions & 0 deletions dbterd/adapters/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ class Ref:
type: str = "n1"


@dataclass
class SemanticEntity:
    """Parsed Semantic Model's Entity object"""

    # Key of the semantic model in the manifest's `semantic_models`
    semantic_model: str
    # Unique ID of the node the semantic model depends on (first dependency)
    model: str
    # Entity name; foreign and primary entities are linked when names match
    entity_name: str
    # Entity type value; the semantic algo only keeps "primary" / "foreign"
    entity_type: str
    # Column behind the entity (entity `expr`, falling back to its name)
    column_name: str
    # Relationship type read from the semantic model's config meta ("" if unset)
    relationship_type: str


class SelectionType(Enum):
START_WITH_NAME = ""
EXACT_NAME = "exact"
Expand Down
2 changes: 1 addition & 1 deletion dbterd/adapters/targets/mermaid.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def parse(manifest: Manifest, catalog: Catalog, **kwargs) -> str:
key_to = f'"{rel.table_map[0]}"'
reference_text = replace_column_name(rel.column_map[0])
if rel.column_map[0] != rel.column_map[1]:
reference_text += f"--{ replace_column_name(rel.column_map[1])}"
reference_text += f"--{replace_column_name(rel.column_map[1])}"
mermaid += f" {key_from.upper()} {get_rel_symbol(rel.type)} {key_to.upper()}: {reference_text}\n"

return mermaid
Expand Down
3 changes: 2 additions & 1 deletion dbterd/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click

from dbterd import default
from dbterd.adapters.base import Executor
from dbterd.cli import params
from dbterd.helpers import jsonify
Expand Down Expand Up @@ -51,7 +52,7 @@ def invoke(self, args: List[str]):
@click.pass_context
def dbterd(ctx, **kwargs):
"""Tools for producing diagram-as-code"""
logger.info(f"Run with dbterd=={__version__}")
logger.info(f"Run with dbterd=={__version__} [{default.default_algo()}]")


# dbterd run
Expand Down
13 changes: 7 additions & 6 deletions dbterd/default.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
import os
from pathlib import Path
from typing import List


def default_artifact_path() -> str:
    """Return the default artifact path.

    Returns:
        str: Value of the `DBTERD_ARTIFACT_PATH` environment variable when
            set, otherwise `<current working directory>/target`
    """
    return os.environ.get("DBTERD_ARTIFACT_PATH", str(Path.cwd() / "target"))


def default_output_path() -> str:
    """Return the default output path.

    Returns:
        str: Value of the `DBTERD_OUTPUT_PATH` environment variable when set,
            otherwise `<current working directory>/target`
    """
    return os.environ.get("DBTERD_OUTPUT_PATH", str(Path.cwd() / "target"))


def default_target() -> str:
    """Return the default target.

    Returns:
        str: Value of the `DBTERD_TARGET` environment variable when set,
            otherwise "dbml"
    """
    return os.environ.get("DBTERD_TARGET", "dbml")


def default_algo() -> str:
    """Return the default algo.

    Returns:
        str: Value of the `DBTERD_ALGO` environment variable when set,
            otherwise "test_relationship"
    """
    return os.environ.get("DBTERD_ALGO", "test_relationship")


def default_resource_types() -> List[str]:
    """Return the default resource types.

    The `DBTERD_RESOURCE_TYPES` environment variable, when set, is read as a
    comma-separated list (e.g. "model,source") so that the result is always a
    list of strings rather than the raw environment string.

    Returns:
        List[str]: Parsed resource types, or ["model"] when the variable is
            unset or contains no non-blank item
    """
    raw = os.environ.get("DBTERD_RESOURCE_TYPES")
    if raw is None:
        return ["model"]

    # Tolerate spaces around items and stray commas
    parsed = [item.strip() for item in raw.split(",") if item.strip()]
    return parsed or ["model"]


def default_entity_name_format() -> str:
    """Return the default entity name format.

    Returns:
        str: Value of the `DBTERD_ENTITY_NAME_FORMAT` environment variable
            when set, otherwise "resource.package.model"
    """
    return os.environ.get("DBTERD_ENTITY_NAME_FORMAT", "resource.package.model")
Loading

0 comments on commit 627833d

Please sign in to comment.