Skip to content

Commit

Permalink
Backport #9147 to 1.7.latest (#9156)
Browse files Browse the repository at this point in the history
* Fixups for deps lock file (#9147)

* Update git revision with commit SHA

* Use PackageRenderer for lock file

* add unit tests for git and tarball packages

* deepcopy unrendered_packages_data before iteration, fix remaining tests

* Add functional tests

* Add changelog entries

* Assert one more

---------

Co-authored-by: Michelle Ark <[email protected]>

* Restore warning on unpinned git packages

---------

Co-authored-by: Michelle Ark <[email protected]>
  • Loading branch information
jtcohen6 and MichelleArk authored Nov 29, 2023
1 parent f94bf2b commit 09f5bb3
Show file tree
Hide file tree
Showing 17 changed files with 507 additions and 350 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231127-154310.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: 'deps: Lock git packages to commit SHA during resolution'
time: 2023-11-27T15:43:10.122069+01:00
custom:
Author: jtcohen6
Issue: "9050"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231127-154347.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: 'deps: Use PackageRenderer to read package-lock.json'
time: 2023-11-27T15:43:47.842423+01:00
custom:
Author: jtcohen6
Issue: "9127"
2 changes: 1 addition & 1 deletion core/dbt/clients/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def checkout(cwd, repo, revision=None):
def get_current_sha(cwd):
out, err = run_cmd(cwd, ["git", "rev-parse", "HEAD"], env={"LC_ALL": "C"})

return out.decode("utf-8")
return out.decode("utf-8").strip()


def remove_remote(cwd):
Expand Down
18 changes: 14 additions & 4 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,19 @@ def package_and_project_data_from_root(project_root):
return packages_dict, packages_specified_path


def package_config_from_data(packages_data: Dict[str, Any]) -> PackageConfig:
def package_config_from_data(
packages_data: Dict[str, Any],
unrendered_packages_data: Optional[Dict[str, Any]] = None,
) -> PackageConfig:
if not packages_data:
packages_data = {"packages": []}

# this depends on the two lists being in the same order
if unrendered_packages_data:
unrendered_packages_data = deepcopy(unrendered_packages_data)
for i in range(0, len(packages_data.get("packages", []))):
packages_data["packages"][i]["unrendered"] = unrendered_packages_data["packages"][i]

if PACKAGE_LOCK_HASH_KEY in packages_data:
packages_data.pop(PACKAGE_LOCK_HASH_KEY)
try:
Expand Down Expand Up @@ -326,7 +335,7 @@ def render(self, renderer: DbtProjectYamlRenderer) -> "Project":

def render_package_metadata(self, renderer: PackageRenderer) -> ProjectPackageMetadata:
packages_data = renderer.render_data(self.packages_dict)
packages_config = package_config_from_data(packages_data)
packages_config = package_config_from_data(packages_data, self.packages_dict)
if not self.project_name:
raise DbtProjectError("Package dbt_project.yml must have a name!")
return ProjectPackageMetadata(self.project_name, packages_config.packages)
Expand Down Expand Up @@ -461,8 +470,9 @@ def create_project(self, rendered: RenderComponents) -> "Project":
on_run_end: List[str] = value_or(cfg.on_run_end, [])

query_comment = _query_comment_from_cfg(cfg.query_comment)

packages: PackageConfig = package_config_from_data(rendered.packages_dict)
packages: PackageConfig = package_config_from_data(
rendered.packages_dict, unrendered.packages_dict
)
selectors = selector_config_from_data(rendered.selectors_dict)
manifest_selectors: Dict[str, Any] = {}
if rendered.selectors_dict and rendered.selectors_dict["selectors"]:
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class Package(dbtClassMixin, Replaceable):
@dataclass
class LocalPackage(Package):
local: str
unrendered: Dict[str, Any] = field(default_factory=dict)


# `float` also allows `int`, according to PEP484 (and jsonschema!)
Expand All @@ -56,6 +57,7 @@ class LocalPackage(Package):
class TarballPackage(Package):
tarball: str
name: str
unrendered: Dict[str, Any] = field(default_factory=dict)


@dataclass
Expand All @@ -64,6 +66,7 @@ class GitPackage(Package):
revision: Optional[RawVersion] = None
warn_unpinned: Optional[bool] = field(default=None, metadata={"alias": "warn-unpinned"})
subdirectory: Optional[str] = None
unrendered: Dict[str, Any] = field(default_factory=dict)

def get_revisions(self) -> List[str]:
if self.revision is None:
Expand All @@ -77,6 +80,7 @@ class RegistryPackage(Package):
package: str
version: Union[RawVersion, List[RawVersion]]
install_prerelease: Optional[bool] = False
unrendered: Dict[str, Any] = field(default_factory=dict)

def get_versions(self) -> List[str]:
if isinstance(self.version, list):
Expand Down
28 changes: 21 additions & 7 deletions core/dbt/deps/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
GitPackage,
)
from dbt.deps.base import PinnedPackage, UnpinnedPackage, get_downloads_path
from dbt.exceptions import ExecutableError, MultipleVersionGitDepsError
from dbt.exceptions import ExecutableError, MultipleVersionGitDepsError, scrub_secrets, env_secrets
from dbt.events.functions import fire_event, warn_or_error
from dbt.events.types import EnsureGitInstalled, DepsUnpinned
from dbt.events.types import EnsureGitInstalled, DepsUnpinned, DepsScrubbedPackageName
from dbt.utils import md5


Expand All @@ -23,10 +23,12 @@ class GitPackageMixin:
def __init__(
self,
git: str,
git_unrendered: str,
subdirectory: Optional[str] = None,
) -> None:
super().__init__()
self.git = git
self.git_unrendered = git_unrendered
self.subdirectory = subdirectory

@property
Expand All @@ -41,19 +43,23 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
def __init__(
self,
git: str,
git_unrendered: str,
revision: str,
warn_unpinned: bool = True,
subdirectory: Optional[str] = None,
) -> None:
super().__init__(git, subdirectory)
super().__init__(git, git_unrendered, subdirectory)
self.revision = revision
self.warn_unpinned = warn_unpinned
self.subdirectory = subdirectory
self._checkout_name = md5sum(self.name)

def to_dict(self) -> Dict[str, str]:
git_scrubbed = scrub_secrets(self.git_unrendered, env_secrets())
if self.git_unrendered != git_scrubbed:
warn_or_error(DepsScrubbedPackageName(package_name=git_scrubbed))
ret = {
"git": self.git,
"git": git_scrubbed,
"revision": self.revision,
}
if self.subdirectory:
Expand Down Expand Up @@ -96,8 +102,13 @@ def _fetch_metadata(
) -> ProjectPackageMetadata:
path = self._checkout()

# raise warning (or error) if this package is not pinned
if (self.revision == "HEAD" or self.revision in ("main", "master")) and self.warn_unpinned:
warn_or_error(DepsUnpinned(git=self.git))
warn_or_error(DepsUnpinned(revision=self.revision, git=self.git))

# now overwrite 'revision' with actual commit SHA
self.revision = git.get_current_sha(path)

partial = PartialProject.from_project_root(path)
return partial.render_package_metadata(renderer)

Expand All @@ -116,11 +127,12 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
def __init__(
self,
git: str,
git_unrendered: str,
revisions: List[str],
warn_unpinned: bool = True,
subdirectory: Optional[str] = None,
) -> None:
super().__init__(git, subdirectory)
super().__init__(git, git_unrendered, subdirectory)
self.revisions = revisions
self.warn_unpinned = warn_unpinned
self.subdirectory = subdirectory
Expand All @@ -133,6 +145,7 @@ def from_contract(cls, contract: GitPackage) -> "GitUnpinnedPackage":
warn_unpinned = contract.warn_unpinned is not False
return cls(
git=contract.git,
git_unrendered=(contract.unrendered.get("git") or contract.git),
revisions=revisions,
warn_unpinned=warn_unpinned,
subdirectory=contract.subdirectory,
Expand All @@ -157,6 +170,7 @@ def incorporate(self, other: "GitUnpinnedPackage") -> "GitUnpinnedPackage":

return GitUnpinnedPackage(
git=self.git,
git_unrendered=self.git_unrendered,
revisions=self.revisions + other.revisions,
warn_unpinned=warn_unpinned,
subdirectory=self.subdirectory,
Expand All @@ -168,9 +182,9 @@ def resolved(self) -> GitPinnedPackage:
requested = {"HEAD"}
elif len(requested) > 1:
raise MultipleVersionGitDepsError(self.name, requested)

return GitPinnedPackage(
git=self.git,
git_unrendered=self.git_unrendered,
revision=requested.pop(),
warn_unpinned=self.warn_unpinned,
subdirectory=self.subdirectory,
Expand Down
32 changes: 24 additions & 8 deletions core/dbt/deps/tarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@

from dbt.contracts.project import RegistryPackageMetadata, TarballPackage
from dbt.deps.base import PinnedPackage, UnpinnedPackage
from dbt.exceptions import scrub_secrets, env_secrets
from dbt.events.functions import warn_or_error
from dbt.events.types import DepsScrubbedPackageName


class TarballPackageMixin:
def __init__(self, tarball: str) -> None:
def __init__(self, tarball: str, tarball_unrendered: str) -> None:
super().__init__()
self.tarball = tarball
self.tarball_unrendered = tarball_unrendered

@property
def name(self):
Expand All @@ -18,8 +22,8 @@ def source_type(self) -> str:


class TarballPinnedPackage(TarballPackageMixin, PinnedPackage):
def __init__(self, tarball: str, package: str) -> None:
super().__init__(tarball)
def __init__(self, tarball: str, tarball_unrendered: str, package: str) -> None:
super().__init__(tarball, tarball_unrendered)
# setup to recycle RegistryPinnedPackage fns
self.package = package
self.version = "tarball"
Expand All @@ -29,8 +33,11 @@ def name(self):
return self.package

def to_dict(self) -> Dict[str, str]:
tarball_scrubbed = scrub_secrets(self.tarball_unrendered, env_secrets())
if self.tarball_unrendered != tarball_scrubbed:
warn_or_error(DepsScrubbedPackageName(package_name=tarball_scrubbed))
return {
"tarball": self.tarball,
"tarball": tarball_scrubbed,
"name": self.package,
}

Expand Down Expand Up @@ -64,19 +71,28 @@ class TarballUnpinnedPackage(TarballPackageMixin, UnpinnedPackage[TarballPinnedP
def __init__(
self,
tarball: str,
tarball_unrendered: str,
package: str,
) -> None:
super().__init__(tarball)
super().__init__(tarball, tarball_unrendered)
# setup to recycle RegistryPinnedPackage fns
self.package = package
self.version = "tarball"

@classmethod
def from_contract(cls, contract: TarballPackage) -> "TarballUnpinnedPackage":
return cls(tarball=contract.tarball, package=contract.name)
return cls(
tarball=contract.tarball,
tarball_unrendered=(contract.unrendered.get("tarball") or contract.tarball),
package=contract.name,
)

def incorporate(self, other: "TarballUnpinnedPackage") -> "TarballUnpinnedPackage":
return TarballUnpinnedPackage(tarball=self.tarball, package=self.package)
return TarballUnpinnedPackage(
tarball=self.tarball, tarball_unrendered=self.tarball_unrendered, package=self.package
)

def resolved(self) -> TarballPinnedPackage:
return TarballPinnedPackage(tarball=self.tarball, package=self.package)
return TarballPinnedPackage(
tarball=self.tarball, tarball_unrendered=self.tarball_unrendered, package=self.package
)
10 changes: 10 additions & 0 deletions core/dbt/events/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1641,6 +1641,16 @@ message DepsVersionMissingMsg{
DepsVersionMissing data = 2;
}

//M035
message DepsScrubbedPackageName{
string package_name = 1;
}

message DepsScrubbedPackageNameMsg{
EventInfo info = 1;
DepsScrubbedPackageName data = 2;
}

// Q - Node execution

// Q001
Expand Down
13 changes: 12 additions & 1 deletion core/dbt/events/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ def message(self) -> str:
elif self.revision in ("main", "master"):
unpinned_msg = f'pinned to the "{self.revision}" branch'
else:
unpinned_msg = None
unpinned_msg = "not pinned to any branch, tag, or commit"

msg = (
f'The git package "{self.git}" \n\tis {unpinned_msg}.\n\tThis can introduce '
Expand Down Expand Up @@ -1538,6 +1538,17 @@ def message(self) -> str:
return f"Found duplicate package in packages.yml, removing: {self.removed_package}"


# M034 is missing


class DepsScrubbedPackageName(WarnLevel):
def code(self):
return "M035"

def message(self) -> str:
return f"Detected secret env var in {self.package_name}. dbt will write a scrubbed representation to the lock file. This will cause issues with subsequent 'dbt deps' using the lock file, requiring 'dbt deps --upgrade'"


# =======================================================
# Q - Node execution
# =======================================================
Expand Down
638 changes: 321 additions & 317 deletions core/dbt/events/types_pb2.py

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions core/dbt/task/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dbt.exceptions
import json

from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.config.renderer import PackageRenderer
from dbt.config.project import package_config_from_data, load_yml_dict
from dbt.constants import PACKAGE_LOCK_FILE_NAME, PACKAGE_LOCK_HASH_KEY
from dbt.deps.base import downloads_directory
Expand Down Expand Up @@ -231,15 +231,18 @@ def run(self) -> None:

packages_lock_dict = load_yml_dict(f"{self.project.project_root}/{PACKAGE_LOCK_FILE_NAME}")

packages_lock_config = package_config_from_data(packages_lock_dict).packages
renderer = PackageRenderer(self.cli_vars)
packages_lock_config = package_config_from_data(
renderer.render_data(packages_lock_dict), packages_lock_dict
).packages

if not packages_lock_config:
fire_event(DepsNoPackagesFound())
return

with downloads_directory():
lock_defined_deps = resolve_lock_packages(packages_lock_config)
renderer = DbtProjectYamlRenderer(None, self.cli_vars)
renderer = PackageRenderer(self.cli_vars)

packages_to_upgrade = []

Expand Down
Loading

0 comments on commit 09f5bb3

Please sign in to comment.