From e3584dfe04d1055e53cb63ecabec583a55f831d5 Mon Sep 17 00:00:00 2001 From: Simon Gerber Date: Tue, 3 Sep 2024 17:28:56 +0200 Subject: [PATCH] Fix `fetch_git_dependency()` to clobber files in the destination if `force_fetch=True` When force fetching Git dependencies, we need to be able to overwrite read-only files in the destination, since Git repos may contain read-only pack and index files in `.git/`. To do so, this commit introduces flag `clobber_files` for `utils.copy_tree()` which will completely clobber existing files in the destination by setting a custom `copy_function` for `shutil.copytree()` which deletes existing destination files before copying them with `shutil.copy2()`. With this change, `utils.copy_tree()` with `clobber_files=True` behaves pretty much identically to distutils' `copy_tree()` which was used in previous versions of Kapitan. --- kapitan/dependency_manager/base.py | 4 ++- kapitan/utils.py | 26 ++++++++++++++-- tests/test_utils.py | 49 +++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/kapitan/dependency_manager/base.py b/kapitan/dependency_manager/base.py index 3aae6b3a7..d19678c91 100644 --- a/kapitan/dependency_manager/base.py +++ b/kapitan/dependency_manager/base.py @@ -129,7 +129,9 @@ def fetch_git_dependency(dep_mapping, save_dir, force, item_type="Dependency"): "{} {}: subdir {} not found in repo".format(item_type, source, sub_dir) ) if force: - copied = copy_tree(copy_src_path, output_path) + # We need `clobber_files=True` here, since we otherwise can't overwrite read-only Git + # index and pack files if the destination already contains a copy of the Git repo. + copied = copy_tree(copy_src_path, output_path, clobber_files=True) else: copied = safe_copy_tree(copy_src_path, output_path) if copied: diff --git a/kapitan/utils.py b/kapitan/utils.py index 7093842fb..977ad1cb9 100644 --- a/kapitan/utils.py +++ b/kapitan/utils.py @@ -628,11 +628,28 @@ def safe_copy_tree(src, dst): return outputs -def copy_tree(src: str, dst: str) -> list: +def force_copy_file(src: str, dst: str, *args, **kwargs): + """Copy file from `src` to `dst`, forcibly replacing `dst` if it's a file, but preserving the + source file's metadata. + + This is suitable to use as `copy_function` in `shutil.copytree()` if the behavior of distutils' + `copy_tree` should be mimicked as closely as possible. + """ + if os.path.isfile(dst): + os.unlink(dst) + shutil.copy2(src, dst, *args, **kwargs) + + +def copy_tree(src: str, dst: str, clobber_files=False) -> list: """Recursively copy a given directory from `src` to `dst`. If `dst` or a parent of `dst` doesn't exist, the missing directories are created. + If `clobber_files` is set to true, existing files in the destination directory are completely + clobbered. This is necessary to allow use of this function when copying a Git repo into a + destination directory which may already contain an old copy of the repo. Files that are + overwritten this way won't be listed in the return value. + Returns a list of the copied files. """ if not os.path.isdir(src): @@ -643,6 +660,11 @@ def copy_tree(src: str, dst: str) -> list: # this will generate an empty set if `dst` doesn't exist before = set(glob.iglob(f"{dst}/*", recursive=True)) - shutil.copytree(src, dst, dirs_exist_ok=True) + if clobber_files: + # use `force_copy_file` to more closely mimic distutils' `copy_tree` behavior + copy_function = force_copy_file + else: + copy_function = shutil.copy2 + shutil.copytree(src, dst, dirs_exist_ok=True, copy_function=copy_function) after = set(glob.iglob(f"{dst}/*", recursive=True)) return list(after - before) diff --git a/tests/test_utils.py b/tests/test_utils.py index 0e607cafd..e070a440d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -10,10 +10,11 @@ import glob import os import shutil +import stat import tempfile import unittest -from kapitan.utils import SafeCopyError, copy_tree, directory_hash +from kapitan.utils import SafeCopyError, copy_tree, directory_hash, force_copy_file TEST_PWD = os.getcwd() TEST_RESOURCES_PATH = os.path.join(os.getcwd(), "tests/test_resources") @@ -57,5 +58,51 @@ def test_copy_dir_missing_dst(self): copied_hash = directory_hash(dst) self.assertEqual(copied_hash, original_hash) + def test_copy_dir_overwrite_readonly_file(self): + src = os.path.join(self.temp_dir, "source") + os.makedirs(src, exist_ok=True) + f = os.path.join(src, "ro.txt") + with open(f, "w", encoding="utf-8") as fh: + fh.write("Hello!\n") + os.chmod(f, 0o444) + + dst = os.path.join(self.temp_dir, "dest") + copied = copy_tree(src, dst) + self.assertEqual(copied, [os.path.join(dst, "ro.txt")]) + self.assertEqual(stat.S_IMODE(os.stat(copied[0]).st_mode), 0o444) + + with self.assertRaises(shutil.Error): + copy_tree(src, dst) + + copied2 = copy_tree(src, dst, clobber_files=True) + self.assertEqual(copied2, []) + self.assertEqual(stat.S_IMODE(os.stat(copied[0]).st_mode), 0o444) + + def test_force_copy_file(self): + src = os.path.join(self.temp_dir, "test.txt") + with open(src, "w", encoding="utf-8") as f: + f.write("Test\n") + + # Test that we don't delete `dst` if it's not a file + dst1 = os.path.join(self.temp_dir, "path") + os.makedirs(dst1, exist_ok=True) + force_copy_file(src, dst1) + self.assertTrue(os.path.isfile(os.path.join(dst1, "test.txt"))) + + # Test that we can create file `dst` if it doesn't exist + dst2 = os.path.join(self.temp_dir, "test2.txt") + self.assertFalse(os.path.exists(dst2)) + force_copy_file(src, dst2) + self.assertTrue(os.path.isfile(dst2)) + + # Test that we correctly overwrite a readonly file pointed to by `dst` + os.chmod(dst2, 0o444) + with open(src, "w", encoding="utf-8") as f: + f.write("Test2\n") + force_copy_file(src, dst2) + self.assertTrue(os.path.isfile(dst2)) + with open(dst2, "r", encoding="utf-8") as f: + self.assertEqual(f.read(), "Test2\n") + def tearDown(self): shutil.rmtree(self.temp_dir)