Skip to content

Commit

Permalink
Factor out all_files into its own module covered_files
Browse files Browse the repository at this point in the history
Signed-off-by: Carmen Bianca BAKKER <[email protected]>
  • Loading branch information
carmenbianca committed Jul 8, 2024
1 parent 9e61733 commit 94cfe20
Show file tree
Hide file tree
Showing 5 changed files with 440 additions and 317 deletions.
120 changes: 120 additions & 0 deletions src/reuse/covered_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""The REUSE Specification has a concept called Covered Files: files which must
contain licensing information. Some files in a project are not Covered Files,
and thus needn't contain licensing information. This module contains the logic
that decides which files are Covered Files.
"""

import contextlib
import logging
import os
from pathlib import Path
from typing import Generator, Optional

from . import (
_IGNORE_DIR_PATTERNS,
_IGNORE_FILE_PATTERNS,
_IGNORE_MESON_PARENT_DIR_PATTERNS,
)
from ._util import StrPath
from .vcs import VCSStrategy

# Module-level logger, named after this module via ``__name__``.
_LOGGER = logging.getLogger(__name__)


def is_path_ignored(
    path: Path,
    include_submodules: bool = False,
    include_meson_subprojects: bool = False,
    vcs_strategy: Optional[VCSStrategy] = None,
) -> bool:
    """Return whether *path* should be left out of the set of Covered Files.

    A path is ignored when any of the following holds:

    - it is a symlink;
    - it is a file whose name matches an ignore-file pattern, or whose size
      is zero;
    - it is a directory whose name matches an ignore-directory pattern, whose
      parent directory name matches a Meson parent-directory pattern (unless
      *include_meson_subprojects* is set), or which *vcs_strategy* reports as
      a submodule (unless *include_submodules* is set);
    - *vcs_strategy* reports it as ignored.
    """
    # pylint: disable=too-many-return-statements,too-many-branches
    if path.is_symlink():
        _LOGGER.debug("skipping symlink '%s'", path)
        return True

    basename = path.name

    if path.is_file():
        if any(regex.match(basename) for regex in _IGNORE_FILE_PATTERNS):
            return True
        # A failing stat() is deliberately not treated as an error here; the
        # file simply isn't classified as empty.
        try:
            if path.stat().st_size == 0:
                _LOGGER.debug("skipping 0-sized file '%s'", path)
                return True
        except OSError:
            pass

    elif path.is_dir():
        if any(regex.match(basename) for regex in _IGNORE_DIR_PATTERNS):
            return True

        if not include_meson_subprojects:
            ancestors = path.parent.parts
            parent_name = ancestors[-1] if len(ancestors) > 0 else ""
            if any(
                regex.match(parent_name)
                for regex in _IGNORE_MESON_PARENT_DIR_PATTERNS
            ):
                _LOGGER.info(
                    "ignoring '%s' because it is a Meson subproject", path
                )
                return True

        if include_submodules:
            is_excluded_submodule = False
        else:
            is_excluded_submodule = vcs_strategy and vcs_strategy.is_submodule(
                path
            )
        if is_excluded_submodule:
            _LOGGER.info("ignoring '%s' because it is a submodule", path)
            return True

    # Last resort: defer to the version control system's own ignore rules.
    return bool(vcs_strategy and vcs_strategy.is_ignored(path))


def all_files(
    directory: StrPath,
    include_submodules: bool = False,
    include_meson_subprojects: bool = False,
    vcs_strategy: Optional[VCSStrategy] = None,
) -> Generator[Path, None, None]:
    """Walk *directory* recursively and yield every Covered File, as defined
    by the REUSE Specification.

    Directories for which :func:`is_path_ignored` returns true are pruned from
    the walk, so nothing beneath them is visited. Files for which it returns
    true are skipped.
    """

    def _ignored(candidate: Path) -> bool:
        # Same options for every path checked during this walk.
        return is_path_ignored(
            candidate,
            include_submodules=include_submodules,
            include_meson_subprojects=include_meson_subprojects,
            vcs_strategy=vcs_strategy,
        )

    for current, subdirs, filenames in os.walk(Path(directory)):
        here = Path(current)
        _LOGGER.debug("currently walking in '%s'", here)

        # Prune ignored directories. The list must be modified in place for
        # os.walk to skip them.
        kept = []
        for subdir in subdirs:
            subdir_path = here / subdir
            if _ignored(subdir_path):
                _LOGGER.debug("ignoring '%s'", subdir_path)
            else:
                kept.append(subdir)
        subdirs[:] = kept

        # Yield the files that survive the filter.
        for filename in filenames:
            file_path = here / filename
            if not _ignored(file_path):
                _LOGGER.debug("yielding '%s'", file_path)
                yield file_path
            else:
                _LOGGER.debug("ignoring '%s'", file_path)
104 changes: 2 additions & 102 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,7 @@

from binaryornot.check import is_binary

from . import (
_IGNORE_DIR_PATTERNS,
_IGNORE_FILE_PATTERNS,
_IGNORE_MESON_PARENT_DIR_PATTERNS,
IdentifierNotFound,
ReuseInfo,
)
from . import IdentifierNotFound, ReuseInfo
from ._licenses import EXCEPTION_MAP, LICENSE_MAP
from ._util import (
_LICENSEREF_PATTERN,
Expand All @@ -44,6 +38,7 @@
relative_from_root,
reuse_info_of_file,
)
from .covered_files import all_files
from .global_licensing import (
GlobalLicensing,
NestedReuseTOML,
Expand Down Expand Up @@ -431,98 +426,3 @@ def _detect_vcs_strategy(cls, root: StrPath) -> VCSStrategy:
).format(root)
)
return VCSStrategyNone(root)


def is_path_ignored(
    path: Path,
    include_submodules: bool = False,
    include_meson_subprojects: bool = False,
    vcs_strategy: Optional[VCSStrategy] = None,
) -> bool:
    """Return whether *path* should be left out of the set of Covered Files.

    A path is ignored when any of the following holds:

    - it is a symlink;
    - it is a file whose name matches an ignore-file pattern, or whose size
      is zero;
    - it is a directory whose name matches an ignore-directory pattern, whose
      parent directory name matches a Meson parent-directory pattern (unless
      *include_meson_subprojects* is set), or which *vcs_strategy* reports as
      a submodule (unless *include_submodules* is set);
    - *vcs_strategy* reports it as ignored.
    """
    # pylint: disable=too-many-return-statements,too-many-branches
    if path.is_symlink():
        _LOGGER.debug("skipping symlink '%s'", path)
        return True

    basename = path.name

    if path.is_file():
        if any(regex.match(basename) for regex in _IGNORE_FILE_PATTERNS):
            return True
        # A failing stat() is deliberately not treated as an error here; the
        # file simply isn't classified as empty.
        try:
            if path.stat().st_size == 0:
                _LOGGER.debug("skipping 0-sized file '%s'", path)
                return True
        except OSError:
            pass

    elif path.is_dir():
        if any(regex.match(basename) for regex in _IGNORE_DIR_PATTERNS):
            return True

        if not include_meson_subprojects:
            ancestors = path.parent.parts
            parent_name = ancestors[-1] if len(ancestors) > 0 else ""
            if any(
                regex.match(parent_name)
                for regex in _IGNORE_MESON_PARENT_DIR_PATTERNS
            ):
                _LOGGER.info(
                    "ignoring '%s' because it is a Meson subproject", path
                )
                return True

        if include_submodules:
            is_excluded_submodule = False
        else:
            is_excluded_submodule = vcs_strategy and vcs_strategy.is_submodule(
                path
            )
        if is_excluded_submodule:
            _LOGGER.info("ignoring '%s' because it is a submodule", path)
            return True

    # Last resort: defer to the version control system's own ignore rules.
    return bool(vcs_strategy and vcs_strategy.is_ignored(path))


def all_files(
    directory: StrPath,
    include_submodules: bool = False,
    include_meson_subprojects: bool = False,
    vcs_strategy: Optional[VCSStrategy] = None,
) -> Generator[Path, None, None]:
    """Walk *directory* recursively and yield every Covered File, as defined
    by the REUSE Specification.

    Directories for which :func:`is_path_ignored` returns true are pruned from
    the walk, so nothing beneath them is visited. Files for which it returns
    true are skipped.
    """

    def _ignored(candidate: Path) -> bool:
        # Same options for every path checked during this walk.
        return is_path_ignored(
            candidate,
            include_submodules=include_submodules,
            include_meson_subprojects=include_meson_subprojects,
            vcs_strategy=vcs_strategy,
        )

    for current, subdirs, filenames in os.walk(Path(directory)):
        here = Path(current)
        _LOGGER.debug("currently walking in '%s'", here)

        # Prune ignored directories. The list must be modified in place for
        # os.walk to skip them.
        kept = []
        for subdir in subdirs:
            subdir_path = here / subdir
            if _ignored(subdir_path):
                _LOGGER.debug("ignoring '%s'", subdir_path)
            else:
                kept.append(subdir)
        subdirs[:] = kept

        # Yield the files that survive the filter.
        for filename in filenames:
            file_path = here / filename
            if not _ignored(file_path):
                _LOGGER.debug("yielding '%s'", file_path)
                yield file_path
            else:
                _LOGGER.debug("ignoring '%s'", file_path)
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
sys.implementation.name != "cpython", reason="only CPython supported"
)
# Reusable pytest markers: skip tests whose required VCS executable is
# missing or whose platform is not POSIX, and mark tests as expected to fail
# when running as root.
git = pytest.mark.skipif(not GIT_EXE, reason="requires git")
hg = pytest.mark.skipif(not HG_EXE, reason="requires mercurial")
pijul = pytest.mark.skipif(not PIJUL_EXE, reason="requires pijul")
no_root = pytest.mark.xfail(is_root, reason="fails when user is root")
posix = pytest.mark.skipif(not is_posix, reason="Windows not supported")

Expand Down
Loading

0 comments on commit 94cfe20

Please sign in to comment.