Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve extract performance via ignoring directories early during os.walk #968

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,25 @@ def _strip(line: str):
comments[:] = map(_strip, comments)


def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
subdir = os.path.basename(dirpath)
# Legacy default behavior: ignore dot and underscore directories
return not (subdir.startswith('.') or subdir.startswith('_'))
def make_default_directory_filter(
method_map: Iterable[tuple[str, str]],
root_dir: str | os.PathLike[str],
):
def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
subdir = os.path.basename(dirpath)
# Legacy default behavior: ignore dot and underscore directories
if subdir.startswith('.') or subdir.startswith('_'):
return False

dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')

for pattern, method in method_map:
if method == "ignore" and pathmatch(pattern, dir_rel):
return False

return True

return directory_filter


def extract_from_dir(
Expand Down Expand Up @@ -196,13 +211,19 @@ def extract_from_dir(
"""
if dirname is None:
dirname = os.getcwd()

if options_map is None:
options_map = {}

dirname = os.path.abspath(dirname)

if directory_filter is None:
directory_filter = default_directory_filter
directory_filter = make_default_directory_filter(
method_map=method_map,
root_dir=dirname,
)

absname = os.path.abspath(dirname)
for root, dirnames, filenames in os.walk(absname):
for root, dirnames, filenames in os.walk(dirname):
dirnames[:] = [
subdir for subdir in dirnames
if directory_filter(os.path.join(root, subdir))
Expand All @@ -220,7 +241,7 @@ def extract_from_dir(
keywords,
comment_tags,
strip_comment_tags,
dirpath=absname,
dirpath=dirname,
)


Expand Down
Loading