From 884bb2a52ab800cc5aa2c621d73053eac8146d5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Gr=C3=BCbel?= Date: Thu, 20 Jul 2023 09:30:45 +0200 Subject: [PATCH] fix(terraform): leverage read_file_with_any_encoding to safely look for modules (#5360) leverage read_file_with_any_encoding to safely look for modules --- .../terraform/module_loading/module_finder.py | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/checkov/terraform/module_loading/module_finder.py b/checkov/terraform/module_loading/module_finder.py index 0d35ab186d4..2e2e0125269 100644 --- a/checkov/terraform/module_loading/module_finder.py +++ b/checkov/terraform/module_loading/module_finder.py @@ -6,6 +6,7 @@ from typing import List, Callable from checkov.common.parallelizer.parallel_runner import parallel_runner +from checkov.common.util.file_utils import read_file_with_any_encoding from checkov.terraform.module_loading.registry import module_loader_registry MODULE_SOURCE_PATTERN = re.compile(r'[^#]*\bsource\s*=\s*"(?P<link>.*)"') @@ -33,41 +34,41 @@ def find_modules(path: str) -> List[ModuleDownload]: for file_name in full_file_names: if not file_name.endswith('.tf'): continue - with open(os.path.join(path, root, file_name)) as f: - content = f.read() + + try: + content = read_file_with_any_encoding(file_path=os.path.join(path, root, file_name)) if "module " not in content: # if there is no "module " ref in the whole file, then no need to search line by line continue - try: - curr_md = None - for line in content.splitlines(): - if not curr_md: - if line.startswith('module'): - curr_md = ModuleDownload(os.path.dirname(os.path.join(root, file_name))) - continue - else: - if line.startswith('}'): - if curr_md.module_link is None: - logging.warning(f'A module at {curr_md.source_dir} had no source, skipping') - else: - modules_found.append(curr_md) - curr_md = None + curr_md = None + for line in content.splitlines(): + if not curr_md: + if line.startswith('module'): + 
curr_md = ModuleDownload(os.path.dirname(os.path.join(root, file_name))) + continue + else: + if line.startswith('}'): + if curr_md.module_link is None: + logging.warning(f'A module at {curr_md.source_dir} had no source, skipping') + else: + modules_found.append(curr_md) + curr_md = None + continue + + if "source" in line: + match = re.match(MODULE_SOURCE_PATTERN, line) + if match: + curr_md.module_link = match.group('link') continue - if "source" in line: - match = re.match(MODULE_SOURCE_PATTERN, line) - if match: - curr_md.module_link = match.group('link') - continue - - if "version" in line: - match = re.match(MODULE_VERSION_PATTERN, line) - if match: - curr_md.version = f"{match.group('operator')}{match.group('version')}" if match.group('operator') else match.group('version') - except (UnicodeDecodeError, FileNotFoundError) as e: - logging.warning(f"Skipping {os.path.join(path, root, file_name)} because of {e}") - continue + if "version" in line: + match = re.match(MODULE_VERSION_PATTERN, line) + if match: + curr_md.version = f"{match.group('operator')}{match.group('version')}" if match.group('operator') else match.group('version') + except (UnicodeDecodeError, FileNotFoundError) as e: + logging.warning(f"Skipping {os.path.join(path, root, file_name)} because of {e}") + continue return modules_found