Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(secrets): Adding the option to prerun before multiline pattern executing #6586

Merged
merged 12 commits into from
Jul 22, 2024
43 changes: 43 additions & 0 deletions checkov/secrets/local_secrets_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# flake8: noqa
# type: ignore

import json
import os

from checkov.main import secrets_runner
from checkov.common.runners.runner_registry import RunnerRegistry
from checkov.runner_filter import RunnerFilter
from checkov.common.bridgecrew.platform_integration import bc_integration


# Load the secrets policies from the JSON file named by the
# LOCAL_SECRETS_POLICIES_JSON environment variable (KeyError if it is unset)
# and expose them on the platform integration object under 'secretsPolicies'.
# The encoding is given explicitly so the JSON is decoded as UTF-8 regardless
# of the platform's default locale encoding.
with open(os.environ['LOCAL_SECRETS_POLICIES_JSON'], encoding="utf-8") as secrets_policies_file:
    default_regexes = json.load(secrets_policies_file)
    bc_integration.customer_run_config_response = {'secretsPolicies': default_regexes}


def execute():
    """Scan a local folder with the secrets runner and print the reports.

    The folder to scan is read from the ``LOCAL_SCANNING_FOLDER`` environment
    variable; a ``KeyError`` is raised if it is not set. Only the
    ``BC_GIT_79`` check is selected, all files are scanned, and git-history
    scanning is switched off.
    """
    secrets_scanner = secrets_runner(entropy_limit=4)

    scan_filter = RunnerFilter(
        block_list_secret_scan=[],
        enable_secret_scan_all_files=True,
        enable_git_history_secret_scan=False,
        git_history_last_commit_scanned=None,
        # Original note: 20 min less in order to finish processing, else put
        # checkov's default (12h - 1200).
        # NOTE(review): the value passed is the literal string below, which
        # looks like a placeholder rather than a computed timeout - confirm.
        git_history_timeout="checkov_timeout_str",
        checks=['BC_GIT_79'],
    )
    registry = RunnerRegistry('', scan_filter, secrets_scanner)

    target_folder = os.environ["LOCAL_SCANNING_FOLDER"]
    reports = registry.run(
        root_folder=target_folder,
        external_checks_dir=[],
        collect_skip_comments=True,
    )

    print(reports)


# Run the local secrets scan only when this file is executed as a script;
# importing the module does not call execute().
if __name__ == "__main__":
    execute()
48 changes: 46 additions & 2 deletions checkov/secrets/plugins/custom_regex_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
from typing import Set, Any, Generator, Pattern, Optional, Dict, Tuple, TYPE_CHECKING, cast
from collections import defaultdict

from detect_secrets.constants import VerifiedResult
from detect_secrets.core.potential_secret import PotentialSecret
Expand Down Expand Up @@ -29,16 +30,24 @@ def __init__(self) -> None:
self.regex_to_metadata: dict[str, dict[str, Any]] = dict()
self.denylist = set()
self.multiline_deny_list = set()
self.multiline_pattern_by_prerun_compiled: dict[str, Pattern[str]] = dict()
self.multiline_regex_to_metadata: dict[str, dict[str, Any]] = dict()
self._analyzed_files: Set[str] = set()
self._analyzed_files_by_check: Dict[str, Set[str]] = defaultdict(lambda: set())
self._multiline_regex_supported_file_types: Set[str] = set()
detectors = load_detectors()

for detector in detectors:
try:
if detector.get("isMultiline"):
self.multiline_deny_list.add(re.compile('{}'.format(detector["Regex"])))
self.multiline_regex_to_metadata[detector["Regex"]] = detector
# If a prerun exists, we add it as a 'regular detector' (it gets special treatment in analyze_line)
if detector.get("prerun"):
pazbechor marked this conversation as resolved.
Show resolved Hide resolved
self.denylist.add(re.compile('{}'.format(detector["prerun"])))
self.regex_to_metadata[detector["prerun"]] = detector
self.multiline_pattern_by_prerun_compiled[detector["prerun"]] = re.compile('{}'.format(detector["Regex"]))
else:
self.multiline_deny_list.add(re.compile('{}'.format(detector["Regex"])))
self.multiline_regex_to_metadata[detector["Regex"]] = detector
continue
self.denylist.add(re.compile('{}'.format(detector["Regex"])))
self.regex_to_metadata[detector["Regex"]] = detector
Expand Down Expand Up @@ -86,11 +95,14 @@ def analyze_line(
if filename not in self._analyzed_files:
self._analyzed_files.add(filename)
# We only want to read file if: there is regex supporting it & file size is not over MAX_FILE_SIZE
# Note: _find_potential_secret checks, per multiline regex, whether it should run according to the file type.
# This is only a pre-check to avoid reading the file content when the file type is not supported at all.
if not self.multiline_regex_to_metadata.values() or \
not self.multiline_regex_supported_file_types or \
not any([filename.endswith(str(file_type)) for file_type in self.multiline_regex_supported_file_types]) or \
not 0 < get_file_size_safe(filename) < CustomRegexDetector.MAX_FILE_SIZE:
return output

file_content = read_file_safe(filename)
if not file_content:
return output
Expand Down Expand Up @@ -138,6 +150,38 @@ def _find_potential_secret(
except Exception:
is_verified = False
regex_data = current_regex_to_metadata[regex.pattern]

# This is a multiline regex for which only the prerun has been executed so far; now execute the whole multiline pattern.
# We want to run a multiline policy only once per file (and only if its prerun was found).
if regex_data.get("prerun") and filename not in self._analyzed_files_by_check[regex_data['Check_ID']]:
self._analyzed_files_by_check[regex_data['Check_ID']].add(filename)

# We are going to scan the whole file with the multiline regex
if not 0 < get_file_size_safe(filename) < CustomRegexDetector.MAX_FILE_SIZE:
return
file_content = read_file_safe(filename)
if not file_content:
return
multiline_regex = self.multiline_pattern_by_prerun_compiled.get(regex.pattern)
if multiline_regex is None:
return
multiline_matches = multiline_regex.findall(file_content)
for mm in multiline_matches:
mm = f"'{mm}'"
ps = PotentialSecret(
type=regex_data["Name"],
filename=filename,
secret=mm,
line_number=line_number,
is_verified=is_verified,
is_added=is_added,
is_removed=is_removed,
is_multiline=True,
)
ps.check_id = regex_data["Check_ID"]
output.add(ps)
return

# Wrap multiline match with fstring + ''
match = f"'{match}'" if is_multiline else match
ps = PotentialSecret(
Expand Down
9 changes: 6 additions & 3 deletions checkov/secrets/plugins/load_detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,16 @@ def modify_secrets_policy_to_detectors(policies_list: List[dict[str, Any]]) -> L
return secrets_list


def add_to_custom_detectors(custom_detectors: List[Dict[str, Any]], name: str, check_id: str, regex: str, prerun: str,
                            is_custom: str, is_multiline: bool = False, supported_files: Optional[List[str]] = None) -> None:
    """Append one detector definition to ``custom_detectors`` in place.

    :param custom_detectors: list that receives the new detector dict.
    :param name: stored under ``'Name'``.
    :param check_id: stored under ``'Check_ID'``.
    :param regex: stored under ``'Regex'``.
    :param prerun: stored under ``'prerun'``; the visible callers pass an
        empty string when a policy defines no prerun pattern.
    :param is_custom: stored under ``'isCustom'``.
    :param is_multiline: stored under ``'isMultiline'``.
    :param supported_files: stored under ``'supportedFiles'``; ``None`` or an
        empty list is stored as a new empty list.
    :return: ``None``; the list is mutated.
    """
    custom_detectors.append({
        'Name': name,
        'Check_ID': check_id,
        'Regex': regex,
        'isCustom': is_custom,
        'isMultiline': is_multiline,
        # Falsy values (None, []) are normalised to a fresh empty list.
        'supportedFiles': supported_files if supported_files else [],
        'prerun': prerun,
    })


Expand All @@ -54,7 +55,7 @@ def add_detectors_from_condition_query(custom_detectors: List[Dict[str, Any]], c
value = [value]
for regex in value:
parsed = True
add_to_custom_detectors(custom_detectors, secret_policy['title'], check_id, regex,
add_to_custom_detectors(custom_detectors, secret_policy['title'], check_id, regex, "",
secret_policy['isCustom'])
return parsed

Expand All @@ -77,6 +78,8 @@ def add_detectors_from_code(custom_detectors: List[Dict[str, Any]], code: str, s
secret_policy['title'],
check_id,
regex,
# Only one prerun per multiline regex
code_dict['definition'].get('prerun', [''])[0],
pazbechor marked this conversation as resolved.
Show resolved Hide resolved
secret_policy['isCustom'],
code_dict['definition'].get("multiline", False),
code_dict['definition'].get("supported_files", [])
Expand Down
Loading