From 767546abdfc68f1e78921ac622cd267cfb3f7bca Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 17 Jul 2023 09:13:05 +0300 Subject: [PATCH] Usage_list refactored, password for doc (#379) * [auxiliary] 2023-07-13T10:46:39+03:00 * SourceType applied to unify usage 'doc' and 'src' * tmp * fix * import fix * codestyle * [skip actions] [auxiliary] 2023-07-13T15:20:57+03:00 * min_line_len=10 * fix flake8 --- credsweeper/app.py | 9 +- credsweeper/filters/group/group.py | 7 +- .../filters/value_not_part_encoded_check.py | 3 +- credsweeper/rules/config.yaml | 215 +----------- credsweeper/rules/rule.py | 14 +- credsweeper/scanner/scanner.py | 19 +- tests/__init__.py | 2 +- tests/conftest.py | 2 +- tests/data/doc.json | 308 ++++++++++++++++++ .../filters/test_credit_card_number_check.py | 1 - tests/filters/test_line_specific_key_check.py | 1 - tests/filters/test_separator_unusual_check.py | 1 - tests/filters/test_value_allowlist_check.py | 1 - .../test_value_array_dictionary_check.py | 5 +- tests/filters/test_value_blocklist_check.py | 1 - tests/filters/test_value_camel_case_check.py | 1 - .../test_value_dictionary_keyword_check.py | 1 - ...est_value_dictionary_value_length_check.py | 1 - tests/filters/test_value_file_path_check.py | 1 - tests/filters/test_value_similarity_check.py | 4 +- tests/filters/test_value_string_type_check.py | 1 - tests/rules/test_rule.py | 4 +- tests/samples/passwd.groovy | 2 + tests/scanner/scan_type/test_multipattern.py | 4 +- tests/test_app.py | 13 +- tests/test_main.py | 3 +- 26 files changed, 369 insertions(+), 255 deletions(-) diff --git a/credsweeper/app.py b/credsweeper/app.py index 119e4f3fd..9f2418801 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -39,7 +39,7 @@ class CredSweeper: """ def __init__(self, - rule_path: Optional[str] = None, + rule_path: Union[None, str, Path] = None, config_path: Optional[str] = None, api_validation: bool = False, json_filename: Union[None, str, Path] = None, @@ -97,9 +97,7 @@ def __init__(self, exclude_values=exclude_values) self.config = Config(config_dict) self.scanner = Scanner(self.config, rule_path) - self.doc_scanner = Scanner(self.config, rule_path, ["doc"]) self.deep_scanner = DeepScanner(self.config, self.scanner) - self.deep_doc_scanner = DeepScanner(self.config, self.doc_scanner) self.credential_manager = CredentialManager() self.json_filename: Union[None, str, Path] = json_filename self.xlsx_filename: Union[None, str, Path] = xlsx_filename @@ -305,12 +303,9 @@ def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProv candidates.append(dummy_candidate) else: - if self.config.depth: + if self.config.depth or self.config.doc: # deep scan with possible data representation candidates = self.deep_scanner.scan(content_provider, self.config.depth, self.config.size_limit) - elif self.config.doc: - # document-specific scanning - candidates = self.deep_doc_scanner.scan(content_provider, 0, self.config.size_limit) else: if content_provider.file_type not in self.config.exclude_containers: # Regular file scanning diff --git a/credsweeper/filters/group/group.py b/credsweeper/filters/group/group.py index e6a029f76..b81a7a00e 100644 --- a/credsweeper/filters/group/group.py +++ b/credsweeper/filters/group/group.py @@ -35,7 +35,7 @@ def filters(self, filters: List[Filter]) -> None: @staticmethod def get_keyword_base_filters(config: Config) -> List[Filter]: """returns base filters""" - return [ # + filters = [ # SeparatorUnusualCheck(), ValueAllowlistCheck(), ValueArrayDictionaryCheck(), @@ -46,13 +46,14 @@ def get_keyword_base_filters(config: Config) -> List[Filter]: ValueLastWordCheck(), ValueLengthCheck(config), ValueMethodCheck(), - ValueNotAllowedPatternCheck(), ValueSimilarityCheck(), ValueStringTypeCheck(config), ValueTokenCheck(), VariableNotAllowedPatternCheck(), - ValuePatternCheck(config) ] + if not config.doc: + filters.extend([ValuePatternCheck(config), ValueNotAllowedPatternCheck()]) + return filters @staticmethod def get_pattern_base_filters(config: Config) -> List[Filter]: diff --git a/credsweeper/filters/value_not_part_encoded_check.py b/credsweeper/filters/value_not_part_encoded_check.py index f3a7862af..b186f1f5e 100644 --- a/credsweeper/filters/value_not_part_encoded_check.py +++ b/credsweeper/filters/value_not_part_encoded_check.py @@ -1,6 +1,5 @@ -from typing import Optional - import re +from typing import Optional from credsweeper.config import Config from credsweeper.credentials import LineData diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 231bbfdae..8ea3008fb 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -6,8 +6,7 @@ filter_type: - CreditCardNumberCheck min_line_len: 16 - usage_list: - - src + doc_available: false - name: API severity: medium @@ -19,8 +18,7 @@ min_line_len: 11 required_substrings: - api - usage_list: - - src + doc_available: false - name: AWS Client ID severity: high @@ -32,9 +30,6 @@ required_substrings: - A min_line_len: 20 - usage_list: - - src - - doc - name: AWS Multi severity: high @@ -48,9 +43,6 @@ - AKIA - ASIA min_line_len: 20 - usage_list: - - src - - doc - name: AWS MWS Key severity: high @@ -62,9 +54,6 @@ required_substrings: - amzn min_line_len: 30 - usage_list: - - src - - doc - name: Credential severity: medium @@ -76,8 +65,7 @@ min_line_len: 18 required_substrings: - credential - usage_list: - - src + doc_available: false - name: Dynatrace API Token severity: high @@ -89,9 +77,6 @@ required_substrings: - dt0 min_line_len: 90 - usage_list: - - src - - doc - name: Facebook Access Token severity: high @@ -103,9 +88,6 @@ required_substrings: - EAAC min_line_len: 31 - usage_list: - - src - - doc - name: Github Old Token severity: high @@ -119,9 +101,6 @@ required_substrings: - git min_line_len: 47 - usage_list: - - src - - doc - name: Google API Key severity: high @@ -135,9 +114,6 @@ required_substrings: - AIza min_line_len: 39 - usage_list: - - src - - doc - name: Google Multi severity: high @@ -152,9 +128,6 @@ required_substrings: - .apps.googleusercontent.com min_line_len: 40 - usage_list: - - src - - doc - name: Google OAuth Access Token severity: high @@ -166,9 +139,6 @@ required_substrings: - ya29. min_line_len: 27 - usage_list: - - src - - doc - name: Heroku API Key severity: high @@ -180,9 +150,6 @@ required_substrings: - heroku min_line_len: 24 - usage_list: - - src - - doc - name: Instagram Access Token severity: high @@ -194,9 +161,6 @@ required_substrings: - IGQVJ min_line_len: 105 - usage_list: - - src - - doc - name: JSON Web Token severity: medium @@ -208,9 +172,6 @@ required_substrings: - eyJ min_line_len: 16 - usage_list: - - src - - doc - name: MailChimp API Key severity: high @@ -224,9 +185,6 @@ required_substrings: - -us min_line_len: 35 - usage_list: - - src - - doc - name: MailGun API Key severity: high @@ -238,9 +196,6 @@ required_substrings: - key- min_line_len: 36 - usage_list: - - src - - doc - name: Password severity: medium @@ -253,8 +208,6 @@ required_substrings: - pass - pw - usage_list: - - src - name: PayPal Braintree Access Token severity: high @@ -266,9 +219,6 @@ required_substrings: - access_token$production$ min_line_len: 72 - usage_list: - - src - - doc - name: PEM Private Key severity: high @@ -278,9 +228,6 @@ filter_type: - LineSpecificKeyCheck min_line_len: 27 - usage_list: - - src - - doc - name: Picatic API Key severity: high @@ -292,9 +239,6 @@ required_substrings: - sk_live_ min_line_len: 40 - usage_list: - - src - - doc - name: Secret severity: medium @@ -306,8 +250,7 @@ min_line_len: 14 required_substrings: - secret - usage_list: - - src + doc_available: false - name: SendGrid API Key severity: high @@ -319,9 +262,6 @@ required_substrings: - SG. min_line_len: 34 - usage_list: - - src - - doc - name: Shopify Token severity: high @@ -332,9 +272,6 @@ required_substrings: - shp min_line_len: 38 - usage_list: - - src - - doc - name: Slack Token severity: high @@ -348,9 +285,6 @@ required_substrings: - xox min_line_len: 15 - usage_list: - - src - - doc - name: Slack Webhook severity: high @@ -362,9 +296,6 @@ required_substrings: - hooks.slack.com/services/T min_line_len: 61 - usage_list: - - src - - doc - name: Stripe Standard API Key severity: high @@ -378,9 +309,6 @@ required_substrings: - sk_live_ min_line_len: 32 - usage_list: - - src - - doc - name: Stripe Restricted API Key severity: high @@ -392,9 +320,6 @@ required_substrings: - rk_live_ min_line_len: 32 - usage_list: - - src - - doc - name: Square Access Token severity: high @@ -408,9 +333,6 @@ required_substrings: - EAAA min_line_len: 64 - usage_list: - - src - - doc - name: Square Client ID severity: medium @@ -424,9 +346,6 @@ required_substrings: - sq0 min_line_len: 29 - usage_list: - - src - - doc - name: Square OAuth Secret severity: high @@ -438,9 +357,6 @@ required_substrings: - sq0csp min_line_len: 50 - usage_list: - - src - - doc - name: Token severity: medium @@ -452,8 +368,7 @@ min_line_len: 13 required_substrings: - token - usage_list: - - src + doc_available: false - name: Twilio API Key severity: high @@ -465,9 +380,6 @@ required_substrings: - SK min_line_len: 34 - usage_list: - - src - - doc - name: URL Credentials severity: high @@ -478,10 +390,7 @@ use_ml: true required_substrings: - // - min_line_len: 6 - usage_list: - - src - - doc + min_line_len: 10 - name: Auth severity: medium @@ -493,8 +402,7 @@ min_line_len: 12 required_substrings: - auth - usage_list: - - src + doc_available: false - name: Key severity: medium @@ -506,8 +414,7 @@ min_line_len: 11 required_substrings: - key - usage_list: - - src + doc_available: false - name: Telegram Bot API Token severity: high @@ -518,9 +425,6 @@ required_substrings: - :AA min_line_len: 45 - usage_list: - - src - - doc - name: PyPi API Token severity: high @@ -531,9 +435,6 @@ required_substrings: - pypi- min_line_len: 155 - usage_list: - - src - - doc - name: Github Token severity: high @@ -544,9 +445,6 @@ required_substrings: - gh min_line_len: 40 - usage_list: - - src - - doc - name: Github Personal Access Token severity: high @@ -559,9 +457,6 @@ required_substrings: - ghp_ min_line_len: 40 - usage_list: - - src - - doc - name: Github Fine-granted Token severity: high @@ -574,9 +469,6 @@ required_substrings: - github_pat_ min_line_len: 90 - usage_list: - - src - - doc - name: Firebase Domain severity: info @@ -587,9 +479,6 @@ required_substrings: - .firebase min_line_len: 16 - usage_list: - - src - - doc - name: AWS S3 Bucket severity: info @@ -601,9 +490,6 @@ - .s3-website - .s3.amazonaws.com min_line_len: 14 - usage_list: - - src - - doc - name: Nonce severity: medium @@ -615,8 +501,7 @@ min_line_len: 13 required_substrings: - nonce - usage_list: - - src + doc_available: false - name: Salt severity: medium @@ -628,8 +513,7 @@ min_line_len: 12 required_substrings: - salt - usage_list: - - src + doc_available: false - name: Certificate severity: medium @@ -641,8 +525,7 @@ min_line_len: 12 required_substrings: - cert - usage_list: - - src + doc_available: false - name: Azure Access Token severity: high @@ -654,9 +537,6 @@ required_substrings: - eyJ min_line_len: 148 - usage_list: - - src - - doc - name: Azure Secret Value severity: high @@ -667,9 +547,6 @@ min_line_len: 40 required_substrings: - 8Q~ - usage_list: - - src - - doc - name: Bitbucket App Password severity: high @@ -680,9 +557,6 @@ min_line_len: 28 required_substrings: - ATBB - usage_list: - - src - - doc - name: Bitbucket Repository Access Token severity: high @@ -693,9 +567,6 @@ min_line_len: 183 required_substrings: - ATCTT3xFfGN0 - usage_list: - - src - - doc - name: Bitbucket HTTP Access Token severity: high @@ -706,9 +577,6 @@ min_line_len: 49 required_substrings: - BBDC- - usage_list: - - src - - doc - name: Bitbucket Client ID severity: info @@ -717,9 +585,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P[a-zA-Z0-9]{18}([a-zA-Z0-9]{14})?)([^0-9A-Za-z.$_/+-]|$) filter_type: WeirdBase64Token min_line_len: 18 - usage_list: - - src - - doc - name: Bitbucket Client Secret severity: info @@ -728,9 +593,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P([a-zA-Z0-9_-]{32}){1,2})([^0-9A-Za-z.$_/+-]|$) filter_type: WeirdBase64Token min_line_len: 32 - usage_list: - - src - - doc - name: Jira / Confluence PAT token severity: high @@ -743,9 +605,6 @@ - M - N - O - usage_list: - - src - - doc - name: Atlassian Old PAT token severity: info @@ -754,9 +613,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P[a-zA-Z0-9]{24})([^=0-9A-Za-z.$_/+-]|$) filter_type: WeirdBase64Token min_line_len: 24 - usage_list: - - src - - doc - name: Atlassian PAT token severity: high @@ -767,9 +623,6 @@ min_line_len: 191 required_substrings: - ATATT3xFfGF0 - usage_list: - - src - - doc - name: Digital Ocean PAT severity: high @@ -780,9 +633,6 @@ min_line_len: 71 required_substrings: - dop_v1_ - usage_list: - - src - - doc - name: Digital Ocean OAuth Access Token severity: high @@ -793,9 +643,6 @@ min_line_len: 71 required_substrings: - doo_v1_ - usage_list: - - src - - doc - name: Dropbox OAuth2 API Access Token severity: high @@ -806,9 +653,6 @@ min_line_len: 138 required_substrings: - sl. - usage_list: - - src - - doc - name: NuGet API key severity: high @@ -819,9 +663,6 @@ min_line_len: 46 required_substrings: - oy2 - usage_list: - - src - - doc - name: Gitlab PAT severity: high @@ -832,9 +673,6 @@ min_line_len: 26 required_substrings: - glpat- - usage_list: - - src - - doc - name: Gitlab Pipeline Trigger Token severity: high @@ -845,9 +683,6 @@ min_line_len: 46 required_substrings: - glptt- - usage_list: - - src - - doc - name: Gitlab Registration Runner Token severity: high @@ -858,9 +693,6 @@ min_line_len: 29 required_substrings: - GR1348941 - usage_list: - - src - - doc - name: Gitlab Registration Runner Token 2023 severity: high @@ -871,9 +703,6 @@ min_line_len: 25 required_substrings: - glrt- - usage_list: - - src - - doc - name: Grafana Provisioned API Key severity: high @@ -885,9 +714,6 @@ min_line_len: 67 required_substrings: - eyJ - usage_list: - - src - - doc - name: Grafana Access Policy Token severity: high @@ -899,9 +725,6 @@ min_line_len: 87 required_substrings: - glc_eyJ - usage_list: - - src - - doc - name: Dropbox API secret (long term) severity: high @@ -912,9 +735,6 @@ min_line_len: 43 required_substrings: - AAAAAAAAAA - usage_list: - - src - - doc - name: Dropbox App secret severity: info @@ -923,9 +743,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P[a-z0-9]{15})([^=0-9A-Za-z_/+-]|$) filter_type: WeirdBase36Token min_line_len: 15 - usage_list: - - src - - doc - name: Gitlab Incoming Email Token severity: info @@ -934,9 +751,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P[a-z0-9]{24,25})([^=0-9A-Za-z_/+-]|$) filter_type: WeirdBase36Token min_line_len: 24 - usage_list: - - src - - doc - name: Gitlab Feed Token severity: info @@ -945,9 +759,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P[a-zA-Z0-9_-]{20})([^=0-9A-Za-z_/+-]|$) filter_type: WeirdBase64Token min_line_len: 20 - usage_list: - - src - - doc - name: Jira 2FA severity: info @@ -961,6 +772,4 @@ - ValueBase32DataCheck - ValueTokenBase32Check min_line_len: 16 - usage_list: - - src - - doc + doc_available: true diff --git a/credsweeper/rules/rule.py b/credsweeper/rules/rule.py index 4a771f46b..867ced2a6 100644 --- a/credsweeper/rules/rule.py +++ b/credsweeper/rules/rule.py @@ -45,7 +45,6 @@ class Rule: NAME = "name" SEVERITY = "severity" TYPE = "type" - USAGE_LIST = "usage_list" VALUES = "values" FILTER_TYPE = "filter_type" MIN_LINE_LEN = "min_line_len" @@ -54,6 +53,7 @@ class Rule: USE_ML = "use_ml" REQUIRED_SUBSTRINGS = "required_substrings" VALIDATIONS = "validations" + DOC_AVAILABLE = "doc_available" # True - by default def __init__(self, config: Config, rule_dict: Dict) -> None: self.config = config @@ -76,7 +76,7 @@ def __init__(self, config: Config, rule_dict: Dict) -> None: self.__validations = self._get_validations(rule_dict.get(Rule.VALIDATIONS)) self.__required_substrings = [i.strip().lower() for i in rule_dict.get(Rule.REQUIRED_SUBSTRINGS, [])] self.__min_line_len = int(rule_dict.get(Rule.MIN_LINE_LEN, MAX_LINE_LENGTH)) - self.__usage_list: List[str] = rule_dict.get(Rule.USAGE_LIST, []) + self.__doc_available: bool = rule_dict.get(Rule.DOC_AVAILABLE, True) def _malformed_rule_error(self, rule_dict: Dict, field: str): raise ValueError(f"Malformed rule '{self.__rule_name}'." @@ -242,9 +242,7 @@ def _assert_rule_mandatory_fields(rule_template: Dict) -> None: ValueError if missing fields is present """ - mandatory_fields = [ - Rule.NAME, Rule.SEVERITY, Rule.TYPE, Rule.USAGE_LIST, Rule.VALUES, Rule.FILTER_TYPE, Rule.MIN_LINE_LEN - ] + mandatory_fields = [Rule.NAME, Rule.SEVERITY, Rule.TYPE, Rule.VALUES, Rule.FILTER_TYPE, Rule.MIN_LINE_LEN] missing_fields = [field for field in mandatory_fields if field not in rule_template] if len(missing_fields) > 0: raise ValueError(f"Malformed rule config file. Contain rule with missing fields: {missing_fields}.") @@ -260,6 +258,6 @@ def min_line_len(self) -> int: return self.__min_line_len @cached_property - def usage_list(self) -> List[str]: - """usage_list getter""" - return self.__usage_list + def doc_available(self) -> bool: + """doc_available getter""" + return self.__doc_available diff --git a/credsweeper/scanner/scanner.py b/credsweeper/scanner/scanner.py index 2ee107d73..9d96a8b2e 100644 --- a/credsweeper/scanner/scanner.py +++ b/credsweeper/scanner/scanner.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import List, Optional, Type, Tuple, Dict, Union +from typing import List, Type, Tuple, Dict, Union from credsweeper.app import APP_PATH from credsweeper.common.constants import RuleType, MIN_VARIABLE_LENGTH, MIN_SEPARATOR_LENGTH, MIN_VALUE_LENGTH, \ @@ -29,7 +29,7 @@ class Scanner: TargetGroup = List[Tuple[AnalysisTarget, str, int]] - def __init__(self, config: Config, rule_path: Optional[str], usage_list: Optional[List[str]] = None) -> None: + def __init__(self, config: Config, rule_path: Union[None, str, Path]) -> None: self.config = config self.__scanner_for_rule: Dict[str, Type[ScanType]] = {} self.rules: List[Rule] = [] @@ -37,11 +37,11 @@ def __init__(self, config: Config, rule_path: Optional[str], usage_list: Optiona self.min_keyword_len = MAX_LINE_LENGTH self.min_pattern_len = MAX_LINE_LENGTH self.min_pem_key_len = MAX_LINE_LENGTH - self._set_rules(rule_path, usage_list if isinstance(usage_list, list) else ["src", "doc"]) + self._set_rules(rule_path) self.min_len = min(self.min_pattern_len, self.min_keyword_len, self.min_pem_key_len, MIN_VARIABLE_LENGTH + MIN_SEPARATOR_LENGTH + MIN_VALUE_LENGTH) - def _set_rules(self, rule_path: Union[None, str, Path], usage_list: List[str]) -> None: + def _set_rules(self, rule_path: Union[None, str, Path]) -> None: """Auxiliary method to fill rules, determine min_pattern_len and set scanners""" if rule_path is None: rule_path = APP_PATH / "rules" / "config.yaml" @@ -49,7 +49,7 @@ def _set_rules(self, rule_path: Union[None, str, Path], usage_list: List[str]) - if rule_templates and isinstance(rule_templates, list): for rule_template in rule_templates: rule = Rule(self.config, rule_template) - if not self._is_available(usage_list, rule): + if not self._is_available(rule): continue self.rules.append(rule) if 0 < rule.min_line_len: @@ -107,13 +107,16 @@ def _select_and_group_targets(self, targets: List[AnalysisTarget]) -> Tuple[Targ return keyword_targets, pattern_targets, pem_targets - def _is_available(self, usage_list: List[str], rule: Rule) -> bool: + def _is_available(self, rule: Rule) -> bool: """separate the method to reduce complexity""" if rule.severity < self.config.severity: return False - for usage in usage_list: - if usage in rule.usage_list: + if self.config.doc: + # apply only available for doc scanning rules + if rule.doc_available: return True + else: + return True return False @staticmethod diff --git a/tests/__init__.py b/tests/__init__.py index 0bdab9b16..47310f4c9 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ SAMPLES_POST_CRED_COUNT: int = 97 # with option --doc -SAMPLES_IN_DOC = 73 +SAMPLES_IN_DOC = 87 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 17 diff --git a/tests/conftest.py b/tests/conftest.py index 520c2d3a4..21a1b8981 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,8 +5,8 @@ import pytest from credsweeper.app import APP_PATH -from credsweeper.config import Config from credsweeper.common.constants import Severity +from credsweeper.config import Config from credsweeper.rules import Rule from credsweeper.scanner import Scanner from credsweeper.utils import Util diff --git a/tests/data/doc.json b/tests/data/doc.json index 2be38a89b..c65aacc50 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -949,6 +949,28 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.92706, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "+ \"password\": \"dkajco1\"", + "line_num": 10, + "path": "tests/samples/iso_ir_111.patch", + "info": "tests/samples/iso_ir_111.patch|RAW", + "value": "dkajco1", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.8073549220576046, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1195,6 +1217,160 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.84216, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "my_pw: nCzx8A8#!", + "line_num": 2, + "path": "tests/samples/pass_valid", + "info": "tests/samples/pass_valid|RAW", + "value": "nCzx8A8#!", + "variable": "my_pw", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.2432750011217983, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.9988, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "gi_reo_gi_passwd = \"cAc48k1Zd7\"", + "line_num": 1, + "path": "tests/samples/passwd.groovy", + "info": "tests/samples/passwd.groovy|RAW", + "value": "cAc48k1Zd7", + "variable": "gi_reo_gi_passwd", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.121928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.97345, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "mypw: KrAcMe12345,", + "line_num": 2, + "path": "tests/samples/passwd.groovy", + "info": "tests/samples/passwd.groovy|RAW", + "value": "KrAcMe12345,", + "variable": "mypw", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.2862156256610597, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99755, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "password = \"cackle!\"", + "line_num": 1, + "path": "tests/samples/password.gradle", + "info": "tests/samples/password.gradle|RAW", + "value": "cackle!", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.120589933192232, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.92706, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "+ \"password\": \"dkajco1\"", + "line_num": 9, + "path": "tests/samples/password.patch", + "info": "tests/samples/password.patch|RAW", + "value": "dkajco1", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.8073549220576046, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.92706, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "+ \"password\": \"dkajco1\"", + "line_num": 10, + "path": "tests/samples/password_utf16.patch", + "info": "tests/samples/password_utf16.patch|RAW", + "value": "dkajco1", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.8073549220576046, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.92054, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "+ \"password\": \"dkajc\u00f61\"", + "line_num": 9, + "path": "tests/samples/password_western.patch", + "info": "tests/samples/password_western.patch|RAW", + "value": "dkajc\u00f61", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.4063042189065182, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1374,6 +1550,28 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99755, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "pwd = \"cackle!\"", + "line_num": 1, + "path": "tests/samples/pwd.gradle", + "info": "tests/samples/pwd.gradle|RAW", + "value": "cackle!", + "variable": "pwd", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.120589933192232, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1682,6 +1880,72 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.74042, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "password = \"0dm1nk0\"", + "line_num": 29, + "path": "tests/samples/test.html", + "info": "tests/samples/test.html|HTML", + "value": "0dm1nk0", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.5216406363433186, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.77313, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "password = \"Cr3DeHTbIal\"", + "line_num": 42, + "path": "tests/samples/test.html", + "info": "tests/samples/test.html|HTML", + "value": "Cr3DeHTbIal", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.459431618637298, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.7796, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "\"password\" = \"p@$$w0Rd42\"", + "line_num": 58, + "path": "tests/samples/test.html", + "info": "tests/samples/test.html|HTML", + "value": "p@$$w0Rd42", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.325349666421154, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1747,5 +2011,49 @@ } } ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.95248, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "password = \"cackle!\"", + "line_num": 2, + "path": "tests/samples/xml_data.xml", + "info": "tests/samples/xml_data.xml|RAW", + "value": "cackle!", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.120589933192232, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.95248, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "password : cackle!", + "line_num": 1, + "path": "tests/samples/xml_password.xml", + "info": "tests/samples/xml_password.xml|XML", + "value": "cackle!", + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.120589933192232, + "valid": false + } + } + ] } ] diff --git a/tests/filters/test_credit_card_number_check.py b/tests/filters/test_credit_card_number_check.py index 889c25e0e..d8f0eba46 100644 --- a/tests/filters/test_credit_card_number_check.py +++ b/tests/filters/test_credit_card_number_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters.cred_card_number_check import CreditCardNumberCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_line_specific_key_check.py b/tests/filters/test_line_specific_key_check.py index 1a8772c29..b0ddff8d6 100644 --- a/tests/filters/test_line_specific_key_check.py +++ b/tests/filters/test_line_specific_key_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import LineSpecificKeyCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_separator_unusual_check.py b/tests/filters/test_separator_unusual_check.py index 6194c451a..692f2ced9 100644 --- a/tests/filters/test_separator_unusual_check.py +++ b/tests/filters/test_separator_unusual_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import SeparatorUnusualCheck from tests.filters.conftest import DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_allowlist_check.py b/tests/filters/test_value_allowlist_check.py index 30c025e26..9c1374c27 100644 --- a/tests/filters/test_value_allowlist_check.py +++ b/tests/filters/test_value_allowlist_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueAllowlistCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_array_dictionary_check.py b/tests/filters/test_value_array_dictionary_check.py index a402defbd..9451d5763 100644 --- a/tests/filters/test_value_array_dictionary_check.py +++ b/tests/filters/test_value_array_dictionary_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueArrayDictionaryCheck, VariableNotAllowedPatternCheck from credsweeper.rules import Rule from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET @@ -19,8 +18,8 @@ def token_rule(self, config) -> Rule: "filter_type": [VariableNotAllowedPatternCheck.__name__], "use_ml": True, "min_line_len": 0, - "usage_list": ["src", "doc"], - "validations": [] + "validations": [], + "doc_available": True, } rule = Rule(config, token_rule_without_filters) return rule diff --git a/tests/filters/test_value_blocklist_check.py b/tests/filters/test_value_blocklist_check.py index 627d1fcb8..4523dd79d 100644 --- a/tests/filters/test_value_blocklist_check.py +++ b/tests/filters/test_value_blocklist_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueBlocklistCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_camel_case_check.py b/tests/filters/test_value_camel_case_check.py index f184241d9..0393a1d83 100644 --- a/tests/filters/test_value_camel_case_check.py +++ b/tests/filters/test_value_camel_case_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueCamelCaseCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_dictionary_keyword_check.py b/tests/filters/test_value_dictionary_keyword_check.py index 15f026aa7..695ea0ccf 100644 --- a/tests/filters/test_value_dictionary_keyword_check.py +++ b/tests/filters/test_value_dictionary_keyword_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueDictionaryKeywordCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_dictionary_value_length_check.py b/tests/filters/test_value_dictionary_value_length_check.py index b7161598c..a33dc586a 100644 --- a/tests/filters/test_value_dictionary_value_length_check.py +++ b/tests/filters/test_value_dictionary_value_length_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueDictionaryValueLengthCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_file_path_check.py b/tests/filters/test_value_file_path_check.py index 6a004cb22..9c1577475 100644 --- a/tests/filters/test_value_file_path_check.py +++ b/tests/filters/test_value_file_path_check.py @@ -1,6 +1,5 @@ import pytest -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueFilePathCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data diff --git a/tests/filters/test_value_similarity_check.py b/tests/filters/test_value_similarity_check.py index ebfbcabc5..d5a08b11a 100644 --- a/tests/filters/test_value_similarity_check.py +++ b/tests/filters/test_value_similarity_check.py @@ -17,9 +17,9 @@ def password_rule(self, config) -> Rule: "values": ["password|passwd|pwd"], "filter_type": [VariableNotAllowedPatternCheck.__name__], "use_ml": True, - "usage_list": ["src", "doc"], "min_line_len": 0, - "validations": [] + "validations": [], + "doc_available": True, } rule = Rule(config, pass_rule_without_filters) return rule diff --git a/tests/filters/test_value_string_type_check.py b/tests/filters/test_value_string_type_check.py index 2922ab3e6..0d57aeb05 100644 --- a/tests/filters/test_value_string_type_check.py +++ b/tests/filters/test_value_string_type_check.py @@ -1,7 +1,6 @@ import pytest from credsweeper.config import Config -from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueStringTypeCheck from credsweeper.utils import Util from tests.filters.conftest import DUMMY_ANALYSIS_TARGET diff --git a/tests/rules/test_rule.py b/tests/rules/test_rule.py index 0ffa06c07..486cc29f6 100644 --- a/tests/rules/test_rule.py +++ b/tests/rules/test_rule.py @@ -22,7 +22,7 @@ class TestRuleConfigParsing: "min_line_len": 32, "use_ml": False, "validations": [], - "usage_list": ["src", "doc"] + "doc_available": True, }, # Check proper config with no validations { @@ -33,7 +33,7 @@ class TestRuleConfigParsing: "filter_type": GeneralPattern.__name__, "min_line_len": 32, "use_ml": False, - "usage_list": ["src", "doc"] + "doc_available": True, }, ]) def rule_config(self, request: str) -> Any: diff --git a/tests/samples/passwd.groovy b/tests/samples/passwd.groovy index 37aadf8be..72f92c13d 100644 --- a/tests/samples/passwd.groovy +++ b/tests/samples/passwd.groovy @@ -1 +1,3 @@ gi_reo_gi_passwd = "cAc48k1Zd7" +mypw: KrAcMe12345, +masked_password = *****1*****2; diff --git a/tests/scanner/scan_type/test_multipattern.py b/tests/scanner/scan_type/test_multipattern.py index 8c865a44a..cb4bfcf75 100644 --- a/tests/scanner/scan_type/test_multipattern.py +++ b/tests/scanner/scan_type/test_multipattern.py @@ -1,7 +1,7 @@ import random import string import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock from credsweeper.common.constants import MAX_LINE_LENGTH from credsweeper.config import Config @@ -22,10 +22,10 @@ def setUp(self) -> None: "name": "MULTI_PATTERN_RULE", "severity": "info", "type": "pattern", - "usage_list": ["src"], "values": ["a", "b"], "filter_type": [], "min_line_len": 0, + "doc_available": False, }) def test_oversize_line_n(self) -> None: diff --git a/tests/test_app.py b/tests/test_app.py index 5312db9fd..575121f24 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -15,7 +15,7 @@ from credsweeper.app import APP_PATH from credsweeper.utils import Util from tests import AZ_STRING, SAMPLES_POST_CRED_COUNT, SAMPLES_IN_DEEP_3, SAMPLES_PATH, \ - TESTS_PATH, SAMPLES_CRED_COUNT + TESTS_PATH, SAMPLES_CRED_COUNT, SAMPLES_IN_DOC class TestApp(TestCase): @@ -607,7 +607,7 @@ def test_rules_ml_p(self) -> None: self.assertSetEqual(rules_set.difference(missed), report_set, f"\n{_stdout}") self.assertEqual(SAMPLES_POST_CRED_COUNT, len(report)) - # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def test_rules_ml_n(self) -> None: # checks whether all rules have test samples which detected without ML @@ -649,3 +649,12 @@ def test_severity_n(self) -> None: str(SAMPLES_PATH) ]) self.assertNotIn("severity: medium", _stdout) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_doc_n(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + json_filename = os.path.join(tmp_dir, f"{__name__}.json") + _stdout, _stderr = self._m_credsweeper(["--doc", "--path", str(SAMPLES_PATH), "--save-json", json_filename]) + report = Util.json_load(json_filename) + self.assertEqual(SAMPLES_IN_DOC, len(report)) diff --git a/tests/test_main.py b/tests/test_main.py index cc1c12c48..d6a07dd53 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -700,7 +700,8 @@ def test_doc_p(self) -> None: cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() expected_credential_lines = [ - "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0", + "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0", 'password = "0dm1nk0"', 'password = "Cr3DeHTbIal"', + '"password" = "p@$$w0Rd42"' ] self.assertEqual(len(expected_credential_lines), len(found_credentials)) for cred in found_credentials: