Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Base64filter #592

Merged
merged 1 commit into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck
from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem
from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck
from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/group/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,5 @@ def get_pattern_base_filters(config: Config) -> List[Filter]:
return [ #
LineSpecificKeyCheck(), #
ValuePatternCheck(config), #
ValuePatternLengthCheck(config)
ValuePatternLengthCheck(config), #
]
56 changes: 56 additions & 0 deletions credsweeper/filters/value_base64_part_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import contextlib
import statistics

from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueBase64PartCheck(Filter):
    """Filter out a candidate that looks like a fragment of a longer base64 line."""

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Decide whether the found value is merely a slice of a long base64 sequence.

        Only a value that is directly preceded by '/' is inspected. The entropy of
        the value is compared with the entropy of the equally sized (clamped to the
        line bounds) text chunks on its left and on its right.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate has to be filtered out, False when it is kept

        """
        # any failure during the analysis is swallowed and the candidate is kept
        with contextlib.suppress(Exception):
            start = line_data.value_start
            if not start or '/' != line_data.line[start - 1]:
                # nothing before the value or the previous symbol is not a slash
                return False

            value = line_data.value
            if '-' in value or '_' in value:
                # url-safe symbols inside the value mean that '/' is a delimiter
                return False

            line = line_data.line
            span = len(value)
            alphabet = Chars.BASE64STD_CHARS.value

            value_entropy = Util.get_shannon_entropy(value, alphabet)
            # neighbour chunks have the same size as the value unless the line is shorter
            left_entropy = Util.get_shannon_entropy(line[max(0, start - span):start], alphabet)
            end = line_data.value_end
            right_entropy = Util.get_shannon_entropy(line[end:min(len(line), end + span)], alphabet)

            entropies = [value_entropy, left_entropy, right_entropy]
            mean = statistics.mean(entropies)
            threshold = mean - statistics.stdev(entropies, mean)
            if threshold < left_entropy and threshold < right_entropy:
                # both neighbours are about as random as the value itself,
                # so the value is treated as a part of a long base64 line
                return True

        return False
4 changes: 3 additions & 1 deletion credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@
type: pattern
values:
- (?<![0-9A-Za-z_+-])(?P<value>EAA[0-9A-Za-z]{80,800})
filter_type: GeneralPattern
filter_type:
- ValuePatternCheck
- ValueBase64PartCheck
required_substrings:
- EAA
min_line_len: 80
Expand Down
33 changes: 16 additions & 17 deletions credsweeper/scanner/scan_type/scan_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,24 +164,23 @@ def _get_candidates(cls, config: Config, rule: Rule, target: AnalysisTarget) ->
if config.exclude_lines and target.line_strip in config.exclude_lines:
return candidates

line_data_list = cls.get_line_data_list(config=config,
target=target,
pattern=rule.patterns[0],
filters=rule.filters)

for line_data in line_data_list:
if config.exclude_values and line_data.value.strip() in config.exclude_values:
continue

candidate = Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config, rule.validations,
rule.use_ml, rule.confidence)
# single pattern with multiple values means all the patterns must matched in target
if 1 < len(rule.patterns) and rule.rule_type in (RuleType.PATTERN, RuleType.KEYWORD):
# additional check whether all patterns match
if not cls._aux_scan(config, rule, target, candidate):
# cannot find secondary values for the candidate
if line_data_list := cls.get_line_data_list(config=config,
target=target,
pattern=rule.patterns[0],
filters=rule.filters):
for line_data in line_data_list:
if config.exclude_values and line_data.value.strip() in config.exclude_values:
continue
candidates.append(candidate)

candidate = Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config,
rule.validations, rule.use_ml, rule.confidence)
# single pattern with multiple values means all the patterns must matched in target
if 1 < len(rule.patterns) and rule.rule_type in (RuleType.PATTERN, RuleType.KEYWORD):
# additional check whether all patterns match
if not cls._aux_scan(config, rule, target, candidate):
# cannot find secondary values for the candidate
continue
candidates.append(candidate)
return candidates

@classmethod
Expand Down
33 changes: 33 additions & 0 deletions tests/filters/test_value_base64_part_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import re
import unittest

from credsweeper.credentials import LineData
from credsweeper.filters import ValueBase64PartCheck
from tests.filters.conftest import DUMMY_ANALYSIS_TARGET


class TestValueBase64PartCheck(unittest.TestCase):
    """Checks of ValueBase64PartCheck on an EAA-like token in two different contexts."""

    EAA_PATTERN = re.compile(r"(?P<value>\bEAA[0-9A-Za-z]{32})")

    def _make_line_data(self, line: str) -> LineData:
        # every case differs only in the scanned line, so the rest is shared here
        return LineData(config=None,
                        path="dummy",
                        file_type="",
                        line=line,
                        info="",
                        line_num=1,
                        line_pos=0,
                        pattern=TestValueBase64PartCheck.EAA_PATTERN)

    def test_value_check_n(self) -> None:
        # the token is surrounded by base64-like text - the filter must fire
        surrounded = ("qcE81rS+FJHuvg39lz4T/EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo"
                      "se0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD")
        candidate = self._make_line_data(surrounded)
        verdict = ValueBase64PartCheck().run(candidate, DUMMY_ANALYSIS_TARGET)
        self.assertTrue(verdict)

    def test_value_check_p(self) -> None:
        # the token is a path segment of an URL - the filter must not fire
        url = "http://meta.test/api/EAACRvAWiwzR8rcXFsLiUH13ybj0tdEa?x=login"
        candidate = self._make_line_data(url)
        verdict = ValueBase64PartCheck().run(candidate, DUMMY_ANALYSIS_TARGET)
        self.assertFalse(verdict)
2 changes: 1 addition & 1 deletion tests/samples/test.html

Large diffs are not rendered by default.

Loading