Skip to content

Commit

Permalink
Apply morpheme check for CamelCase and ValueFilePath filters
Browse files (browse the repository at this point in the history)
  • Loading branch information
babenek committed Sep 6, 2024
1 parent 6cf2731 commit e6d2db0
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 16 deletions.
18 changes: 18 additions & 0 deletions credsweeper/common/keyword_checklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,21 @@ def morpheme_set(self) -> Set[str]:
def morpheme_len(self) -> int:
"""Length of morpheme_set"""
return len(self.__morpheme_set)

def check_morphemes(self, line_lower: str, threshold: int) -> bool:
    """Check whether the line contains more distinct morphemes than the threshold.

    Each morpheme from ``self.morpheme_set`` is counted at most once, no
    matter how many times it occurs in the line, and the scan stops as soon
    as the count exceeds the threshold.

    Args:
        line_lower: input line - MUST be in lower case (the line is compared
            as-is, no case folding is done here)
        threshold: number of matched morphemes that has to be exceeded

    Return:
        True - if the number of morphemes found in the line exceeds the threshold

    """
    if threshold < 0:
        # Even zero matches exceed a negative threshold; without this guard
        # a line with no morphemes would fall through to False.
        return True
    matches = 0
    for keyword in self.morpheme_set:
        if keyword in line_lower:
            matches += 1
            # Compare right after each hit so the loop can stop early.
            if threshold < matches:
                return True
    return False
6 changes: 6 additions & 0 deletions credsweeper/common/morpheme_checklist.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
../
.com
.org
000
111
222
Expand Down Expand Up @@ -373,6 +376,7 @@ course
court
cove
cpu_
crac
creat
cred
cript
Expand Down Expand Up @@ -694,6 +698,7 @@ hybrid
iabl
ical
icon
id_rsa
iden
idle
ieee
Expand Down Expand Up @@ -1307,6 +1312,7 @@ spot
spray
sql
src_
ssh
ssl
stack
stan
Expand Down
3 changes: 2 additions & 1 deletion credsweeper/filters/value_camel_case_check.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re

from credsweeper.config import Config
from credsweeper.common import static_keyword_checklist
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
Expand Down Expand Up @@ -30,6 +31,6 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
if line_data.is_well_quoted_value:
return False
if self.CAMEL_CASE_PATTERN.match(line_data.value):
return True
return static_keyword_checklist.check_morphemes(line_data.value.lower(), 1)

return False
9 changes: 1 addition & 8 deletions credsweeper/filters/value_couple_keyword_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,4 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
True, if need to filter candidate and False if left
"""
value = line_data.value.lower()
matches = 0
for keyword in static_keyword_checklist.morpheme_set:
if keyword in value:
matches += 1
if 1 < matches:
return True
return False
return static_keyword_checklist.check_morphemes(line_data.value.lower(), 1)
7 changes: 4 additions & 3 deletions credsweeper/filters/value_file_path_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from credsweeper.common.constants import Chars
from credsweeper.common import static_keyword_checklist
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand All @@ -13,7 +14,7 @@ class ValueFilePathCheck(Filter):
and do not have any special characters ( !$@`&*()+)
"""
base64_possible_set = set(Chars.BASE64_CHARS.value) | set(Chars.BASE64URL_CHARS.value)
unusual_windows_symbols_in_path = "\t\n\r !$@`&*()[]{}<>+=;,~"
unusual_windows_symbols_in_path = "\t\n\r !$@`&*()[]{}<>+=;,~^"
unusual_linux_symbols_in_path = unusual_windows_symbols_in_path + ":\\"

def __init__(self, config: Config = None) -> None:
Expand Down Expand Up @@ -41,7 +42,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
or value.startswith("//") and ':' == line_data.separator):
# common case for url definition or aliases
# or _keyword_://example.com where : is the separator
return True
return static_keyword_checklist.check_morphemes(value.lower(), 1)
# base64 encoded data might look like linux path
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value))
# get minimal entropy to compare with shannon entropy of found value
Expand Down Expand Up @@ -70,5 +71,5 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
break
else:
if contains_unix_separator ^ contains_windows_separator:
return True
return static_keyword_checklist.check_morphemes(value.lower(), 1)
return False
2 changes: 1 addition & 1 deletion tests/common/test_keyword_checklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_morpheme_set_p(self):
for i in KeywordChecklist().morpheme_set:
self.assertLessEqual(3, len(i))
# valid symbols for variable names
self.assertRegex(i, r"[a-z0-9_]{3,500}")
self.assertRegex(i, r"[a-z0-9_/.\\:]{3,500}")

def test_keyword_set_n(self):
# checks whether the keywords are unique, in lower case and not shorter than 3 symbols
Expand Down
8 changes: 5 additions & 3 deletions tests/filters/test_value_file_path_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ def test_value_file_path_check_p(self, file_path: pytest.fixture, line: str) ->
@pytest.mark.parametrize(
"line",
[
"~/.ssh/id_rsa", # path
"../key", # path
"../../log", # path
"/home/user/.ssh/id_rsa", # path
"../.ssh/id_rsa", # path
"crackle/filepath.txt",
"/home/user/tmp", # simple path
"../..", # path
"dir/..", # path
"../dir", # path
"file:///Crackle/filepath/", # path from browser url
"~/.custompass", # path with synonym
"./sshpass.sh", # path with synonym
Expand Down

0 comments on commit e6d2db0

Please sign in to comment.