Skip to content

Commit

Permalink
[skip actions] [subhashtext] 2024-08-10T10:14:07+03:00
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Aug 10, 2024
2 parents 9398354 + 061d0d5 commit 9c0b97c
Show file tree
Hide file tree
Showing 48 changed files with 2,475 additions and 5,762 deletions.
35 changes: 23 additions & 12 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ jobs:
exit_code=0
LOW_DELTA=10
THRESHOLD=250
# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
Expand All @@ -311,7 +311,7 @@ jobs:
echo "Speed-up."
fi
fi
# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
Expand All @@ -335,10 +335,11 @@ jobs:
echo "Speed-up."
fi
fi
exit ${exit_code}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

experiment:
# the ml train test is placed here to use cached data set
needs: [ download_data ]
Expand Down Expand Up @@ -428,24 +429,34 @@ jobs:
exit 1
fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

run_doc_benchmark:
  runs-on: ubuntu-latest
  # Keep the whole condition inside ONE expression: `${{ a }} or ${{ b }}` is rendered
  # as a plain non-empty string, which is always truthy, so the job would never be skipped.
  if: ${{ 'push' == github.event_name || 'Samsung/CredSweeper' == github.event.pull_request.head.repo.full_name }}
  steps:
    - name: Checkout CredSweeper PR
      if: ${{ 'pull_request' == github.event_name }}
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}

    - name: Checkout CredSweeper HEAD
      if: ${{ 'push' == github.event_name }}
      uses: actions/checkout@v4
      with:
        # the push payload has no `head` field; github.sha is the commit that triggered the run
        ref: ${{ github.sha }}

    - name: Send cURL request with the commit SHA
      if: ${{ 'pull_request' == github.event_name }}
      env:
        # pass the secret through the environment instead of pasting it into the script text
        SLACK_URL: ${{ secrets.SLACK_URL }}
      run: |
        if [[ "${SLACK_URL}" =~ http.*/.*/.* ]]; then
          COMMIT_SHA=$(git rev-parse HEAD)
          echo "${COMMIT_SHA}"
          curl -X POST "${SLACK_URL}" \
            --data-urlencode \
            "payload={'text':'[BMT Request] ${{ github.event.repository.html_url }}/commit/${COMMIT_SHA}'}"
        else
          echo "secrets.SLACK_URL is not available"
        fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
202 changes: 99 additions & 103 deletions cicd/benchmark.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
'__version__'
]

__version__ = "1.8.2"
__version__ = "1.8.3"
2 changes: 2 additions & 0 deletions credsweeper/credentials/line_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ def sanitize_variable(self) -> None:
while self.variable and sanitized_var_len != len(self.variable):
sanitized_var_len = len(self.variable)
self.variable = self.variable.strip(self.variable_strip_pattern)
if self.variable.endswith('\\'):
self.variable = self.variable[:-1]
if variable and len(self.variable) < len(variable) and 0 <= self.variable_start and 0 <= self.variable_end:
start = variable.find(self.variable)
self.variable_start += start
Expand Down
5 changes: 2 additions & 3 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck
from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck
from credsweeper.filters.value_atlassian_token_check import ValueAtlassianTokenCheck
from credsweeper.filters.value_azure_token_check import ValueAzureTokenCheck
from credsweeper.filters.value_base32_data_check import ValueBase32DataCheck
from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck
from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem
from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck
from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
Expand All @@ -24,17 +26,14 @@
from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck
from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
from credsweeper.filters.value_ip_check import ValueIPCheck
from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
from credsweeper.filters.value_length_check import ValueLengthCheck
from credsweeper.filters.value_method_check import ValueMethodCheck
from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck
from credsweeper.filters.value_number_check import ValueNumberCheck
from credsweeper.filters.value_pattern_check import ValuePatternCheck
from credsweeper.filters.value_pattern_length_check import ValuePatternLengthCheck
from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
from credsweeper.filters.value_split_keyword_check import ValueSplitKeywordCheck
from credsweeper.filters.value_string_type_check import ValueStringTypeCheck
Expand Down
2 changes: 2 additions & 0 deletions credsweeper/filters/group/general_pattern.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from credsweeper.common.constants import GroupType
from credsweeper.config import Config
from credsweeper.filters import ValueUselessWordCheck
from credsweeper.filters.group import Group


Expand All @@ -8,3 +9,4 @@ class GeneralPattern(Group):

def __init__(self, config: Config) -> None:
    """Initialise the group with GroupType.PATTERN and add ValueUselessWordCheck to its filters."""
    super().__init__(config, GroupType.PATTERN)
    self.filters.append(ValueUselessWordCheck())
8 changes: 3 additions & 5 deletions credsweeper/filters/group/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from credsweeper.config import Config
from credsweeper.filters import (Filter, LineSpecificKeyCheck, ValueAllowlistCheck, ValueArrayDictionaryCheck,
ValueBlocklistCheck, ValueCamelCaseCheck, ValueFilePathCheck, ValueFirstWordCheck,
ValueLastWordCheck, ValueLengthCheck, ValueMethodCheck, ValueNotAllowedPatternCheck,
ValuePatternCheck, ValueSimilarityCheck, ValueStringTypeCheck, ValueTokenCheck,
VariableNotAllowedPatternCheck, ValuePatternLengthCheck, ValueHexNumberCheck)
ValueLastWordCheck, ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck,
ValueSimilarityCheck, ValueStringTypeCheck, ValueTokenCheck,
VariableNotAllowedPatternCheck, ValueHexNumberCheck)


class Group(ABC):
Expand Down Expand Up @@ -43,7 +43,6 @@ def get_keyword_base_filters(config: Config) -> List[Filter]:
ValueFirstWordCheck(),
ValueHexNumberCheck(),
ValueLastWordCheck(),
ValueLengthCheck(config),
ValueMethodCheck(),
ValueSimilarityCheck(),
ValueStringTypeCheck(config),
Expand All @@ -60,5 +59,4 @@ def get_pattern_base_filters(config: Config) -> List[Filter]:
return [ #
LineSpecificKeyCheck(), #
ValuePatternCheck(config), #
ValuePatternLengthCheck(config)
]
7 changes: 3 additions & 4 deletions credsweeper/filters/group/url_credentials_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from credsweeper.config import Config
from credsweeper.filters import (ValueAllowlistCheck, ValueArrayDictionaryCheck, ValueBlocklistCheck,
ValueCamelCaseCheck, ValueDictionaryValueLengthCheck, ValueFilePathCheck,
ValueFirstWordCheck, ValueLastWordCheck, ValueLengthCheck, ValueMethodCheck,
ValueNotAllowedPatternCheck, ValuePatternCheck, ValueStringTypeCheck, ValueTokenCheck)
ValueFirstWordCheck, ValueLastWordCheck, ValueMethodCheck, ValueNotAllowedPatternCheck,
ValuePatternCheck, ValueStringTypeCheck, ValueTokenCheck)
from credsweeper.filters.group import Group


Expand All @@ -25,11 +25,10 @@ def __init__(self, config: Config) -> None:
ValueFilePathCheck(),
ValueFirstWordCheck(),
ValueLastWordCheck(),
ValueLengthCheck(config),
ValueMethodCheck(),
ValueStringTypeCheck(config),
ValueNotAllowedPatternCheck(),
ValueTokenCheck(),
ValueDictionaryValueLengthCheck(),
ValueDictionaryValueLengthCheck(min_len=4, max_len=80),
ValuePatternCheck(config)
]
12 changes: 9 additions & 3 deletions credsweeper/filters/line_specific_key_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re

from credsweeper.common.constants import ML_HUNK
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand All @@ -10,8 +11,8 @@
class LineSpecificKeyCheck(Filter):
"""Check that values from list below is not in candidate line."""

NOT_ALLOWED = [r"example", r"enc\(", r"enc\[", r"true", r"false"]
NOT_ALLOWED_PATTERN = re.compile(Util.get_regex_combine_or(NOT_ALLOWED))
NOT_ALLOWED = [r"example", r"\benc[\(\[]", r"\btrue\b", r"\bfalse\b"]
NOT_ALLOWED_PATTERN = re.compile(Util.get_regex_combine_or(NOT_ALLOWED), re.IGNORECASE)

def __init__(self, config: Config = None) -> None:
pass
Expand All @@ -29,8 +30,13 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
"""
if line_data.line is None:
return True
if 0 <= line_data.variable_start:
# variable may be defined too
sub_line_start = 0 if ML_HUNK >= line_data.variable_start else line_data.variable_start - ML_HUNK
else:
sub_line_start = 0 if ML_HUNK >= line_data.value_start else line_data.value_start - ML_HUNK

if self.NOT_ALLOWED_PATTERN.search(target.line_lower):
if self.NOT_ALLOWED_PATTERN.search(line_data.line, sub_line_start, line_data.value_end + ML_HUNK):
return True

return False
49 changes: 0 additions & 49 deletions credsweeper/filters/separator_unusual_check.py

This file was deleted.

52 changes: 52 additions & 0 deletions credsweeper/filters/value_azure_token_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import contextlib
import json

from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
from credsweeper.utils import Util


class ValueAzureTokenCheck(Filter):
    """
    Azure tokens contain a header, a payload and a signature
    https://learn.microsoft.com/en-us/azure/active-directory-b2c/access-tokens
    """

    # keys that must all be present in the decoded header / payload
    HEADER_KEYS = ("alg", "typ", "kid")
    PAYLOAD_KEYS = ("iss", "exp", "iat")

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received token which might be structured.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True, when need to filter candidate and False if left

        """
        # any exception raised while splitting, decoding or parsing leaves the block
        # and falls through to the final `return True`
        with contextlib.suppress(Exception):
            parts = line_data.value.split('.')
            if 3 != len(parts):
                return True
            header = json.loads(Util.decode_base64(parts[0], padding_safe=True, urlsafe_detect=True))
            # the header must be a JSON object: `in` on a str or list would be a substring / element test
            if not (isinstance(header, dict) and all(key in header for key in self.HEADER_KEYS)):
                # must be all parts in header
                return True
            payload = json.loads(Util.decode_base64(parts[1], padding_safe=True, urlsafe_detect=True))
            if not (isinstance(payload, dict) and all(key in payload for key in self.PAYLOAD_KEYS)):
                # must be all parts in payload
                return True
            min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
            entropy = Util.get_shannon_entropy(parts[2], Chars.BASE64URL_CHARS.value)
            # good signature has to be like random bytes
            return entropy < min_entropy

        return True
56 changes: 56 additions & 0 deletions credsweeper/filters/value_base64_part_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import contextlib
import statistics

from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueBase64PartCheck(Filter):
    """
    Check that candidate is NOT a part of base64 long line
    """

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received weird base64 token which must be a random string

        The value is compared with the equally long pieces of the line directly before and
        after it; the check only applies when the value is preceded by '/'.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True, when need to filter candidate and False if left

        """

        # any exception inside the block is suppressed and the candidate is left (False)
        with contextlib.suppress(Exception):
            # strictly positive start: 0 has no preceding char, and a negative position
            # (presumably the "not set" marker - TODO confirm) would index from the line end
            if 0 < line_data.value_start and '/' == line_data.line[line_data.value_start - 1]:
                if '-' in line_data.value or '_' in line_data.value:
                    # the value contains url-safe chars, so '/' is a delimiter
                    return False
                value_len = len(line_data.value)
                value_entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value)
                # neighbour windows have the same length as the value, clamped to the line bounds
                left_start = max(0, line_data.value_start - value_len)
                left_entropy = Util.get_shannon_entropy(line_data.line[left_start:line_data.value_start],
                                                        Chars.BASE64STD_CHARS.value)
                right_end = min(len(line_data.line), line_data.value_end + value_len)
                right_entropy = Util.get_shannon_entropy(line_data.line[line_data.value_end:right_end],
                                                         Chars.BASE64STD_CHARS.value)
                data = [value_entropy, left_entropy, right_entropy]
                avg = statistics.mean(data)
                stdev = statistics.stdev(data, avg)
                avg_min = avg - stdev
                if avg_min < left_entropy and avg_min < right_entropy:
                    # high entropy of bound parts looks like a part of base64 long line
                    return True

        return False
7 changes: 4 additions & 3 deletions credsweeper/filters/value_dictionary_value_length_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
class ValueDictionaryValueLengthCheck(Filter):
"""Check that candidate length is between min_len and max_len inclusive (4 and 31 by default)."""

def __init__(self, config: Config = None) -> None:
pass
def __init__(self, config: Config = None, min_len: int = 4, max_len: int = 31) -> None:
self.min_len = min_len
self.max_len = max_len

def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
"""Run filter checks on received credential candidate data 'line_data'.
Expand All @@ -21,7 +22,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
True, if need to filter candidate and False if left
"""
if 4 <= len(line_data.value) <= 31:
if self.min_len <= len(line_data.value) <= self.max_len:
return False
else:
return True
Loading

0 comments on commit 9c0b97c

Please sign in to comment.