diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 78e92ff6a..7ca175fa0 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -184,6 +184,10 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Add synthetic huge data + if: steps.cache-data.outputs.cache-hit == 'true' + run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text + - name: Update PIP run: python -m pip install --upgrade pip @@ -193,14 +197,13 @@ jobs: # check the banner credsweeper --banner - - name: Run performance benchmark + - name: Run performance benchmark RELEASE run: | START_TIME=$(date +%s) - credsweeper --path data --save-json /dev/null + /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null FINISH_TIME=$(date +%s) RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} )) if [ 0 -lt ${RELEASE_TIME} ]; then - echo ${RELEASE_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S") else echo "Wrong result '${RELEASE_TIME}'" @@ -212,26 +215,51 @@ jobs: run: | python -m pip uninstall -y credsweeper - - name: Checkout CredSweeper + - name: Checkout base CredSweeper + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: temp/CredSweeper.base + + - name: Install base CredSweeper + run: | + python -m pip install temp/CredSweeper.base + # check the banner + credsweeper --banner + + - name: Run performance benchmark BASE + run: | + START_TIME=$(date +%s) + /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null + FINISH_TIME=$(date +%s) + BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} )) + if [ 0 -lt ${BASE_TIME} ]; then + echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S") + else + echo "Wrong result '${BASE_TIME}'" + exit 1 + fi + echo "BASE_TIME=${BASE_TIME}" >> $GITHUB_ENV + + - name: Checkout current CredSweeper uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - path: temp/CredSweeper + path: temp/CredSweeper.head - name: Install current CredSweeper run: | - python -m pip install temp/CredSweeper + python -m pip install temp/CredSweeper.head # check the banner credsweeper --banner - - name: Run performance benchmark + - name: Run performance benchmark CURRENT run: | START_TIME=$(date +%s) - credsweeper --path data --save-json /dev/null + /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null FINISH_TIME=$(date +%s) HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} )) if [ 0 -lt ${HEAD_TIME} ]; then - echo ${HEAD_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S") else echo "Wrong result '${HEAD_TIME}'" @@ -241,24 +269,59 @@ jobs: - name: Compare results run: | + exit_code=0 + LOW_DELTA=10 THRESHOLD=250 + + # RELEASE if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then - delta=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} )) - echo "delta=$delta" - if [ $THRESHOLD -lt ${delta} ]; then - echo "Significantly slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}" - exit 1 + d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} )) + echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}" + if [ $LOW_DELTA -ge ${d} ]; then + echo "Almost the same." + elif [ $THRESHOLD -lt ${d} ]; then + echo "Significantly Slowdown." + exit_code=1 + else + echo "Slowdown." fi - echo "Slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}" else - delta=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} )) - echo "delta=$delta" - if [ $THRESHOLD -lt ${delta} ]; then - echo "Significantly speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}" - exit 0 + d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} )) + echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}" + if [ $LOW_DELTA -ge ${d} ]; then + echo "Almost the same." + elif [ $THRESHOLD -lt ${d} ]; then + echo "Significantly speed-up." + else + echo "Speed-up." fi - echo "Speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}" fi + + # BASE + if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then + d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} )) + echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}" + if [ $LOW_DELTA -ge ${d} ]; then + echo "Almost the same." + elif [ $THRESHOLD -lt ${d} ]; then + echo "Significantly Slowdown." + exit_code=1 + else + echo "Slowdown." + fi + else + d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} )) + echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}" + if [ $LOW_DELTA -ge ${d} ]; then + echo "Almost the same." + elif [ $THRESHOLD -lt ${d} ]; then + echo "Significantly speed-up." + else + echo "Speed-up." + fi + fi + + exit ${exit_code} # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/credsweeper/credentials/candidate.py b/credsweeper/credentials/candidate.py index eff04009c..833741dda 100644 --- a/credsweeper/credentials/candidate.py +++ b/credsweeper/credentials/candidate.py @@ -32,78 +32,20 @@ def __init__(self, config: Config, validations: List[Validation] = None, use_ml: bool = False) -> None: + self.line_data_list = line_data_list + self.patterns = patterns + self.rule_name = rule_name + self.severity = severity + self.config = config + self.validations: List[Validation] = validations if validations is not None else [] + self.use_ml = use_ml + self.api_validation = KeyValidationOption.NOT_AVAILABLE self.ml_validation = KeyValidationOption.NOT_AVAILABLE - self.line_data_list: List[LineData] = line_data_list if line_data_list else [] - self.patterns: List[re.Pattern] = patterns if patterns else [] - self.ml_probability = None - self.rule_name: str = rule_name - self.severity: Optional[Severity] = severity - self.validations: List[Validation] = validations if validations else [] - self.use_ml: bool = use_ml - self.config = config + self.ml_probability: Optional[bool] = None - @property - def api_validation(self) -> KeyValidationOption: - """api_validation getter""" - return self.__api_validation - - @api_validation.setter - def api_validation(self, validation: KeyValidationOption) -> None: - """api_validation setter""" - self.__api_validation = validation - - @property - def ml_validation(self) -> KeyValidationOption: - """ml_validation getter""" - return self.__ml_validation - - @ml_validation.setter - def ml_validation(self, validation: KeyValidationOption) -> None: - """ml_validation setter""" - self.__ml_validation = validation - - @property - def line_data_list(self) -> List[LineData]: - """line_data_list getter""" - return self.__line_data_list - - @line_data_list.setter - def line_data_list(self, line_data_list: List[LineData]) -> None: - """line_data_list setter""" - self.__line_data_list = line_data_list - - @property - def patterns(self) -> List[re.Pattern]: - """patterns getter""" - return self.__patterns - - @patterns.setter - def patterns(self, patterns: List[re.Pattern]) -> None: - """patterns setter""" - self.__patterns = patterns - - @property - def rule_name(self) -> str: - """rule_name getter""" - return self.__rule_name - - @rule_name.setter - def rule_name(self, rule_name: str) -> None: - """rule_name setter""" - self.__rule_name = rule_name - - @property - def severity(self) -> Severity: - """severity getter""" - return self.__severity - - @severity.setter - def severity(self, severity: Severity) -> None: - """severity setter""" - self.__severity = severity - - def _encode(self, value: Any) -> Any: + @staticmethod + def _encode(value: Any) -> Any: """Encode value to the base string ascii Args: diff --git a/credsweeper/credentials/line_data.py b/credsweeper/credentials/line_data.py index 87990e142..2453c48c4 100644 --- a/credsweeper/credentials/line_data.py +++ b/credsweeper/credentials/line_data.py @@ -1,5 +1,5 @@ +import contextlib import re -from functools import cached_property from typing import Any, Dict, Optional, Tuple from credsweeper.config import Config @@ -38,13 +38,14 @@ def __init__( info: str, # pattern: re.Pattern) -> None: self.config = config - self.key: Optional[str] = None self.line: str = line self.line_num: int = line_num self.path: str = path self.file_type: str = file_type self.info: str = info self.pattern: re.Pattern = pattern + + self.key: Optional[str] = None self.separator: Optional[str] = None self.separator_span: Optional[Tuple[int, int]] = None self.value: Optional[str] = None @@ -54,142 +55,6 @@ def __init__( self.initialize() - @property - def key(self) -> str: - """key getter""" - return self.__key - - @key.setter - def key(self, key: str) -> None: - """key setter""" - self.__key = key - - @property - def line(self) -> str: - """line getter""" - return self.__line - - @line.setter - def line(self, line: str) -> None: - """line setter""" - self.__line = line - self.__dict__.pop("line_len", None) - - @cached_property - def line_len(self) -> int: - """line_len getter""" - return len(self.__line) - - @property - def line_num(self) -> int: - """line_num getter""" - return self.__line_num - - @line_num.setter - def line_num(self, line_num: int) -> None: - """line_num setter""" - self.__line_num = line_num - - @property - def path(self) -> str: - """path getter""" - return self.__path - - @path.setter - def path(self, path: str) -> None: - """path setter""" - self.__path = path - - @property - def file_type(self) -> str: - """file_type getter""" - return self.__file_type - - @file_type.setter - def file_type(self, file_type: str) -> None: - """file_type setter""" - self.__file_type = file_type - - @property - def info(self) -> str: - """info getter""" - return self.__info - - @info.setter - def info(self, info: str) -> None: - """info setter""" - self.__info = info - - @property - def pattern(self) -> re.Pattern: - """pattern getter""" - return self.__pattern - - @pattern.setter - def pattern(self, pattern: re.Pattern) -> None: - """pattern setter""" - self.__pattern = pattern - - @property - def separator(self) -> str: - """separator getter""" - return self.__separator - - @separator.setter - def separator(self, separator: str) -> None: - """separator setter""" - self.__separator = separator - - @property - def separator_span(self) -> Tuple[int, int]: - """separator_span getter""" - return self.__separator_span - - @separator_span.setter - def separator_span(self, separator_span: Tuple[int, int]) -> None: - """separator_span setter""" - self.__separator_span = separator_span - - @property - def value(self) -> str: - """value getter""" - return self.__value - - @value.setter - def value(self, value: str) -> None: - """value setter""" - self.__value = value - - @property - def variable(self) -> str: - """variable getter""" - return self.__variable - - @variable.setter - def variable(self, variable: str) -> None: - """variable setter""" - self.__variable = variable - - @property - def value_leftquote(self) -> str: - """value_leftquote getter""" - return self.__value_leftquote - - @value_leftquote.setter - def value_leftquote(self, value_leftquote: str) -> None: - """value_leftquote setter""" - self.__value_leftquote = value_leftquote - - @property - def value_rightquote(self) -> str: - """value_rightquote getter""" - return self.__value_rightquote - - @value_rightquote.setter - def value_rightquote(self, value_rightquote: str) -> None: - """value_rightquote setter""" - self.__value_rightquote = value_rightquote - def initialize(self) -> None: """Set all internal fields.""" self.set_pattern_match_groups() @@ -200,17 +65,15 @@ def set_pattern_match_groups(self) -> None: if match_obj is None: return - def get_group_from_match_obj(match_obj: re.Match, group: str) -> Any: - try: - return match_obj.group(group) - except Exception: - return None - - def get_span_from_match_obj(match_obj: re.Match, group: str) -> Optional[Tuple[int, int]]: - try: - return match_obj.span(group) - except Exception: - return None + def get_group_from_match_obj(_match_obj: re.Match, group: str) -> Any: + with contextlib.suppress(Exception): + return _match_obj.group(group) + return None + + def get_span_from_match_obj(_match_obj: re.Match, group: str) -> Optional[Tuple[int, int]]: + with contextlib.suppress(Exception): + return _match_obj.span(group) + return None self.key = get_group_from_match_obj(match_obj, "keyword") self.separator = get_group_from_match_obj(match_obj, "separator") diff --git a/credsweeper/file_handler/__init__.py b/credsweeper/file_handler/__init__.py index cd3ffe3b4..7d8ec6e0b 100644 --- a/credsweeper/file_handler/__init__.py +++ b/credsweeper/file_handler/__init__.py @@ -11,5 +11,5 @@ 'DataContentProvider', # 'DiffContentProvider', # 'StringContentProvider', # - 'TextContentProvider' + 'TextContentProvider', # ] diff --git a/credsweeper/filters/value_not_part_encoded_check.py b/credsweeper/filters/value_not_part_encoded_check.py index b186f1f5e..dcd1c654a 100644 --- a/credsweeper/filters/value_not_part_encoded_check.py +++ b/credsweeper/filters/value_not_part_encoded_check.py @@ -33,7 +33,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: return True if line_data.line_num == target.line_num \ - and line_data.line_len == target.line_len \ + and len(line_data.line) == target.line_len \ and line_data.line == target.line \ and 0 < target.line_num <= target.lines_len \ and line_data.line == target.lines[target.line_num - 1]: