Skip to content

Commit

Permalink
Slight changes (#382)
Browse files (browse the repository at this point in the history)
* slight changes

* remove unnecessary properties

* benchmark with BASE and RELEASE

* rollback cached properties

* synth data

* style

* imports

* perf benchmark result compare
  • Loading branch information
babenek authored Jul 19, 2023
1 parent 767546a commit 3989a25
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 241 deletions.
105 changes: 84 additions & 21 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Add synthetic huge data
if: steps.cache-data.outputs.cache-hit == 'true'
run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text

- name: Update PIP
run: python -m pip install --upgrade pip

Expand All @@ -193,14 +197,13 @@ jobs:
# check the banner
credsweeper --banner
- name: Run performance benchmark
- name: Run performance benchmark RELEASE
run: |
START_TIME=$(date +%s)
credsweeper --path data --save-json /dev/null
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${RELEASE_TIME} ]; then
echo ${RELEASE_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt
echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${RELEASE_TIME}'"
Expand All @@ -212,26 +215,51 @@ jobs:
run: |
python -m pip uninstall -y credsweeper
- name: Checkout CredSweeper
- name: Checkout base CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.base.sha }}
path: temp/CredSweeper.base

- name: Install base CredSweeper
run: |
python -m pip install temp/CredSweeper.base
# check the banner
credsweeper --banner
- name: Run performance benchmark BASE
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${BASE_TIME} ]; then
echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${BASE_TIME}'"
exit 1
fi
echo "BASE_TIME=${BASE_TIME}" >> $GITHUB_ENV
- name: Checkout current CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: temp/CredSweeper
path: temp/CredSweeper.head

- name: Install current CredSweeper
run: |
python -m pip install temp/CredSweeper
python -m pip install temp/CredSweeper.head
# check the banner
credsweeper --banner
- name: Run performance benchmark
- name: Run performance benchmark CURRENT
run: |
START_TIME=$(date +%s)
credsweeper --path data --save-json /dev/null
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${HEAD_TIME} ]; then
echo ${HEAD_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt
echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${HEAD_TIME}'"
Expand All @@ -241,24 +269,59 @@ jobs:
- name: Compare results
run: |
exit_code=0
LOW_DELTA=10
THRESHOLD=250
# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
delta=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "delta=$delta"
if [ $THRESHOLD -lt ${delta} ]; then
echo "Significantly slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
exit 1
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
echo "Slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
else
delta=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "delta=$delta"
if [ $THRESHOLD -lt ${delta} ]; then
echo "Significantly speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
exit 0
d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
echo "Speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
fi
# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi
exit ${exit_code}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

80 changes: 11 additions & 69 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,78 +32,20 @@ def __init__(self,
config: Config,
validations: List[Validation] = None,
use_ml: bool = False) -> None:
self.line_data_list = line_data_list
self.patterns = patterns
self.rule_name = rule_name
self.severity = severity
self.config = config
self.validations: List[Validation] = validations if validations is not None else []
self.use_ml = use_ml

self.api_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.line_data_list: List[LineData] = line_data_list if line_data_list else []
self.patterns: List[re.Pattern] = patterns if patterns else []
self.ml_probability = None
self.rule_name: str = rule_name
self.severity: Optional[Severity] = severity
self.validations: List[Validation] = validations if validations else []
self.use_ml: bool = use_ml
self.config = config
self.ml_probability: Optional[bool] = None

@property
def api_validation(self) -> KeyValidationOption:
"""api_validation getter"""
return self.__api_validation

@api_validation.setter
def api_validation(self, validation: KeyValidationOption) -> None:
"""api_validation setter"""
self.__api_validation = validation

@property
def ml_validation(self) -> KeyValidationOption:
"""ml_validation getter"""
return self.__ml_validation

@ml_validation.setter
def ml_validation(self, validation: KeyValidationOption) -> None:
"""ml_validation setter"""
self.__ml_validation = validation

@property
def line_data_list(self) -> List[LineData]:
"""line_data_list getter"""
return self.__line_data_list

@line_data_list.setter
def line_data_list(self, line_data_list: List[LineData]) -> None:
"""line_data_list setter"""
self.__line_data_list = line_data_list

@property
def patterns(self) -> List[re.Pattern]:
"""patterns getter"""
return self.__patterns

@patterns.setter
def patterns(self, patterns: List[re.Pattern]) -> None:
"""patterns setter"""
self.__patterns = patterns

@property
def rule_name(self) -> str:
"""rule_name getter"""
return self.__rule_name

@rule_name.setter
def rule_name(self, rule_name: str) -> None:
"""rule_name setter"""
self.__rule_name = rule_name

@property
def severity(self) -> Severity:
"""severity getter"""
return self.__severity

@severity.setter
def severity(self, severity: Severity) -> None:
"""severity setter"""
self.__severity = severity

def _encode(self, value: Any) -> Any:
@staticmethod
def _encode(value: Any) -> Any:
"""Encode value to the base string ascii
Args:
Expand Down
Loading

0 comments on commit 3989a25

Please sign in to comment.