Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slight changes #382

Merged
merged 8 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 84 additions & 21 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Add synthetic huge data
csh519 marked this conversation as resolved.
Show resolved Hide resolved
if: steps.cache-data.outputs.cache-hit == 'true'
run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text

- name: Update PIP
run: python -m pip install --upgrade pip

Expand All @@ -193,14 +197,13 @@ jobs:
# check the banner
credsweeper --banner

- name: Run performance benchmark
- name: Run performance benchmark RELEASE
run: |
START_TIME=$(date +%s)
credsweeper --path data --save-json /dev/null
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${RELEASE_TIME} ]; then
echo ${RELEASE_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt
echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${RELEASE_TIME}'"
Expand All @@ -212,26 +215,51 @@ jobs:
run: |
python -m pip uninstall -y credsweeper

- name: Checkout CredSweeper
- name: Checkout base CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.base.sha }}
path: temp/CredSweeper.base

- name: Install base CredSweeper
run: |
python -m pip install temp/CredSweeper.base
# check the banner
credsweeper --banner

- name: Run performance benchmark BASE
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${BASE_TIME} ]; then
echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${BASE_TIME}'"
exit 1
fi
echo "BASE_TIME=${BASE_TIME}" >> $GITHUB_ENV

- name: Checkout current CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: temp/CredSweeper
path: temp/CredSweeper.head

- name: Install current CredSweeper
run: |
python -m pip install temp/CredSweeper
python -m pip install temp/CredSweeper.head
# check the banner
credsweeper --banner

- name: Run performance benchmark
- name: Run performance benchmark CURRENT
run: |
START_TIME=$(date +%s)
credsweeper --path data --save-json /dev/null
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${HEAD_TIME} ]; then
echo ${HEAD_TIME}>stopwatch_${{ matrix.code-type }}_${{ matrix.python-version }}_result.txt
echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${HEAD_TIME}'"
Expand All @@ -241,24 +269,59 @@ jobs:

- name: Compare results
run: |
exit_code=0
LOW_DELTA=10
THRESHOLD=250

# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
delta=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "delta=$delta"
if [ $THRESHOLD -lt ${delta} ]; then
echo "Significantly slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
exit 1
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
echo "Slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
else
delta=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "delta=$delta"
if [ $THRESHOLD -lt ${delta} ]; then
echo "Significantly speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
exit 0
d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
echo "Speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
fi

# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi

exit ${exit_code}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

80 changes: 11 additions & 69 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,78 +32,20 @@ def __init__(self,
config: Config,
validations: List[Validation] = None,
use_ml: bool = False) -> None:
self.line_data_list = line_data_list
self.patterns = patterns
self.rule_name = rule_name
self.severity = severity
self.config = config
self.validations: List[Validation] = validations if validations is not None else []
self.use_ml = use_ml
csh519 marked this conversation as resolved.
Show resolved Hide resolved

self.api_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.line_data_list: List[LineData] = line_data_list if line_data_list else []
self.patterns: List[re.Pattern] = patterns if patterns else []
self.ml_probability = None
self.rule_name: str = rule_name
self.severity: Optional[Severity] = severity
self.validations: List[Validation] = validations if validations else []
self.use_ml: bool = use_ml
self.config = config
self.ml_probability: Optional[bool] = None

@property
def api_validation(self) -> KeyValidationOption:
"""api_validation getter"""
return self.__api_validation

@api_validation.setter
def api_validation(self, validation: KeyValidationOption) -> None:
"""api_validation setter"""
self.__api_validation = validation

@property
def ml_validation(self) -> KeyValidationOption:
"""ml_validation getter"""
return self.__ml_validation

@ml_validation.setter
def ml_validation(self, validation: KeyValidationOption) -> None:
"""ml_validation setter"""
self.__ml_validation = validation

@property
def line_data_list(self) -> List[LineData]:
"""line_data_list getter"""
return self.__line_data_list

@line_data_list.setter
def line_data_list(self, line_data_list: List[LineData]) -> None:
"""line_data_list setter"""
self.__line_data_list = line_data_list

@property
def patterns(self) -> List[re.Pattern]:
"""patterns getter"""
return self.__patterns

@patterns.setter
def patterns(self, patterns: List[re.Pattern]) -> None:
"""patterns setter"""
self.__patterns = patterns

@property
def rule_name(self) -> str:
"""rule_name getter"""
return self.__rule_name

@rule_name.setter
def rule_name(self, rule_name: str) -> None:
"""rule_name setter"""
self.__rule_name = rule_name

@property
def severity(self) -> Severity:
"""severity getter"""
return self.__severity

@severity.setter
def severity(self, severity: Severity) -> None:
"""severity setter"""
self.__severity = severity

def _encode(self, value: Any) -> Any:
@staticmethod
def _encode(value: Any) -> Any:
"""Encode value to the base string ascii

Args:
Expand Down
Loading