From cc253311e89d1c9014bb207b81b1e03127e24ca7 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 7 Aug 2023 11:15:28 +0300 Subject: [PATCH] VIN detection (#393) * vin dbg * removed debug * codestyle * benchmark fix * benchmark fix2 * RENAMING: value card number check * proposed PR template * doc * rollback_benchmark --- cicd/benchmark.txt | 10 ++-- credsweeper/filters/__init__.py | 1 + credsweeper/filters/value_vin_check.py | 83 ++++++++++++++++++++++++++ credsweeper/rules/config.yaml | 12 ++++ tests/__init__.py | 8 +-- tests/data/depth_3.json | 22 +++++++ tests/data/ml_threshold_0.json | 22 +++++++ tests/data/output.json | 22 +++++++ tests/samples/vin | 6 ++ 9 files changed, 177 insertions(+), 9 deletions(-) create mode 100644 credsweeper/filters/value_vin_check.py create mode 100644 tests/samples/vin diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 0ed7f2833..4d9517778 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -1,11 +1,11 @@ -Detected Credentials: 4689 -result_cnt : 4178, lost_cnt : 94, true_cnt : 3721, false_cnt : 363 -credsweeper -> TP : 3721, FP : 363, TN : 19429483, FN : 891, FPR : 0.0000186826, FNR : 0.1931916739, ACC : 0.9999354754, PRC : 0.9111165524, RCL : 0.8068083261, F1 : 0.8557957682 +Detected Credentials: 4721 +result_cnt : 4208, lost_cnt : 0, true_cnt : 3844, false_cnt : 364 +credsweeper -> TP : 3844, FP : 364, TN : 19429359, FN : 891, FPR : 0.0000187342, FNR : 0.1881731785, ACC : 0.9999354240, PRC : 0.9134980989, RCL : 0.8118268215, F1 : 0.8596667785 credsweeper Private Key -> TP : 967, FP : 0, TN : 4, FN : 34, FPR : None, FNR : 0.0339660340, ACC : 0.9661691542, PRC : 1.0000000000, RCL : 0.9660339660, F1 : 0.9827235772 credsweeper Predefined Pattern -> TP : 309, FP : 2, TN : 40, FN : 17, FPR : 0.0476190476, FNR : 0.0521472393, ACC : 0.9483695652, PRC : 0.9935691318, RCL : 0.9478527607, F1 : 0.9701726845 -credsweeper Password -> TP : 977, FP : 116, TN : 4164, FN : 422, FPR : 0.0271028037, FNR : 0.3016440315, ACC : 
0.9052650114, PRC : 0.8938700823, RCL : 0.6983559685, F1 : 0.7841091493 +credsweeper Password -> TP : 981, FP : 116, TN : 4164, FN : 422, FPR : 0.0271028037, FNR : 0.3007840342, ACC : 0.9053316910, PRC : 0.8942570647, RCL : 0.6992159658, F1 : 0.7848000000 credsweeper Generic Token -> TP : 284, FP : 6, TN : 597, FN : 49, FPR : 0.0099502488, FNR : 0.1471471471, ACC : 0.9412393162, PRC : 0.9793103448, RCL : 0.8528528529, F1 : 0.9117174960 -credsweeper Other -> TP : 127, FP : 6, TN : 738, FN : 265, FPR : 0.0080645161, FNR : 0.6760204082, ACC : 0.7614436620, PRC : 0.9548872180, RCL : 0.3239795918, F1 : 0.4838095238 +credsweeper Other -> TP : 246, FP : 7, TN : 738, FN : 265, FPR : 0.0093959732, FNR : 0.5185909980, ACC : 0.7834394904, PRC : 0.9723320158, RCL : 0.4814090020, F1 : 0.6439790576 credsweeper Generic Secret -> TP : 971, FP : 2, TN : 216, FN : 84, FPR : 0.0091743119, FNR : 0.0796208531, ACC : 0.9324430479, PRC : 0.9979445015, RCL : 0.9203791469, F1 : 0.9575936884 credsweeper Seed, Salt, Nonce -> TP : 35, FP : 2, TN : 6, FN : 4, FPR : 0.2500000000, FNR : 0.1025641026, ACC : 0.8723404255, PRC : 0.9459459459, RCL : 0.8974358974, F1 : 0.9210526316 credsweeper Authentication Key & Token -> TP : 51, FP : 4, TN : 28, FN : 16, FPR : 0.1250000000, FNR : 0.2388059701, ACC : 0.7979797980, PRC : 0.9272727273, RCL : 0.7611940299, F1 : 0.8360655738 diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py index 5d297af9a..ce16128e4 100644 --- a/credsweeper/filters/__init__.py +++ b/credsweeper/filters/__init__.py @@ -38,4 +38,5 @@ from credsweeper.filters.value_token_base64_check import ValueTokenBase64Check from credsweeper.filters.value_token_check import ValueTokenCheck from credsweeper.filters.value_useless_word_check import ValueUselessWordCheck +from credsweeper.filters.value_vin_check import ValueVinCheck from credsweeper.filters.variable_not_allowed_pattern_check import VariableNotAllowedPatternCheck diff --git 
a/credsweeper/filters/value_vin_check.py b/credsweeper/filters/value_vin_check.py
new file mode 100644
index 000000000..da0f3ba66
--- /dev/null
+++ b/credsweeper/filters/value_vin_check.py
@@ -0,0 +1,83 @@
+import contextlib
+
+from credsweeper.config import Config
+from credsweeper.credentials import LineData
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.filters import Filter
+
+
+class ValueVinCheck(Filter):
+    """Check that value is a VIN with a matching NHTSA check digit (9th character)"""
+    WEIGHTS = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2]
+    TRANSLITERATIONS = {
+        "0": 0,
+        "1": 1,
+        "2": 2,
+        "3": 3,
+        "4": 4,
+        "5": 5,
+        "6": 6,
+        "7": 7,
+        "8": 8,
+        "9": 9,
+        "A": 1,
+        "B": 2,
+        "C": 3,
+        "D": 4,
+        "E": 5,
+        "F": 6,
+        "G": 7,
+        "H": 8,
+        "J": 1,
+        "K": 2,
+        "L": 3,
+        "M": 4,
+        "N": 5,
+        "P": 7,
+        "R": 9,
+        "S": 2,
+        "T": 3,
+        "U": 4,
+        "V": 5,
+        "W": 6,
+        "X": 7,
+        "Y": 8,
+        "Z": 9,
+    }
+
+    def __init__(self, config: Config = None) -> None:
+        pass
+
+    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
+        """Run filter checks on received credential candidate data 'line_data'.
+
+        Args:
+            line_data: credential candidate data
+            target: multiline target from which line data was obtained
+
+        Return:
+            True, if the sequence is not a valid VIN and must be filtered out. False if it is
+
+        """
+        if line_data.value is None or 17 != len(line_data.value):
+            return True
+
+        # a purely numeric sequence is not reported as a VIN
+        with contextlib.suppress(ValueError):
+            int(line_data.value)
+            return True
+
+        # NHTSA (National Highway Traffic Safety Administration)
+        # https://en.wikipedia.org/wiki/Vehicle_identification_number
+        with contextlib.suppress(KeyError):
+            s = 0
+            for w, v in zip(ValueVinCheck.WEIGHTS, line_data.value):
+                s += w * ValueVinCheck.TRANSLITERATIONS[v]
+            r = s % 11
+            c = line_data.value[8]
+            # the check digit is a decimal digit, or "X" when the remainder is 10 - never another letter
+            if ("X" == c and 10 == r) or (c.isdigit() and int(c) == r):
+                return False
+
+        # return True when the sequence has a wrong check digit or a character outside the VIN alphabet
+        return True
diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
index fae5a048e..c3df5926a 100644
--- a/credsweeper/rules/config.yaml
+++ b/credsweeper/rules/config.yaml
@@ -1,3 +1,15 @@
+- name: VIN
+  severity: info
+  type: pattern
+  values:
+    - (^|[^0-9A-Za-z])(?P<value>[A-HJ-NPR-Z0-9]{17})([^=0-9A-Za-z]|$)
+  filter_type:
+    - ValueVinCheck
+    - ValuePatternCheck
+  min_line_len: 16
+  required_regex: "[a-zA-Z0-9_/+-]{15,}"
+  doc_available: false
+
 - name: Credit card number
   severity: info
   type: pattern
diff --git a/tests/__init__.py b/tests/__init__.py
index 7fa57c051..7faa946a6 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,14 +1,14 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT: int = 107
+SAMPLES_FILES_COUNT: int = 108
 
 # credentials count after scan
-SAMPLES_CRED_COUNT: int = 104
-SAMPLES_CRED_LINE_COUNT: int = 115
+SAMPLES_CRED_COUNT: int = 105
+SAMPLES_CRED_LINE_COUNT: int = 116
 
 # credentials count after post-processing
-SAMPLES_POST_CRED_COUNT: int = 98
+SAMPLES_POST_CRED_COUNT: int = 99
 
 # with option --doc
 SAMPLES_IN_DOC = 86
diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json
index bb3ff63b8..4f4f8d90b 100644
--- a/tests/data/depth_3.json
+++ b/tests/data/depth_3.json
@@ -3628,6 +3628,28 @@
             }
         ]
     },
+    {
+        "api_validation": 
"NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "VIN", + "severity": "info", + "line_data_list": [ + { + "line": "1M8GDM9AXKP042788", + "line_num": 6, + "path": "tests/samples/vin", + "info": "tests/samples/vin|RAW", + "value": "1M8GDM9AXKP042788", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6901165175936654, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/ml_threshold_0.json b/tests/data/ml_threshold_0.json index c5877d248..3aadf674c 100644 --- a/tests/data/ml_threshold_0.json +++ b/tests/data/ml_threshold_0.json @@ -2364,6 +2364,28 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "VIN", + "severity": "info", + "line_data_list": [ + { + "line": "1M8GDM9AXKP042788", + "line_num": 6, + "path": "tests/samples/vin", + "info": "", + "value": "1M8GDM9AXKP042788", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6901165175936654, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/output.json b/tests/data/output.json index 70a6588af..7a986acb5 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -2254,6 +2254,28 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "VIN", + "severity": "info", + "line_data_list": [ + { + "line": "1M8GDM9AXKP042788", + "line_num": 6, + "path": "tests/samples/vin", + "info": "", + "value": "1M8GDM9AXKP042788", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6901165175936654, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/samples/vin b/tests/samples/vin new file mode 100644 index 
000000000..0981084a4 --- /dev/null +++ b/tests/samples/vin @@ -0,0 +1,6 @@ +# fake or not NHTSA +00000000000000000 +98374914723492346 +M9AX4278X1MKP08GD +# valid +1M8GDM9AXKP042788