-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
272 additions
and
332 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
Empty file.
12 changes: 3 additions & 9 deletions
12
credsweeper/ml_model/char_set.py → credsweeper/ml_model/features/char_set.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
from abc import ABC, abstractmethod | ||
from typing import List, Any | ||
|
||
import numpy as np | ||
|
||
from credsweeper.credentials import Candidate | ||
|
||
|
||
class Feature(ABC):
    """Base class for features.

    A concrete feature implements ``extract`` for a single candidate; calling the
    feature object applies ``extract`` to every candidate of a list.
    """

    def __init__(self) -> None:
        # Assigned through the ``words`` property setter defined below.
        self.words = []

    def __call__(self, candidates: List[Candidate]) -> np.ndarray:
        """Extract the feature from every candidate in the list.

        Args:
            candidates: list of candidates to extract features

        Returns:
            Array built from the per-candidate results of ``extract``, in input order.

        """
        return np.array([self.extract(candidate) for candidate in candidates])

    @abstractmethod
    def extract(self, candidate: Candidate) -> Any:
        """Compute the feature value for one candidate; subclasses must override this."""
        raise NotImplementedError

    @property
    def words(self) -> List[str]:
        """Substrings that ``any_word_in_`` looks for."""
        return self.__words

    @words.setter
    def words(self, words: List[str]) -> None:
        """Replace the substrings that ``any_word_in_`` looks for."""
        self.__words = words

    def any_word_in_(self, a_string: str) -> bool:
        """Return True if at least one of ``self.words`` occurs in ``a_string``.

        Args:
            a_string: text to search; matching is a plain, case-sensitive substring test

        Returns:
            True on the first word found in the text, False when none is found
            (including when ``self.words`` is empty).

        """
        return any(word in a_string for word in self.words)
|
||
|
||
|
||
|
9 changes: 2 additions & 7 deletions
9
credsweeper/ml_model/file_extension.py → ...eeper/ml_model/features/file_extension.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
|
||
from credsweeper.ml_model.features.reny_entropy import RenyiEntropy | ||
|
||
|
||
class HartleyEntropy(RenyiEntropy):
    """Hartley entropy feature.

    Thin specialisation of ``RenyiEntropy`` whose second constructor argument is
    fixed to ``0.0``.
    """

    def __init__(self, base: str, norm: bool = False) -> None:
        """Initialise the parent entropy feature with a fixed second argument.

        Args:
            base: passed through unchanged as the first ``RenyiEntropy`` argument
            norm: passed through unchanged as the third ``RenyiEntropy`` argument

        """
        # NOTE(review): presumably the Renyi order (alpha) - confirm against RenyiEntropy.
        fixed_order = 0.0
        super().__init__(base, fixed_order, norm)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
|
||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
|
||
from credsweeper.common.constants import CHUNK_SIZE | ||
from credsweeper.credentials import Candidate | ||
from credsweeper.ml_model.features.feature import Feature | ||
from credsweeper.utils import Util | ||
|
||
class HasHtmlTag(Feature):
    """Feature is true if line has HTML tags (HTML file)."""

    def __init__(self) -> None:
        """Register the opening-tag prefixes that are searched for in the line."""
        super().__init__()
        self.words = [
            '< img',
            '<img',
            '< script',
            '<script',
            '< p',
            '<p',
            '< link',
            '<link',
            '< meta',
            '<meta',
            '< a',
            '<a',
        ]

    def extract(self, candidate: Candidate) -> bool:
        """Report whether the text around the candidate value looks like HTML markup.

        Args:
            candidate: candidate whose first line data supplies the line and value start

        Returns:
            True when the lower-cased text returned by ``Util.subtext`` contains ``<``
            and also contains one of the registered tag prefixes, ``/>`` or ``</``.

        """
        first = candidate.line_data_list[0]
        lowered = Util.subtext(first.line, first.value_start, CHUNK_SIZE).lower()
        # Cheap rejection first: without '<' nothing counts, not even a lone "/>".
        if '<' not in lowered:
            return False
        # Either a known opening tag, or something that may be a closing tag.
        return self.any_word_in_(lowered) or "/>" in lowered or "</" in lowered
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
|
||
from credsweeper.credentials import Candidate | ||
from credsweeper.ml_model.features.feature import Feature | ||
|
||
|
||
class IsSecretNumeric(Feature):
    """Feature is true if candidate value is a numerical value."""

    def extract(self, candidate: Candidate) -> bool:
        """Check whether the candidate value can be parsed by ``float``.

        Args:
            candidate: candidate whose first line data supplies the value

        Returns:
            True when ``float(value)`` succeeds, False when it raises ``ValueError``.

        """
        secret = candidate.line_data_list[0].value
        try:
            float(secret)
        except ValueError:
            return False
        else:
            return True
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
|
||
from credsweeper.credentials import Candidate | ||
from credsweeper.ml_model.features.feature import Feature | ||
|
||
|
||
class PossibleComment(Feature):
    """Feature is true if candidate line starts with ``#``, ``*``, ``/*`` or ``//`` (Possible comment)."""

    def extract(self, candidate: Candidate) -> bool:
        """Check whether the candidate line begins with a comment marker.

        Args:
            candidate: candidate whose first line data supplies the line

        Returns:
            True when the line, ignoring leading whitespace, starts with
            ``#``, ``*``, ``/*`` or ``//``; False otherwise.

        """
        line = candidate.line_data_list[0].line.lstrip()
        # str.startswith accepts a tuple of prefixes, so one call covers all markers.
        return line.startswith(("#", "*", "/*", "//"))
11 changes: 3 additions & 8 deletions
11
credsweeper/ml_model/reny_entropy.py → ...sweeper/ml_model/features/reny_entropy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 2 additions & 8 deletions
10
credsweeper/ml_model/rule_name.py → credsweeper/ml_model/features/rule_name.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'.""" | ||
|
||
from credsweeper.ml_model.features.reny_entropy import RenyiEntropy | ||
|
||
|
||
class ShannonEntropy(RenyiEntropy):
    """Shannon entropy feature.

    Thin specialisation of ``RenyiEntropy`` whose second constructor argument is
    fixed to ``1.0``.
    """

    def __init__(self, base: str, norm: bool = False) -> None:
        """Initialise the parent entropy feature with a fixed second argument.

        Args:
            base: passed through unchanged as the first ``RenyiEntropy`` argument
            norm: passed through unchanged as the third ``RenyiEntropy`` argument

        """
        # NOTE(review): presumably the Renyi order (alpha) - confirm against RenyiEntropy.
        fixed_order = 1.0
        super().__init__(base, fixed_order, norm)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 3 additions & 8 deletions
11
credsweeper/ml_model/word_in_line.py → ...sweeper/ml_model/features/word_in_line.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 2 additions & 7 deletions
9
credsweeper/ml_model/word_in_path.py → ...sweeper/ml_model/features/word_in_path.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.