Skip to content

Commit

Permalink
refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Aug 31, 2024
1 parent 2d39f03 commit abd5ac8
Show file tree
Hide file tree
Showing 21 changed files with 272 additions and 332 deletions.
98 changes: 0 additions & 98 deletions credsweeper/ml_model/features.py

This file was deleted.

Empty file.
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import Dict

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.common.constants import Base, Chars
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features import Feature
from credsweeper.utils import Util

from credsweeper.ml_model.features.feature import Feature


class CharSet(Feature):
Expand Down
49 changes: 49 additions & 0 deletions credsweeper/ml_model/features/feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
from abc import ABC, abstractmethod
from typing import List, Any

import numpy as np

from credsweeper.credentials import Candidate


class Feature(ABC):
    """Abstract parent of the ML feature extractors.

    A subclass implements ``extract`` for a single candidate; calling the
    instance applies ``extract`` to every candidate of a list.
    """

    def __init__(self):
        # assigned through the ``words`` property setter defined below
        self.words = []

    def __call__(self, candidates: List[Candidate]) -> np.ndarray:
        """Extract the feature from each candidate.

        Args:
            candidates: list of candidates to extract features

        Returns:
            numpy array built from the per-candidate ``extract`` results

        """
        extracted = []
        for candidate in candidates:
            extracted.append(self.extract(candidate))
        return np.array(extracted)

    @abstractmethod
    def extract(self, candidate: Candidate) -> Any:
        """Compute the feature for one candidate - has to be overridden."""
        raise NotImplementedError

    @property
    def words(self) -> List[str]:
        """Word list consulted by ``any_word_in_``."""
        return self.__words

    @words.setter
    def words(self, words: List[str]) -> None:
        """Replace the stored word list."""
        self.__words = words

    def any_word_in_(self, a_string: str) -> bool:
        """Return True if at least one stored word is a substring of a_string."""
        return any(word in a_string for word in self.words)




Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import List, Any

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.word_in import WordIn
from credsweeper.utils import Util
from credsweeper.ml_model.features.word_in import WordIn


class FileExtension(WordIn):
Expand Down
11 changes: 11 additions & 0 deletions credsweeper/ml_model/features/hartley_entropy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""

from credsweeper.ml_model.features.reny_entropy import RenyiEntropy


class HartleyEntropy(RenyiEntropy):
    """Hartley entropy feature implemented on top of RenyiEntropy."""

    def __init__(self, base: str, norm: bool = False) -> None:
        # Forwarded as the second positional argument of RenyiEntropy.
        # NOTE(review): presumably the Renyi order (Hartley entropy is order 0) - confirm.
        order = 0.0
        super().__init__(base, order, norm)

30 changes: 30 additions & 0 deletions credsweeper/ml_model/features/has_html_tag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""

from credsweeper.common.constants import CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.feature import Feature
from credsweeper.utils import Util

class HasHtmlTag(Feature):
    """Feature is true if the inspected part of the line looks like HTML markup."""

    def __init__(self) -> None:
        super().__init__()
        # opening fragments of the tags that are searched in the lowered text
        self.words = [
            '< img',
            '<img',
            '< script',
            '<script',
            '< p',
            '<p',
            '< link',
            '<link',
            '< meta',
            '<meta',
            '< a',
            '<a',
        ]

    def extract(self, candidate: Candidate) -> bool:
        """Check the lowered subtext of the first line data for HTML tags.

        The text is what Util.subtext returns for the line, the value start
        position and CHUNK_SIZE. The result is True when the text contains one
        of the known tag openings or a closing sequence ("/>" or "</").
        """
        first_line_data = candidate.line_data_list[0]
        lowered = Util.subtext(first_line_data.line, first_line_data.value_start, CHUNK_SIZE).lower()
        if '<' not in lowered:
            # cheap rejection: no tag is possible without an angle bracket
            return False
        # known opening tag, or a possible closed tag
        return self.any_word_in_(lowered) or "/>" in lowered or "</" in lowered

16 changes: 16 additions & 0 deletions credsweeper/ml_model/features/is_secret_numeric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""

from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.feature import Feature


class IsSecretNumeric(Feature):
    """Feature is true if the candidate value can be converted with float()."""

    def extract(self, candidate: Candidate) -> bool:
        """Return True when float() accepts the value of the first line data."""
        try:
            float(candidate.line_data_list[0].value)
        except ValueError:
            # the value is not a text representation of a number
            is_numeric = False
        else:
            is_numeric = True
        return is_numeric

15 changes: 15 additions & 0 deletions credsweeper/ml_model/features/possible_comment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""

from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.feature import Feature


class PossibleComment(Feature):
    r"""Feature is true if candidate line starts with #, \*, /\* or // (Possible comment)."""

    def extract(self, candidate: Candidate) -> bool:
        """Test the left-stripped line of the first line data against comment prefixes."""
        stripped = candidate.line_data_list[0].line.lstrip()
        # str.startswith accepts a tuple and is true when any prefix matches
        return stripped.startswith(("#", "*", "/*", "//"))
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import Dict

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.common.constants import Base, Chars
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features import Feature
from credsweeper.utils import Util

from credsweeper.ml_model.features.feature import Feature


class RenyiEntropy(Feature):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import List, Any

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.word_in import WordIn
from credsweeper.utils import Util

from credsweeper.ml_model.features.word_in import WordIn


class RuleName(WordIn):
Expand Down
11 changes: 11 additions & 0 deletions credsweeper/ml_model/features/shannon_entropy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""

from credsweeper.ml_model.features.reny_entropy import RenyiEntropy


class ShannonEntropy(RenyiEntropy):
    """Shannon entropy feature implemented on top of RenyiEntropy."""

    def __init__(self, base: str, norm: bool = False) -> None:
        # Forwarded as the second positional argument of RenyiEntropy.
        # NOTE(review): presumably the Renyi order (Shannon entropy is the order 1 case) - confirm.
        order = 1.0
        super().__init__(base, order, norm)

Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from abc import abstractmethod
from typing import List, Any, Tuple, Set

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features import Feature
from credsweeper.utils import Util
from credsweeper.ml_model.features.feature import Feature


class WordIn(Feature):
Expand Down Expand Up @@ -52,12 +48,12 @@ def word_in_str(self, a_string: str) -> np.ndarray:
for i, word in self.enumerated_words:
if word in a_string:
result[i] = 1
return np.array([result])
return result

def word_in_set(self, a_strings_set: Set[str]) -> np.ndarray:
    """Returns array with words matches in a_strings_set

    Args:
        a_strings_set: set of strings that is tested for exact membership of each word

    Returns:
        1-D int8 array of length ``self.dimension``; the item at index i is 1
        when the word enumerated with index i is a member of a_strings_set, else 0

    """
    result = np.zeros(shape=[self.dimension], dtype=np.int8)
    for i, word in self.enumerated_words:
        if word in a_strings_set:
            result[i] = 1
    # single return of the flat vector - it is not wrapped into an extra outer dimension
    return result
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import List

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.common.constants import CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.word_in import WordIn
from credsweeper.utils import Util





class WordInLine(WordIn):
"""Feature is true if line contains at least one word from predefined list."""

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
"""Most rules are described in 'Secrets in Source Code: Reducing False Positives Using Machine Learning'."""
import contextlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Any, Dict, Tuple, Set
from typing import List, Any

import numpy as np

from credsweeper.common.constants import Base, Chars, CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.utils import Util


from credsweeper.ml_model.features.word_in import WordIn


class WordInPath(WordIn):
Expand Down
Loading

0 comments on commit abd5ac8

Please sign in to comment.