From 79ddd98bd262ecd89e29c40417cb5610c9b77061 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Sun, 16 Jul 2023 13:50:53 +0300 Subject: [PATCH] code refactored only --- credsweeper/app.py | 3 +-- credsweeper/deep_scanner/byte_scanner.py | 4 ++-- credsweeper/deep_scanner/deep_scanner.py | 12 ++++++------ credsweeper/deep_scanner/html_scanner.py | 4 ++-- credsweeper/deep_scanner/xml_scanner.py | 4 ++-- credsweeper/file_handler/byte_content_provider.py | 4 ++-- credsweeper/file_handler/content_provider.py | 12 +++++------- credsweeper/file_handler/data_content_provider.py | 4 ++-- credsweeper/file_handler/diff_content_provider.py | 12 +++++------- credsweeper/file_handler/string_content_provider.py | 11 +++++------ credsweeper/file_handler/struct_content_provider.py | 4 ++-- credsweeper/file_handler/text_content_provider.py | 4 ++-- credsweeper/scanner/scanner.py | 10 ++++------ 13 files changed, 40 insertions(+), 48 deletions(-) diff --git a/credsweeper/app.py b/credsweeper/app.py index 119e4f3fd..b140a990d 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -314,8 +314,7 @@ def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProv else: if content_provider.file_type not in self.config.exclude_containers: # Regular file scanning - analysis_targets = content_provider.get_analysis_target() - candidates = self.scanner.scan(analysis_targets) + candidates = self.scanner.scan(content_provider) # finally return result from 'file_scan' return candidates diff --git a/credsweeper/deep_scanner/byte_scanner.py b/credsweeper/deep_scanner/byte_scanner.py index b3c9945d7..7330d87bf 100644 --- a/credsweeper/deep_scanner/byte_scanner.py +++ b/credsweeper/deep_scanner/byte_scanner.py @@ -23,5 +23,5 @@ def data_scan( file_path=data_provider.file_path, file_type=data_provider.file_type, info=f"{data_provider.info}|RAW") - analysis_targets = byte_content_provider.get_analysis_target() - return self.scanner.scan(analysis_targets) + # analysis_targets = byte_content_provider.get_analysis_target() + return self.scanner.scan(byte_content_provider) diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py index e64a46638..3f7882415 100644 --- a/credsweeper/deep_scanner/deep_scanner.py +++ b/credsweeper/deep_scanner/deep_scanner.py @@ -93,8 +93,8 @@ def scan(self, # Feature to scan files which might be containers data = content_provider.data elif isinstance(content_provider, DiffContentProvider) and content_provider.diff: - analysis_targets = content_provider.get_analysis_target() - candidates = self.scanner.scan(analysis_targets) + # analysis_targets = content_provider.get_analysis_target() + candidates = self.scanner.scan(content_provider) # Feature to scan binary diffs diff = content_provider.diff[0].get("line") # the check for legal fix mypy issue @@ -220,8 +220,8 @@ def structure_scan( file_path=struct_provider.file_path, file_type=".toml", info=f"{struct_provider.info}|STRING:`{line}`") - str_analysis_targets = str_provider.get_analysis_target() - new_candidates = self.scanner.scan(str_analysis_targets) + # str_analysis_targets = str_provider.get_analysis_target() + new_candidates = self.scanner.scan(str_provider) augment_candidates(candidates, new_candidates) elif isinstance(value, int) or isinstance(value, float): pass @@ -235,7 +235,7 @@ def structure_scan( file_path=struct_provider.file_path, file_type=".toml", info=f"{struct_provider.info}|STRING:`{line}`") - key_value_analysis_targets = key_value_provider.get_analysis_target() - new_candidates = self.scanner.scan(key_value_analysis_targets) + # key_value_analysis_targets = key_value_provider.get_analysis_target() + new_candidates = self.scanner.scan(key_value_provider) augment_candidates(candidates, new_candidates) return candidates diff --git a/credsweeper/deep_scanner/html_scanner.py b/credsweeper/deep_scanner/html_scanner.py index f2bcbebe3..85a046195 100644 --- a/credsweeper/deep_scanner/html_scanner.py +++ b/credsweeper/deep_scanner/html_scanner.py @@ -25,6 +25,6 @@ def data_scan( file_path=data_provider.file_path, file_type=data_provider.file_type, info=f"{data_provider.info}|HTML") - analysis_targets = string_data_provider.get_analysis_target() - return self.scanner.scan(analysis_targets) + # analysis_targets = string_data_provider.get_analysis_target() + return self.scanner.scan(string_data_provider) return [] diff --git a/credsweeper/deep_scanner/xml_scanner.py b/credsweeper/deep_scanner/xml_scanner.py index c54037b7d..7dc13a557 100644 --- a/credsweeper/deep_scanner/xml_scanner.py +++ b/credsweeper/deep_scanner/xml_scanner.py @@ -25,6 +25,6 @@ def data_scan( file_path=data_provider.file_path, file_type=data_provider.file_type, info=f"{data_provider.info}|XML") - analysis_targets = string_data_provider.get_analysis_target() - return self.scanner.scan(analysis_targets) + # analysis_targets = string_data_provider.get_analysis_target() + return self.scanner.scan(string_data_provider) return [] diff --git a/credsweeper/file_handler/byte_content_provider.py b/credsweeper/file_handler/byte_content_provider.py index ddbf8d2be..89cdf0efc 100644 --- a/credsweeper/file_handler/byte_content_provider.py +++ b/credsweeper/file_handler/byte_content_provider.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Generator from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -45,7 +45,7 @@ def lines(self, lines: List[str]) -> None: """lines setter for ByteContentProvider""" self.__lines = lines - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget,None,None]: """Return lines to scan. Return: diff --git a/credsweeper/file_handler/content_provider.py b/credsweeper/file_handler/content_provider.py index c081abe8c..2701a6652 100644 --- a/credsweeper/file_handler/content_provider.py +++ b/credsweeper/file_handler/content_provider.py @@ -1,7 +1,7 @@ import logging from abc import ABC, abstractmethod from functools import cached_property -from typing import List, Optional +from typing import List, Optional, Generator from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.descriptor import Descriptor @@ -31,7 +31,7 @@ def __init__( self.__descriptor = Descriptor(_file_path, _file_type, _info) @abstractmethod - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget,None,None]: """Load and preprocess file diff data to scan. Return: @@ -72,18 +72,16 @@ def data(self, data: Optional[bytes]) -> None: """abstract data setter""" raise NotImplementedError(__name__) - def lines_to_targets(self, lines: List[str], line_nums: Optional[List[int]] = None) -> List[AnalysisTarget]: + def lines_to_targets(self, lines: List[str], line_nums: Optional[List[int]] = None) -> Generator[AnalysisTarget,None,None]: """Creates list of targets with multiline concatenation""" - targets = [] if line_nums and len(line_nums) == len(lines): for line_pos in range(len(lines)): target = AnalysisTarget(line_pos, lines, line_nums, self.descriptor) - targets.append(target) + yield target else: if line_nums and len(line_nums) != len(lines): logger.warning(f"line numerations {len(line_nums)} does not match lines {len(lines)}") _line_nums = [x for x in range(len(lines))] for line_pos in range(len(lines)): target = AnalysisTarget(line_pos, lines, _line_nums, self.descriptor) - targets.append(target) - return targets + yield target diff --git a/credsweeper/file_handler/data_content_provider.py b/credsweeper/file_handler/data_content_provider.py index 698eb8c2c..3145980b2 100644 --- a/credsweeper/file_handler/data_content_provider.py +++ b/credsweeper/file_handler/data_content_provider.py @@ -2,7 +2,7 @@ import json import logging import string -from typing import List, Optional, Any +from typing import List, Optional, Any, Generator import yaml from bs4 import BeautifulSoup @@ -224,7 +224,7 @@ def represent_as_encoded(self) -> bool: return self.decoded is not None and 0 < len(self.decoded) return False - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget,None,None]: """Return nothing. The class provides only data storage. Raise: diff --git a/credsweeper/file_handler/diff_content_provider.py b/credsweeper/file_handler/diff_content_provider.py index ce1576abd..495763346 100644 --- a/credsweeper/file_handler/diff_content_provider.py +++ b/credsweeper/file_handler/diff_content_provider.py @@ -1,5 +1,5 @@ import logging -from typing import List, Tuple +from typing import List, Tuple, Generator from credsweeper.common.constants import DiffRowType from credsweeper.file_handler.analysis_target import AnalysisTarget @@ -67,7 +67,7 @@ def parse_lines_data(self, lines_data: List[DiffRowData]) -> Tuple[List[int], Li all_lines.append(line_data.line) return change_numbs, all_lines - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget, None, None]: """Preprocess file diff data to scan. Return: @@ -77,14 +77,12 @@ def get_analysis_target(self) -> List[AnalysisTarget]: lines_data = Util.preprocess_file_diff(self.diff) try: change_numbs, all_lines = self.parse_lines_data(lines_data) - return [ - AnalysisTarget( + for l_pos in range(len(change_numbs)): + target = AnalysisTarget( l_pos, # all_lines, # change_numbs, # self.descriptor) # - for l_pos in range(len(change_numbs)) - ] + yield target except Exception as exc: logger.error(f"Wrong diff {type(exc)} {exc}") - return [] diff --git a/credsweeper/file_handler/string_content_provider.py b/credsweeper/file_handler/string_content_provider.py index 55499d6cd..fce08619c 100644 --- a/credsweeper/file_handler/string_content_provider.py +++ b/credsweeper/file_handler/string_content_provider.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Generator from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -37,14 +37,13 @@ def data(self, data: bytes) -> None: """data setter for StringContentProvider""" raise NotImplementedError(__name__) - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget, None, None]: """Return lines to scan. Return: list of analysis targets based on every row in file """ - return [ - AnalysisTarget(line_pos, self.lines, self.line_numbers, self.descriptor) - for line_pos in range(len(self.lines)) - ] + for line_pos in range(len(self.lines)): + target = AnalysisTarget(line_pos, self.lines, self.line_numbers, self.descriptor) + yield target diff --git a/credsweeper/file_handler/struct_content_provider.py b/credsweeper/file_handler/struct_content_provider.py index 9dbf0ebb4..bfe0de9e7 100644 --- a/credsweeper/file_handler/struct_content_provider.py +++ b/credsweeper/file_handler/struct_content_provider.py @@ -1,5 +1,5 @@ import logging -from typing import List, Optional, Any +from typing import List, Optional, Any, Generator from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -44,7 +44,7 @@ def data(self, data: bytes) -> None: """data setter for StructContentProvider""" raise NotImplementedError(__name__) - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget,None,None]: """Return nothing. The class provides only data storage. Raise: diff --git a/credsweeper/file_handler/text_content_provider.py b/credsweeper/file_handler/text_content_provider.py index 6f607575c..51d6d52e4 100644 --- a/credsweeper/file_handler/text_content_provider.py +++ b/credsweeper/file_handler/text_content_provider.py @@ -1,7 +1,7 @@ import io import logging from pathlib import Path -from typing import List, Optional, Union, Tuple +from typing import List, Optional, Union, Tuple, Generator from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -55,7 +55,7 @@ def lines(self, lines: Optional[List[str]]) -> None: """lines setter for TextContentProvider""" self.__lines = lines - def get_analysis_target(self) -> List[AnalysisTarget]: + def yield_analysis_target(self) -> Generator[AnalysisTarget,None,None]: """Load and preprocess file content to scan. Return: diff --git a/credsweeper/scanner/scanner.py b/credsweeper/scanner/scanner.py index e95a9bab7..a7ee4f301 100644 --- a/credsweeper/scanner/scanner.py +++ b/credsweeper/scanner/scanner.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import List, Optional, Type, Tuple, Union, Dict +from credsweeper.file_handler.content_provider import ContentProvider from credsweeper.app import APP_PATH from credsweeper.common.constants import RuleType, MIN_VARIABLE_LENGTH, MIN_SEPARATOR_LENGTH, MIN_VALUE_LENGTH, \ MAX_LINE_LENGTH, Separator, PEM_BEGIN_PATTERN @@ -88,11 +89,11 @@ def _required_regex_not_matched(required_regex: re.Pattern, line: str) -> bool: return False return True - def scan(self, targets: List[AnalysisTarget]) -> List[Candidate]: + def scan(self, provider: ContentProvider) -> List[Candidate]: """Run scanning of list of target lines from 'targets' with set of rule from 'self.rules'. Args: - targets: objects with data to analyze: line, line number, + provider: objects with data to analyze: line, line number, filepath and all lines in file Return: @@ -100,11 +101,8 @@ def scan(self, targets: List[AnalysisTarget]) -> List[Candidate]: """ credentials: List[Candidate] = [] - if not targets: - # optimization for empty list - return credentials - for target in targets: + for target in provider.yield_analysis_target(): # Ignore target if it's too long line_len = len(target.line) if MAX_LINE_LENGTH < line_len: