diff --git a/.github/workflows/build_deploy.yml b/.github/workflows/build_deploy.yml index 018cc7b2ac4..4a6775f33bf 100644 --- a/.github/workflows/build_deploy.yml +++ b/.github/workflows/build_deploy.yml @@ -9,18 +9,6 @@ on: # before merging/releasing - build_deploy* pull_request: - paths: - - ".github/workflows/build_deploy.yml" - - ".github/workflows/build_python_3.yml" - - "setup.py" - - "setup.cfg" - - "pyproject.toml" - - "**.c" - - "**.h" - - "**.cpp" - - "**.hpp" - - "**.pyx" - - "ddtrace/vendor/**" release: types: - published diff --git a/benchmarks/appsec_iast_propagation/scenario.py b/benchmarks/appsec_iast_propagation/scenario.py index 56ac67128b7..ec827b7bb21 100644 --- a/benchmarks/appsec_iast_propagation/scenario.py +++ b/benchmarks/appsec_iast_propagation/scenario.py @@ -1,8 +1,7 @@ from typing import Any # noqa:F401 import bm - -from tests.utils import override_env +from bm.utils import override_env with override_env({"DD_IAST_ENABLED": "True"}): @@ -42,7 +41,7 @@ def aspect_function(internal_loop, tainted): value = "" res = value for _ in range(internal_loop): - res = add_aspect(res, join_aspect(str.join, 1, "_", (tainted, "_", tainted))) + res = add_aspect(res, join_aspect("_".join, 1, "_", (tainted, "_", tainted))) value = res res = add_aspect(res, tainted) value = res diff --git a/ddtrace/appsec/_asm_request_context.py b/ddtrace/appsec/_asm_request_context.py index 173027c0d10..654e06a29e5 100644 --- a/ddtrace/appsec/_asm_request_context.py +++ b/ddtrace/appsec/_asm_request_context.py @@ -13,6 +13,7 @@ from ddtrace._trace.span import Span from ddtrace.appsec import _handlers from ddtrace.appsec._constants import APPSEC +from ddtrace.appsec._constants import EXPLOIT_PREVENTION from ddtrace.appsec._constants import SPAN_DATA_NAMES from ddtrace.appsec._constants import WAF_CONTEXT_NAMES from ddtrace.appsec._ddwaf import DDWaf_result @@ -147,6 +148,12 @@ def __init__(self): "triggered": False, "timeout": False, "version": None, + "rasp": { + "called": False, + "eval": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE}, + "match": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE}, + "timeout": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE}, + }, } env.callbacks[_CONTEXT_CALL] = [] @@ -330,15 +337,27 @@ def asm_request_context_set( def set_waf_telemetry_results( - rules_version: Optional[str], is_triggered: bool, is_blocked: bool, is_timeout: bool + rules_version: Optional[str], + is_triggered: bool, + is_blocked: bool, + is_timeout: bool, + rule_type: Optional[str], ) -> None: result = get_value(_TELEMETRY, _TELEMETRY_WAF_RESULTS) if result is not None: - result["triggered"] |= is_triggered - result["blocked"] |= is_blocked - result["timeout"] |= is_timeout - if rules_version is not None: - result["version"] = rules_version + if rule_type is None: + # Request Blocking telemetry + result["triggered"] |= is_triggered + result["blocked"] |= is_blocked + result["timeout"] |= is_timeout + if rules_version is not None: + result["version"] = rules_version + else: + # Exploit Prevention telemetry + result["rasp"]["called"] = True + result["rasp"]["eval"][rule_type] += 1 + result["rasp"]["match"][rule_type] += int(is_triggered) + result["rasp"]["timeout"][rule_type] += int(is_timeout) def get_waf_telemetry_results() -> Optional[Dict[str, Any]]: diff --git a/ddtrace/appsec/_common_module_patches.py b/ddtrace/appsec/_common_module_patches.py index 312a88a41d4..69c2610cab5 100644 --- a/ddtrace/appsec/_common_module_patches.py +++ b/ddtrace/appsec/_common_module_patches.py @@ -3,6 +3,7 @@ import ctypes import gc +import os from typing import Any from typing import Callable from typing import Dict @@ -48,14 +49,23 @@ def wrapped_open_CFDDB7ABBA9081B6(original_open_callable, instance, args, kwargs try: from ddtrace.appsec._asm_request_context import call_waf_callback from ddtrace.appsec._asm_request_context import in_context + from ddtrace.appsec._constants import EXPLOIT_PREVENTION except ImportError: # open is used during module initialization # and shouldn't be changed at that time return original_open_callable(*args, **kwargs) - filename = args[0] if args else kwargs.get("file", None) + filename_arg = args[0] if args else kwargs.get("file", None) + try: + filename = os.fspath(filename_arg) + except Exception: + filename = "" if filename and in_context(): - call_waf_callback({"LFI_ADDRESS": filename}, crop_trace="wrapped_open_CFDDB7ABBA9081B6") + call_waf_callback( + {EXPLOIT_PREVENTION.ADDRESS.LFI: filename}, + crop_trace="wrapped_open_CFDDB7ABBA9081B6", + rule_type=EXPLOIT_PREVENTION.TYPE.LFI, + ) # DEV: Next part of the exploit prevention feature: add block here return original_open_callable(*args, **kwargs) @@ -72,6 +82,7 @@ def wrapped_open_ED4CF71136E15EBF(original_open_callable, instance, args, kwargs try: from ddtrace.appsec._asm_request_context import call_waf_callback from ddtrace.appsec._asm_request_context import in_context + from ddtrace.appsec._constants import EXPLOIT_PREVENTION except ImportError: # open is used during module initialization # and shouldn't be changed at that time @@ -82,7 +93,11 @@ def wrapped_open_ED4CF71136E15EBF(original_open_callable, instance, args, kwargs if url.__class__.__name__ == "Request": url = url.get_full_url() if isinstance(url, str): - call_waf_callback({"SSRF_ADDRESS": url}, crop_trace="wrapped_open_ED4CF71136E15EBF") + call_waf_callback( + {EXPLOIT_PREVENTION.ADDRESS.SSRF: url}, + crop_trace="wrapped_open_ED4CF71136E15EBF", + rule_type=EXPLOIT_PREVENTION.TYPE.SSRF, + ) # DEV: Next part of the exploit prevention feature: add block here return original_open_callable(*args, **kwargs) @@ -100,6 +115,7 @@ def wrapped_request_D8CB81E472AF98A2(original_request_callable, instance, args, try: from ddtrace.appsec._asm_request_context import call_waf_callback from ddtrace.appsec._asm_request_context import in_context + from ddtrace.appsec._constants import EXPLOIT_PREVENTION except ImportError: # open is used during module initialization # and shouldn't be changed at that time @@ -108,7 +124,11 @@ def wrapped_request_D8CB81E472AF98A2(original_request_callable, instance, args, url = args[1] if len(args) > 1 else kwargs.get("url", None) if url and in_context(): if isinstance(url, str): - call_waf_callback({"SSRF_ADDRESS": url}, crop_trace="wrapped_request_D8CB81E472AF98A2") + call_waf_callback( + {EXPLOIT_PREVENTION.ADDRESS.SSRF: url}, + crop_trace="wrapped_request_D8CB81E472AF98A2", + rule_type=EXPLOIT_PREVENTION.TYPE.SSRF, + ) # DEV: Next part of the exploit prevention feature: add block here return original_request_callable(*args, **kwargs) diff --git a/ddtrace/appsec/_constants.py b/ddtrace/appsec/_constants.py index c7a3fad3cf3..59f90a335dc 100644 --- a/ddtrace/appsec/_constants.py +++ b/ddtrace/appsec/_constants.py @@ -248,3 +248,12 @@ class EXPLOIT_PREVENTION(metaclass=Constant_Class): STACK_TRACE_ENABLED = "DD_APPSEC_STACK_TRACE_ENABLED" MAX_STACK_TRACES = "DD_APPSEC_MAX_STACK_TRACES" MAX_STACK_TRACE_DEPTH = "DD_APPSEC_MAX_STACK_TRACE_DEPTH" + + class TYPE(metaclass=Constant_Class): + LFI = "lfi" + SSRF = "ssrf" + SQLI = "sql_injection" + + class ADDRESS(metaclass=Constant_Class): + LFI = "LFI_ADDRESS" + SSRF = "SSRF_ADDRESS" diff --git a/ddtrace/appsec/_iast/_evidence_redaction/__init__.py b/ddtrace/appsec/_iast/_evidence_redaction/__init__.py new file mode 100644 index 00000000000..195391ffab2 --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/__init__.py @@ -0,0 +1,4 @@ +from ddtrace.appsec._iast._evidence_redaction._sensitive_handler import sensitive_handler + + +sensitive_handler diff --git a/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py b/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py new file mode 100644 index 00000000000..b76ad6c96b1 --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py @@ -0,0 +1,363 @@ +import re + +from ddtrace.internal.logger import get_logger +from ddtrace.settings.asm import config as asm_config + +from ..constants import VULN_CMDI +from ..constants import VULN_HEADER_INJECTION +from ..constants import VULN_SSRF +from .command_injection_sensitive_analyzer import command_injection_sensitive_analyzer +from .header_injection_sensitive_analyzer import header_injection_sensitive_analyzer +from .url_sensitive_analyzer import url_sensitive_analyzer + + +log = get_logger(__name__) + +REDACTED_SOURCE_BUFFER = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + + +class SensitiveHandler: + """ + Class responsible for handling sensitive information. + """ + + def __init__(self): + self._name_pattern = re.compile(asm_config._iast_redaction_name_pattern, re.IGNORECASE | re.MULTILINE) + self._value_pattern = re.compile(asm_config._iast_redaction_value_pattern, re.IGNORECASE | re.MULTILINE) + + self._sensitive_analyzers = { + VULN_CMDI: command_injection_sensitive_analyzer, + # SQL_INJECTION: sql_sensitive_analyzer, + VULN_SSRF: url_sensitive_analyzer, + VULN_HEADER_INJECTION: header_injection_sensitive_analyzer, + } + + @staticmethod + def _contains(range_container, range_contained): + """ + Checks if a range_container contains another range_contained. + + Args: + - range_container (dict): The container range. + - range_contained (dict): The contained range. + + Returns: + - bool: True if range_container contains range_contained, False otherwise. + """ + if range_container["start"] > range_contained["start"]: + return False + return range_container["end"] >= range_contained["end"] + + @staticmethod + def _intersects(range_a, range_b): + """ + Checks if two ranges intersect. + + Args: + - range_a (dict): First range. + - range_b (dict): Second range. + + Returns: + - bool: True if the ranges intersect, False otherwise. + """ + return range_b["start"] < range_a["end"] and range_b["end"] > range_a["start"] + + def _remove(self, range_, range_to_remove): + """ + Removes a range_to_remove from a range_. + + Args: + - range_ (dict): The range to remove from. + - range_to_remove (dict): The range to remove. + + Returns: + - list: List containing the remaining parts after removing the range_to_remove. + """ + if not self._intersects(range_, range_to_remove): + return [range_] + elif self._contains(range_to_remove, range_): + return [] + else: + result = [] + if range_to_remove["start"] > range_["start"]: + offset = range_to_remove["start"] - range_["start"] + result.append({"start": range_["start"], "end": range_["start"] + offset}) + if range_to_remove["end"] < range_["end"]: + offset = range_["end"] - range_to_remove["end"] + result.append({"start": range_to_remove["end"], "end": range_to_remove["end"] + offset}) + return result + + def is_sensible_name(self, name): + """ + Checks if a name is sensible based on the name pattern. + + Args: + - name (str): The name to check. + + Returns: + - bool: True if the name is sensible, False otherwise. + """ + return bool(self._name_pattern.search(name)) + + def is_sensible_value(self, value): + """ + Checks if a value is sensible based on the value pattern. + + Args: + - value (str): The value to check. + + Returns: + - bool: True if the value is sensible, False otherwise. + """ + return bool(self._value_pattern.search(value)) + + def is_sensible_source(self, source): + """ + Checks if a source is sensible. + + Args: + - source (dict): The source to check. + + Returns: + - bool: True if the source is sensible, False otherwise. + """ + return ( + source is not None + and source.value is not None + and (self.is_sensible_name(source.name) or self.is_sensible_value(source.value)) + ) + + def scrub_evidence(self, vulnerability_type, evidence, tainted_ranges, sources): + """ + Scrubs evidence based on the given vulnerability type. + + Args: + - vulnerability_type (str): The vulnerability type. + - evidence (dict): The evidence to scrub. + - tainted_ranges (list): List of tainted ranges. + - sources (list): List of sources. + + Returns: + - dict: The scrubbed evidence. + """ + if asm_config._iast_redaction_enabled: + sensitive_analyzer = self._sensitive_analyzers.get(vulnerability_type) + if sensitive_analyzer: + if not evidence.value: + log.debug("No evidence value found in evidence %s", evidence) + return None + sensitive_ranges = sensitive_analyzer(evidence, self._name_pattern, self._value_pattern) + return self.to_redacted_json(evidence.value, sensitive_ranges, tainted_ranges, sources) + return None + + def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources): + """ + Converts evidence value to redacted JSON format. + + Args: + - evidence_value (str): The evidence value. + - sensitive (list): List of sensitive ranges. + - tainted_ranges (list): List of tainted ranges. + - sources (list): List of sources. + + Returns: + - dict: The redacted JSON. + """ + value_parts = [] + redacted_sources = [] + redacted_sources_context = dict() + + start = 0 + next_tainted_index = 0 + source_index = None + + next_tainted = tainted_ranges.pop(0) if tainted_ranges else None + next_sensitive = sensitive.pop(0) if sensitive else None + i = 0 + while i < len(evidence_value): + if next_tainted and next_tainted["start"] == i: + self.write_value_part(value_parts, evidence_value[start:i], source_index) + + source_index = next_tainted_index + + while next_sensitive and self._contains(next_tainted, next_sensitive): + redaction_start = next_sensitive["start"] - next_tainted["start"] + redaction_end = next_sensitive["end"] - next_tainted["start"] + if redaction_start == redaction_end: + self.write_redacted_value_part(value_parts, 0) + else: + self.redact_source( + sources, + redacted_sources, + redacted_sources_context, + source_index, + redaction_start, + redaction_end, + ) + next_sensitive = sensitive.pop(0) if sensitive else None + + if next_sensitive and self._intersects(next_sensitive, next_tainted): + redaction_start = next_sensitive["start"] - next_tainted["start"] + redaction_end = next_sensitive["end"] - next_tainted["start"] + + self.redact_source( + sources, + redacted_sources, + redacted_sources_context, + source_index, + redaction_start, + redaction_end, + ) + + entries = self._remove(next_sensitive, next_tainted) + next_sensitive = entries[0] if entries else None + + if source_index < len(sources): + if not sources[source_index].redacted and self.is_sensible_source(sources[source_index]): + redacted_sources.append(source_index) + sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)] + sources[source_index].redacted = True + + if source_index in redacted_sources: + part_value = evidence_value[i : i + (next_tainted["end"] - next_tainted["start"])] + + self.write_redacted_value_part( + value_parts, + len(part_value), + source_index, + part_value, + sources[source_index], + redacted_sources_context.get(source_index), + self.is_sensible_source(sources[source_index]), + ) + redacted_sources_context[source_index] = [] + else: + substring_end = min(next_tainted["end"], len(evidence_value)) + self.write_value_part( + value_parts, evidence_value[next_tainted["start"] : substring_end], source_index + ) + + start = i + (next_tainted["end"] - next_tainted["start"]) + i = start - 1 + next_tainted = tainted_ranges.pop(0) if tainted_ranges else None + next_tainted_index += 1 + source_index = None + continue + elif next_sensitive and next_sensitive["start"] == i: + self.write_value_part(value_parts, evidence_value[start:i], source_index) + if next_tainted and self._intersects(next_sensitive, next_tainted): + source_index = next_tainted_index + + redaction_start = next_sensitive["start"] - next_tainted["start"] + redaction_end = next_sensitive["end"] - next_tainted["start"] + self.redact_source( + sources, + redacted_sources, + redacted_sources_context, + next_tainted_index, + redaction_start, + redaction_end, + ) + + entries = self._remove(next_sensitive, next_tainted) + next_sensitive = entries[0] if entries else None + + length = next_sensitive["end"] - next_sensitive["start"] + self.write_redacted_value_part(value_parts, length) + + start = i + length + i = start - 1 + next_sensitive = sensitive.pop(0) if sensitive else None + continue + i += 1 + if start < len(evidence_value): + self.write_value_part(value_parts, evidence_value[start:]) + + return {"redacted_value_parts": value_parts, "redacted_sources": redacted_sources} + + def redact_source(self, sources, redacted_sources, redacted_sources_context, source_index, start, end): + if source_index is not None: + if not sources[source_index].redacted: + redacted_sources.append(source_index) + sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)] + sources[source_index].redacted = True + + if source_index not in redacted_sources_context.keys(): + redacted_sources_context[source_index] = [] + + redacted_sources_context[source_index].append({"start": start, "end": end}) + + def write_value_part(self, value_parts, value, source_index=None): + if value: + if source_index is not None: + value_parts.append({"value": value, "source": source_index}) + else: + value_parts.append({"value": value}) + + def write_redacted_value_part( + self, + value_parts, + length, + source_index=None, + part_value=None, + source=None, + source_redaction_context=None, + is_sensible_source=False, + ): + if source_index is not None: + placeholder = source.pattern if part_value and part_value in source.value else "*" * length + + if is_sensible_source: + value_parts.append({"redacted": True, "source": source_index, "pattern": placeholder}) + else: + _value = part_value + deduped_source_redaction_contexts = [] + + for _source_redaction_context in source_redaction_context: + if _source_redaction_context not in deduped_source_redaction_contexts: + deduped_source_redaction_contexts.append(_source_redaction_context) + + offset = 0 + for _source_redaction_context in deduped_source_redaction_contexts: + if _source_redaction_context["start"] > 0: + value_parts.append( + {"source": source_index, "value": _value[: _source_redaction_context["start"] - offset]} + ) + _value = _value[_source_redaction_context["start"] - offset :] + offset = _source_redaction_context["start"] + + sensitive_start = _source_redaction_context["start"] - offset + if sensitive_start < 0: + sensitive_start = 0 + sensitive = _value[sensitive_start : _source_redaction_context["end"] - offset] + index_of_part_value_in_pattern = source.value.find(sensitive) + pattern = ( + placeholder[index_of_part_value_in_pattern : index_of_part_value_in_pattern + len(sensitive)] + if index_of_part_value_in_pattern > -1 + else placeholder[_source_redaction_context["start"] : _source_redaction_context["end"]] + ) + + value_parts.append({"redacted": True, "source": source_index, "pattern": pattern}) + _value = _value[len(pattern) :] + offset += len(pattern) + if _value: + value_parts.append({"source": source_index, "value": _value}) + + else: + value_parts.append({"redacted": True}) + + def set_redaction_patterns(self, redaction_name_pattern=None, redaction_value_pattern=None): + if redaction_name_pattern: + try: + self._name_pattern = re.compile(redaction_name_pattern, re.IGNORECASE | re.MULTILINE) + except re.error: + log.warning("Redaction name pattern is not valid") + + if redaction_value_pattern: + try: + self._value_pattern = re.compile(redaction_value_pattern, re.IGNORECASE | re.MULTILINE) + except re.error: + log.warning("Redaction value pattern is not valid") + + +sensitive_handler = SensitiveHandler() diff --git a/ddtrace/appsec/_iast/_evidence_redaction/command_injection_sensitive_analyzer.py b/ddtrace/appsec/_iast/_evidence_redaction/command_injection_sensitive_analyzer.py new file mode 100644 index 00000000000..57dccc03db1 --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/command_injection_sensitive_analyzer.py @@ -0,0 +1,19 @@ +import re + +from ddtrace.internal.logger import get_logger + + +log = get_logger(__name__) + +_INSIDE_QUOTES_REGEXP = re.compile(r"^(?:\s*(?:sudo|doas)\s+)?\b\S+\b\s*(.*)") +COMMAND_PATTERN = r"^(?:\s*(?:sudo|doas)\s+)?\b\S+\b\s(.*)" +pattern = re.compile(COMMAND_PATTERN, re.IGNORECASE | re.MULTILINE) + + +def command_injection_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None): + regex_result = pattern.search(evidence.value) + if regex_result and len(regex_result.groups()) > 0: + start = regex_result.start(1) + end = regex_result.end(1) + return [{"start": start, "end": end}] + return [] diff --git a/ddtrace/appsec/_iast/_evidence_redaction/header_injection_sensitive_analyzer.py b/ddtrace/appsec/_iast/_evidence_redaction/header_injection_sensitive_analyzer.py new file mode 100644 index 00000000000..3b254781351 --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/header_injection_sensitive_analyzer.py @@ -0,0 +1,17 @@ +from ddtrace.appsec._iast.constants import HEADER_NAME_VALUE_SEPARATOR +from ddtrace.internal.logger import get_logger + + +log = get_logger(__name__) + + +def header_injection_sensitive_analyzer(evidence, name_pattern, value_pattern): + evidence_value = evidence.value + sections = evidence_value.split(HEADER_NAME_VALUE_SEPARATOR) + header_name = sections[0] + header_value = HEADER_NAME_VALUE_SEPARATOR.join(sections[1:]) + + if name_pattern.search(header_name) or value_pattern.search(header_value): + return [{"start": len(header_name) + len(HEADER_NAME_VALUE_SEPARATOR), "end": len(evidence_value)}] + + return [] diff --git a/ddtrace/appsec/_iast/_evidence_redaction/url_sensitive_analyzer.py b/ddtrace/appsec/_iast/_evidence_redaction/url_sensitive_analyzer.py new file mode 100644 index 00000000000..04ee4ecb6c8 --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/url_sensitive_analyzer.py @@ -0,0 +1,34 @@ +import re + +from ddtrace.internal.logger import get_logger + + +log = get_logger(__name__) +AUTHORITY = r"^(?:[^:]+:)?//([^@]+)@" +QUERY_FRAGMENT = r"[?#&]([^=&;]+)=([^?#&]+)" +pattern = re.compile(f"({AUTHORITY})|({QUERY_FRAGMENT})", re.IGNORECASE | re.MULTILINE) + + +def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None): + try: + ranges = [] + regex_result = pattern.search(evidence.value) + + while regex_result is not None: + if isinstance(regex_result.group(1), str): + end = regex_result.start() + (len(regex_result.group(0)) - 1) + start = end - len(regex_result.group(1)) + ranges.append({"start": start, "end": end}) + + if isinstance(regex_result.group(3), str): + end = regex_result.start() + len(regex_result.group(0)) + start = end - len(regex_result.group(3)) + ranges.append({"start": start, "end": end}) + + regex_result = pattern.search(evidence.value, regex_result.end()) + + return ranges + except Exception as e: + log.debug(e) + + return [] diff --git a/ddtrace/appsec/_iast/_taint_tracking/__init__.py b/ddtrace/appsec/_iast/_taint_tracking/__init__.py index 435420af933..b155e7c08a9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/__init__.py +++ b/ddtrace/appsec/_iast/_taint_tracking/__init__.py @@ -177,12 +177,15 @@ def get_tainted_ranges(pyobject: Any) -> Tuple: def taint_ranges_as_evidence_info(pyobject: Any) -> Tuple[List[Dict[str, Union[Any, int]]], List[Source]]: + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. value_parts = [] - sources = [] + sources = list() current_pos = 0 tainted_ranges = get_tainted_ranges(pyobject) if not len(tainted_ranges): - return ([{"value": pyobject}], []) + return ([{"value": pyobject}], list()) for _range in tainted_ranges: if _range.start > current_pos: @@ -192,7 +195,10 @@ def taint_ranges_as_evidence_info(pyobject: Any) -> Tuple[List[Dict[str, Union[A sources.append(_range.source) value_parts.append( - {"value": pyobject[_range.start : _range.start + _range.length], "source": sources.index(_range.source)} + { + "value": pyobject[_range.start : _range.start + _range.length], + "source": sources.index(_range.source), + } ) current_pos = _range.start + _range.length diff --git a/ddtrace/appsec/_iast/_utils.py b/ddtrace/appsec/_iast/_utils.py index e2e26e291fa..7272abb9016 100644 --- a/ddtrace/appsec/_iast/_utils.py +++ b/ddtrace/appsec/_iast/_utils.py @@ -1,11 +1,8 @@ -import json import re import string import sys from typing import TYPE_CHECKING # noqa:F401 -import attr - from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config @@ -41,6 +38,9 @@ def _is_iast_enabled(): def _has_to_scrub(s): # type: (str) -> bool + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. global _SOURCE_NAME_SCRUB global _SOURCE_VALUE_SCRUB global _SOURCE_NUMERAL_SCRUB @@ -58,6 +58,9 @@ def _has_to_scrub(s): # type: (str) -> bool def _is_numeric(s): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. global _SOURCE_NUMERAL_SCRUB if _SOURCE_NUMERAL_SCRUB is None: @@ -71,17 +74,26 @@ def _is_numeric(s): def _scrub(s, has_range=False): # type: (str, bool) -> str + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. if has_range: return "".join([_REPLACEMENTS[i % _LEN_REPLACEMENTS] for i in range(len(s))]) return "*" * len(s) def _is_evidence_value_parts(value): # type: (Any) -> bool + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. return isinstance(value, (set, list)) def _scrub_get_tokens_positions(text, tokens): # type: (str, Set[str]) -> List[Tuple[int, int]] + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. token_positions = [] for token in tokens: @@ -93,20 +105,6 @@ def _scrub_get_tokens_positions(text, tokens): return token_positions -def _iast_report_to_str(data): - from ._taint_tracking import OriginType - from ._taint_tracking import origin_to_str - - class OriginTypeEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, OriginType): - # if the obj is uuid, we simply return the value of uuid - return origin_to_str(obj) - return json.JSONEncoder.default(self, obj) - - return json.dumps(attr.asdict(data, filter=lambda attr, x: x is not None), cls=OriginTypeEncoder) - - def _get_patched_code(module_path, module_name): # type: (str, str) -> str """ Print the patched code to stdout, for debugging purposes. diff --git a/ddtrace/appsec/_iast/constants.py b/ddtrace/appsec/_iast/constants.py index ff165af405f..17981bccbcc 100644 --- a/ddtrace/appsec/_iast/constants.py +++ b/ddtrace/appsec/_iast/constants.py @@ -25,6 +25,8 @@ EVIDENCE_HEADER_INJECTION = "HEADER_INJECTION" EVIDENCE_SSRF = "SSRF" +HEADER_NAME_VALUE_SEPARATOR = ": " + MD5_DEF = "md5" SHA1_DEF = "sha1" diff --git a/ddtrace/appsec/_iast/processor.py b/ddtrace/appsec/_iast/processor.py index 8deee2a1846..8d0adffdb90 100644 --- a/ddtrace/appsec/_iast/processor.py +++ b/ddtrace/appsec/_iast/processor.py @@ -16,6 +16,7 @@ from ._metrics import _set_span_tag_iast_executed_sink from ._metrics import _set_span_tag_iast_request_tainted from ._utils import _is_iast_enabled +from .reporter import IastSpanReporter if TYPE_CHECKING: # pragma: no cover @@ -75,14 +76,14 @@ def on_span_finish(self, span): return from ._taint_tracking import reset_context # noqa: F401 - from ._utils import _iast_report_to_str span.set_metric(IAST.ENABLED, 1.0) - data = core.get_item(IAST.CONTEXT_KEY, span=span) + report_data: IastSpanReporter = core.get_item(IAST.CONTEXT_KEY, span=span) # type: ignore - if data: - span.set_tag_str(IAST.JSON, _iast_report_to_str(data)) + if report_data: + report_data.build_and_scrub_value_parts() + span.set_tag_str(IAST.JSON, report_data._to_str()) _asm_manual_keep(span) _set_metric_iast_request_tainted() diff --git a/ddtrace/appsec/_iast/reporter.py b/ddtrace/appsec/_iast/reporter.py index 5a95aa1272d..fa2cc8ae96c 100644 --- a/ddtrace/appsec/_iast/reporter.py +++ b/ddtrace/appsec/_iast/reporter.py @@ -3,17 +3,23 @@ import operator import os from typing import TYPE_CHECKING +from typing import Any +from typing import Dict from typing import List from typing import Set +from typing import Tuple import zlib import attr +from ddtrace.appsec._iast._evidence_redaction import sensitive_handler +from ddtrace.appsec._iast.constants import VULN_INSECURE_HASHING_TYPE +from ddtrace.appsec._iast.constants import VULN_WEAK_CIPHER_TYPE +from ddtrace.appsec._iast.constants import VULN_WEAK_RANDOMNESS -if TYPE_CHECKING: - import Any # noqa:F401 - import Dict # noqa:F401 - import Optional # noqa:F401 + +if TYPE_CHECKING: # pragma: no cover + from typing import Optional # noqa:F401 def _only_if_true(value): @@ -23,9 +29,8 @@ def _only_if_true(value): @attr.s(eq=False, hash=False) class Evidence(object): value = attr.ib(type=str, default=None) # type: Optional[str] - pattern = attr.ib(type=str, default=None) # type: Optional[str] - valueParts = attr.ib(type=list, default=None) # type: Optional[List[Dict[str, Any]]] - redacted = attr.ib(type=bool, default=False, converter=_only_if_true) # type: bool + _ranges = attr.ib(type=dict, default={}) # type: Any + valueParts = attr.ib(type=list, default=None) # type: Any def _valueParts_hash(self): if not self.valueParts: @@ -40,15 +45,10 @@ def _valueParts_hash(self): return _hash def __hash__(self): - return hash((self.value, self.pattern, self._valueParts_hash(), self.redacted)) + return hash((self.value, self._valueParts_hash())) def __eq__(self, other): - return ( - self.value == other.value - and self.pattern == other.pattern - and self._valueParts_hash() == other._valueParts_hash() - and self.redacted == other.redacted - ) + return self.value == other.value and self._valueParts_hash() == other._valueParts_hash() @attr.s(eq=True, hash=True) @@ -69,7 +69,7 @@ def __attrs_post_init__(self): self.hash = zlib.crc32(repr(self).encode()) -@attr.s(eq=True, hash=True) +@attr.s(eq=True, hash=False) class Source(object): origin = attr.ib(type=str) # type: str name = attr.ib(type=str) # type: str @@ -77,11 +77,163 @@ class Source(object): value = attr.ib(type=str, default=None) # type: Optional[str] pattern = attr.ib(type=str, default=None) # type: Optional[str] + def __hash__(self): + """origin & name serve as hashes. This approach aims to mitigate false positives when searching for + identical sources in a list, especially when sources undergo changes. The provided example illustrates how + two sources with different attributes could actually represent the same source. For example: + Source(origin=, name='string1', redacted=False, value="password", pattern=None) + could be the same source as the one below: + Source(origin=, name='string1', redacted=True, value=None, pattern='ab') + :return: + """ + return hash((self.origin, self.name)) + @attr.s(eq=False, hash=False) class IastSpanReporter(object): + """ + Class representing an IAST span reporter. + """ + sources = attr.ib(type=List[Source], factory=list) # type: List[Source] vulnerabilities = attr.ib(type=Set[Vulnerability], factory=set) # type: Set[Vulnerability] + _evidences_with_no_sources = [VULN_INSECURE_HASHING_TYPE, VULN_WEAK_CIPHER_TYPE, VULN_WEAK_RANDOMNESS] - def __hash__(self): + def __hash__(self) -> int: + """ + Computes the hash value of the IAST span reporter. + + Returns: + - int: Hash value. + """ return reduce(operator.xor, (hash(obj) for obj in set(self.sources) | self.vulnerabilities)) + + def taint_ranges_as_evidence_info(self, pyobject: Any) -> Tuple[List[Source], List[Dict]]: + """ + Extracts tainted ranges as evidence information. + + Args: + - pyobject (Any): Python object. + + Returns: + - Tuple[Set[Source], List[Dict]]: Set of Source objects and list of tainted ranges as dictionaries. + """ + from ddtrace.appsec._iast._taint_tracking import get_tainted_ranges + + sources = list() + tainted_ranges = get_tainted_ranges(pyobject) + tainted_ranges_to_dict = list() + if not len(tainted_ranges): + return [], [] + + for _range in tainted_ranges: + source = Source(origin=_range.source.origin, name=_range.source.name, value=_range.source.value) + if source not in sources: + sources.append(source) + + tainted_ranges_to_dict.append( + {"start": _range.start, "end": _range.start + _range.length, "length": _range.length, "source": source} + ) + return sources, tainted_ranges_to_dict + + def add_ranges_to_evidence_and_extract_sources(self, vuln): + sources, tainted_ranges_to_dict = self.taint_ranges_as_evidence_info(vuln.evidence.value) + vuln.evidence._ranges = tainted_ranges_to_dict + for source in sources: + if source not in self.sources: + self.sources = self.sources + [source] + + def _get_source_index(self, sources: List[Source], source: Source) -> int: + i = 0 + for source_ in sources: + if hash(source_) == hash(source): + return i + i += 1 + return -1 + + def build_and_scrub_value_parts(self) -> Dict[str, Any]: + """ + Builds and scrubs value parts of vulnerabilities. + + Returns: + - Dict[str, Any]: Dictionary representation of the IAST span reporter. + """ + for vuln in self.vulnerabilities: + scrubbing_result = sensitive_handler.scrub_evidence( + vuln.type, vuln.evidence, vuln.evidence._ranges, self.sources + ) + if scrubbing_result: + redacted_value_parts = scrubbing_result["redacted_value_parts"] + redacted_sources = scrubbing_result["redacted_sources"] + i = 0 + for source in self.sources: + if i in redacted_sources: + source.value = None + vuln.evidence.valueParts = redacted_value_parts + vuln.evidence.value = None + elif vuln.evidence.value is not None and vuln.type not in self._evidences_with_no_sources: + vuln.evidence.valueParts = self.get_unredacted_value_parts( + vuln.evidence.value, vuln.evidence._ranges, self.sources + ) + vuln.evidence.value = None + return self._to_dict() + + def get_unredacted_value_parts(self, evidence_value: str, ranges: List[dict], sources: List[Any]) -> List[dict]: + """ + Gets unredacted value parts of evidence. + + Args: + - evidence_value (str): Evidence value. + - ranges (List[Dict]): List of tainted ranges. + - sources (List[Any]): List of sources. + + Returns: + - List[Dict]: List of unredacted value parts. + """ + value_parts = [] + from_index = 0 + + for range_ in ranges: + if from_index < range_["start"]: + value_parts.append({"value": evidence_value[from_index : range_["start"]]}) + + source_index = self._get_source_index(sources, range_["source"]) + + value_parts.append( + {"value": evidence_value[range_["start"] : range_["end"]], "source": source_index} # type: ignore[dict-item] + ) + + from_index = range_["end"] + + if from_index < len(evidence_value): + value_parts.append({"value": evidence_value[from_index:]}) + + return value_parts + + def _to_dict(self) -> Dict[str, Any]: + """ + Converts the IAST span reporter to a dictionary. + + Returns: + - Dict[str, Any]: Dictionary representation of the IAST span reporter. + """ + return attr.asdict(self, filter=lambda attr, x: x is not None and attr.name != "_ranges") + + def _to_str(self) -> str: + """ + Converts the IAST span reporter to a JSON string. + + Returns: + - str: JSON representation of the IAST span reporter. + """ + from ._taint_tracking import OriginType + from ._taint_tracking import origin_to_str + + class OriginTypeEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, OriginType): + # if the obj is uuid, we simply return the value of uuid + return origin_to_str(obj) + return json.JSONEncoder.default(self, obj) + + return json.dumps(self._to_dict(), cls=OriginTypeEncoder) diff --git a/ddtrace/appsec/_iast/taint_sinks/_base.py b/ddtrace/appsec/_iast/taint_sinks/_base.py index 43dc1f5cb53..7cba289d644 100644 --- a/ddtrace/appsec/_iast/taint_sinks/_base.py +++ b/ddtrace/appsec/_iast/taint_sinks/_base.py @@ -19,7 +19,6 @@ from ..reporter import Evidence from ..reporter import IastSpanReporter from ..reporter import Location -from ..reporter import Source from ..reporter import Vulnerability @@ -89,35 +88,16 @@ def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_num line_number = -1 report = core.get_item(IAST.CONTEXT_KEY, span=span) + vulnerability = Vulnerability( + type=vulnerability_type, + evidence=evidence, + location=Location(path=file_name, line=line_number, spanId=span.span_id), + ) if report: - report.vulnerabilities.add( - Vulnerability( - type=vulnerability_type, - evidence=evidence, - location=Location(path=file_name, line=line_number, spanId=span.span_id), - ) - ) - + report.vulnerabilities.add(vulnerability) else: - report = IastSpanReporter( - vulnerabilities={ - Vulnerability( - type=vulnerability_type, - evidence=evidence, - location=Location(path=file_name, line=line_number, spanId=span.span_id), - ) - } - ) - if sources: - - def cast_value(value): - if isinstance(value, (bytes, bytearray)): - value_decoded = value.decode("utf-8") - else: - value_decoded = value - return value_decoded - - report.sources = [Source(origin=x.origin, name=x.name, value=cast_value(x.value)) for x in sources] + report = IastSpanReporter(vulnerabilities={vulnerability}) + report.add_ranges_to_evidence_and_extract_sources(vulnerability) if getattr(cls, "redact_report", False): redacted_report = cls._redacted_report_cache.get( @@ -130,9 +110,10 @@ def cast_value(value): return True @classmethod - def report(cls, evidence_value="", sources=None): - # type: (Union[Text|List[Dict[str, Any]]], Optional[List[Source]]) -> None + def report(cls, evidence_value="", value_parts=None, sources=None): + # type: (Any, Any, Optional[List[Any]]) -> None """Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON""" + # TODO: type of evidence_value will be Text. We wait to finish the redaction refactor. if cls.acquire_quota(): if not tracer or not hasattr(tracer, "current_root_span"): log.debug( @@ -166,11 +147,12 @@ def report(cls, evidence_value="", sources=None): if not cls.is_not_reported(file_name, line_number): return - if _is_evidence_value_parts(evidence_value): - evidence = Evidence(valueParts=evidence_value) + # TODO: This function is deprecated, but we need to migrate all vulnerabilities first before deleting it + if _is_evidence_value_parts(evidence_value) or _is_evidence_value_parts(value_parts): + evidence = Evidence(value=evidence_value, valueParts=value_parts) # Evidence is a string in weak cipher, weak hash and weak randomness elif isinstance(evidence_value, (str, bytes, bytearray)): - evidence = Evidence(value=evidence_value) + evidence = Evidence(value=evidence_value) # type: ignore else: log.debug("Unexpected evidence_value type: %s", type(evidence_value)) evidence = Evidence(value="") @@ -184,11 +166,17 @@ def report(cls, evidence_value="", sources=None): @classmethod def _extract_sensitive_tokens(cls, report): # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. log.debug("Base class VulnerabilityBase._extract_sensitive_tokens called") return {} @classmethod def _get_vulnerability_text(cls, vulnerability): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. if vulnerability and vulnerability.evidence.value is not None: return vulnerability.evidence.value @@ -209,6 +197,9 @@ def replace_tokens( vulns_to_tokens, has_range=False, ): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. ret = vuln.evidence.value replaced = False @@ -222,10 +213,16 @@ def replace_tokens( def _custom_edit_valueparts(cls, vuln): # Subclasses could optionally implement this to add further processing to the # vulnerability valueParts + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. return @classmethod def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. if not asm_config._iast_redaction_enabled: return report @@ -239,8 +236,8 @@ def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter for source in report.sources: # Join them so we only run the regexps once for each source # joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(source.name) or _has_to_scrub(source.value): - scrubbed = _scrub(source.value, has_range=True) + if _has_to_scrub(source.name) or _has_to_scrub(source.value): # type: ignore + scrubbed = _scrub(source.value, has_range=True) # type: ignore already_scrubbed[source.value] = scrubbed source.redacted = True sources_values_to_scrubbed[source.value] = scrubbed @@ -252,8 +249,6 @@ def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter if vuln.evidence.value is not None: pattern, replaced = cls.replace_tokens(vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source")) if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True vuln.evidence.value = None if vuln.evidence.valueParts is None: diff --git a/ddtrace/appsec/_iast/taint_sinks/command_injection.py b/ddtrace/appsec/_iast/taint_sinks/command_injection.py index 0b11ffd12b0..8f123a2be4c 100644 --- a/ddtrace/appsec/_iast/taint_sinks/command_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/command_injection.py @@ -1,10 +1,7 @@ import os -import re import subprocess # nosec -from typing import TYPE_CHECKING # noqa:F401 -from typing import List # noqa:F401 -from typing import Set # noqa:F401 -from typing import Union # noqa:F401 +from typing import List +from typing import Union from ddtrace.contrib import trace_utils from ddtrace.internal import core @@ -14,30 +11,15 @@ from ..._constants import IAST_SPAN_TAGS from .. import oce from .._metrics import increment_iast_span_metric -from .._utils import _has_to_scrub -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_CMDI from ..constants import VULN_CMDI +from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase -from ._base import _check_positions_contained - - -if TYPE_CHECKING: - from typing import Any # noqa:F401 - from typing import Dict # noqa:F401 - - from ..reporter import IastSpanReporter # noqa:F401 - from ..reporter import Vulnerability # noqa:F401 log = get_logger(__name__) -_INSIDE_QUOTES_REGEXP = re.compile(r"^(?:\s*(?:sudo|doas)\s+)?\b\S+\b\s*(.*)") - -def get_version(): - # type: () -> str +def get_version() -> str: return "" @@ -61,8 +43,7 @@ def patch(): core.dispatch("exploit.prevention.ssrf.patch.urllib") -def unpatch(): - # type: () -> None +def unpatch() -> None: trace_utils.unwrap(os, "system") trace_utils.unwrap(os, "_spawnvef") trace_utils.unwrap(subprocess.Popen, "__init__") @@ -93,151 +74,29 @@ def _iast_cmdi_subprocess_init(wrapped, instance, args, kwargs): @oce.register class CommandInjection(VulnerabilityBase): vulnerability_type = VULN_CMDI - evidence_type = EVIDENCE_CMDI - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(CommandInjection, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text): - # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] - ret = {} # type: Dict[int, Dict[str, Any]] - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - ret[vuln_hash] = { - "tokens": set(_INSIDE_QUOTES_REGEXP.findall(text)), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter - if not asm_config._iast_redaction_enabled: - return report - - # See if there is a match on either any of the sources or value parts of the report - found = False - - for source in report.sources: - # Join them so we only run the regexps once for each source - joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(joined_fields): - found = True - break - - vulns_to_text = {} - - if not found: - # Check the evidence's value/s - for vuln in report.vulnerabilities: - vulnerability_text = cls._get_vulnerability_text(vuln) - if _has_to_scrub(vulnerability_text) or _INSIDE_QUOTES_REGEXP.match(vulnerability_text): - vulns_to_text[vuln] = vulnerability_text - found = True - break + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False + - if not found: - return report - - if not vulns_to_text: - vulns_to_text = {vuln: cls._get_vulnerability_text(vuln) for vuln in report.vulnerabilities} - - # If we're here, some potentially sensitive information was found, we delegate on - # the specific subclass the task of extracting the variable tokens (e.g. literals inside - # quotes for SQL Injection). Note that by just having one potentially sensitive match - # we need to then scrub all the tokens, thus why we do it in two steps instead of one - vulns_to_tokens = cls._extract_sensitive_tokens(vulns_to_text) - - if not vulns_to_tokens: - return report - - all_tokens = set() # type: Set[str] - for _, value_dict in vulns_to_tokens.items(): - all_tokens.update(value_dict["tokens"]) - - # Iterate over all the sources, if one of the tokens match it, redact it - for source in report.sources: - if source.name in "".join(all_tokens) or source.value in "".join(all_tokens): - source.pattern = _scrub(source.value, has_range=True) - source.redacted = True - source.value = None - - # Same for all the evidence values - try: - for vuln in report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - vuln_hash = hash(vuln) - if vuln.evidence.value is not None: - pattern, replaced = cls.replace_tokens( - vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source") - ) - if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True - vuln.evidence.value = None - elif vuln.evidence.valueParts is not None: - idx = 0 - new_value_parts = [] - for part in vuln.evidence.valueParts: - value = part["value"] - part_len = len(value) - part_start = idx - part_end = idx + part_len - pattern_list = [] - - for positions in vulns_to_tokens[vuln_hash]["token_positions"]: - if _check_positions_contained(positions, (part_start, part_end)): - part_scrub_start = max(positions[0] - idx, 0) - part_scrub_end = positions[1] - idx - pattern_list.append(value[:part_scrub_start] + "" + value[part_scrub_end:]) - if part.get("source", False) is not False: - source = report.sources[part["source"]] - if source.redacted: - part["redacted"] = source.redacted - part["pattern"] = source.pattern - del part["value"] - new_value_parts.append(part) - break - else: - part["value"] = "".join(pattern_list) - new_value_parts.append(part) - new_value_parts.append({"redacted": True}) - break - else: - new_value_parts.append(part) - pattern_list.append(value[part_start:part_end]) - break - - idx += part_len - vuln.evidence.valueParts = new_value_parts - except (ValueError, KeyError): - log.debug("an error occurred while redacting cmdi", exc_info=True) - return report - - -def _iast_report_cmdi(shell_args): - # type: (Union[str, List[str]]) -> None +def _iast_report_cmdi(shell_args: Union[str, List[str]]) -> None: report_cmdi = "" from .._metrics import _set_metric_iast_executed_sink - from .._taint_tracking import is_pyobject_tainted - from .._taint_tracking.aspects import join_aspect - - if isinstance(shell_args, (list, tuple)): - for arg in shell_args: - if is_pyobject_tainted(arg): - report_cmdi = join_aspect(" ".join, 1, " ", shell_args) - break - elif is_pyobject_tainted(shell_args): - report_cmdi = shell_args increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, CommandInjection.vulnerability_type) _set_metric_iast_executed_sink(CommandInjection.vulnerability_type) - if report_cmdi: - CommandInjection.report(evidence_value=report_cmdi) + + if AppSecIastSpanProcessor.is_span_analyzed() and CommandInjection.has_quota(): + from .._taint_tracking import is_pyobject_tainted + from .._taint_tracking.aspects import join_aspect + + if isinstance(shell_args, (list, tuple)): + for arg in shell_args: + if is_pyobject_tainted(arg): + report_cmdi = join_aspect(" ".join, 1, " ", shell_args) + break + elif is_pyobject_tainted(shell_args): + report_cmdi = shell_args + + if report_cmdi: + CommandInjection.report(evidence_value=report_cmdi) diff --git a/ddtrace/appsec/_iast/taint_sinks/header_injection.py b/ddtrace/appsec/_iast/taint_sinks/header_injection.py index 6444fec627e..1ce8a52d5e4 100644 --- a/ddtrace/appsec/_iast/taint_sinks/header_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/header_injection.py @@ -1,6 +1,4 @@ import re -from typing import Any -from typing import Dict from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config @@ -13,13 +11,9 @@ from .._patch import set_and_check_module_is_patched from .._patch import set_module_unpatched from .._patch import try_wrap_function_wrapper -from .._utils import _has_to_scrub -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_HEADER_INJECTION +from ..constants import HEADER_NAME_VALUE_SEPARATOR from ..constants import VULN_HEADER_INJECTION -from ..reporter import IastSpanReporter -from ..reporter import Vulnerability +from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase @@ -109,53 +103,9 @@ def _iast_h(wrapped, instance, args, kwargs): @oce.register class HeaderInjection(VulnerabilityBase): vulnerability_type = VULN_HEADER_INJECTION - evidence_type = EVIDENCE_HEADER_INJECTION - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(HeaderInjection, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text: Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]]: - ret = {} # type: Dict[int, Dict[str, Any]] - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - ret[vuln_hash] = { - "tokens": set(_HEADERS_NAME_REGEXP.findall(text) + _HEADERS_VALUE_REGEXP.findall(text)), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report: IastSpanReporter) -> IastSpanReporter: - """TODO: this algorithm is not working as expected, it needs to be fixed.""" - if not asm_config._iast_redaction_enabled: - return report - - try: - for vuln in report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - if vuln.type == VULN_HEADER_INJECTION: - scrub_the_following_elements = False - new_value_parts = [] - for value_part in vuln.evidence.valueParts: - if _HEADERS_VALUE_REGEXP.match(value_part["value"]) or scrub_the_following_elements: - value_part["pattern"] = _scrub(value_part["value"], has_range=True) - value_part["redacted"] = True - del value_part["value"] - elif _has_to_scrub(value_part["value"]) or _HEADERS_NAME_REGEXP.match(value_part["value"]): - scrub_the_following_elements = True - new_value_parts.append(value_part) - vuln.evidence.valueParts = new_value_parts - except (ValueError, KeyError): - log.debug("an error occurred while redacting cmdi", exc_info=True) - return report + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False def _iast_report_header_injection(headers_args) -> None: @@ -180,6 +130,7 @@ def _iast_report_header_injection(headers_args) -> None: increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, HeaderInjection.vulnerability_type) _set_metric_iast_executed_sink(HeaderInjection.vulnerability_type) - if is_pyobject_tainted(header_name) or is_pyobject_tainted(header_value): - header_evidence = add_aspect(add_aspect(header_name, ": "), header_value) - HeaderInjection.report(evidence_value=header_evidence) + if AppSecIastSpanProcessor.is_span_analyzed() and HeaderInjection.has_quota(): + if is_pyobject_tainted(header_name) or is_pyobject_tainted(header_value): + header_evidence = add_aspect(add_aspect(header_name, HEADER_NAME_VALUE_SEPARATOR), header_value) + HeaderInjection.report(evidence_value=header_evidence) diff --git a/ddtrace/appsec/_iast/taint_sinks/path_traversal.py b/ddtrace/appsec/_iast/taint_sinks/path_traversal.py index c7618000d05..e6fde3b40e2 100644 --- a/ddtrace/appsec/_iast/taint_sinks/path_traversal.py +++ b/ddtrace/appsec/_iast/taint_sinks/path_traversal.py @@ -8,7 +8,6 @@ from .._metrics import increment_iast_span_metric from .._patch import set_and_check_module_is_patched from .._patch import set_module_unpatched -from ..constants import EVIDENCE_PATH_TRAVERSAL from ..constants import VULN_PATH_TRAVERSAL from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase @@ -20,15 +19,6 @@ @oce.register class PathTraversal(VulnerabilityBase): vulnerability_type = VULN_PATH_TRAVERSAL - evidence_type = EVIDENCE_PATH_TRAVERSAL - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(PathTraversal, cls).report(evidence_value=evidence_value, sources=sources) def get_version(): diff --git a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py index ee7bcfb2f8f..68d5a289c01 100644 --- a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py @@ -32,9 +32,10 @@ class SqlInjection(VulnerabilityBase): @classmethod def report(cls, evidence_value=None, sources=None): + value_parts = [] if isinstance(evidence_value, (str, bytes, bytearray)): - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(SqlInjection, cls).report(evidence_value=evidence_value, sources=sources) + value_parts, sources = taint_ranges_as_evidence_info(evidence_value) + super(SqlInjection, cls).report(evidence_value=evidence_value, value_parts=value_parts, sources=sources) @classmethod def _extract_sensitive_tokens(cls, vulns_to_text): diff --git a/ddtrace/appsec/_iast/taint_sinks/ssrf.py b/ddtrace/appsec/_iast/taint_sinks/ssrf.py index f114998605a..7a070cf5425 100644 --- a/ddtrace/appsec/_iast/taint_sinks/ssrf.py +++ b/ddtrace/appsec/_iast/taint_sinks/ssrf.py @@ -1,176 +1,33 @@ -import re -from typing import Callable # noqa:F401 -from typing import Dict # noqa:F401 -from typing import Set # noqa:F401 +from typing import Callable from ddtrace.internal.logger import get_logger -from ddtrace.settings.asm import config as asm_config from ..._constants import IAST_SPAN_TAGS from .. import oce from .._metrics import increment_iast_span_metric -from .._utils import _has_to_scrub -from .._utils import _is_iast_enabled -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_SSRF from ..constants import VULN_SSRF -from ..constants import VULNERABILITY_TOKEN_TYPE from ..processor import AppSecIastSpanProcessor -from ..reporter import IastSpanReporter # noqa:F401 -from ..reporter import Vulnerability from ._base import VulnerabilityBase -from ._base import _check_positions_contained log = get_logger(__name__) -_AUTHORITY_REGEXP = re.compile(r"(?:\/\/([^:@\/]+)(?::([^@\/]+))?@).*") -_QUERY_FRAGMENT_REGEXP = re.compile(r"[?#&]([^=&;]+)=(?P[^?#&]+)") - - @oce.register class SSRF(VulnerabilityBase): vulnerability_type = VULN_SSRF - evidence_type = EVIDENCE_SSRF - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if not _is_iast_enabled(): - return - - from .._taint_tracking import taint_ranges_as_evidence_info - - if isinstance(evidence_value, (str, bytes, bytearray)): - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(SSRF, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text: Dict[Vulnerability, str]) -> VULNERABILITY_TOKEN_TYPE: - ret = {} # type: VULNERABILITY_TOKEN_TYPE - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - authority = [] - authority_found = _AUTHORITY_REGEXP.findall(text) - if authority_found: - authority = list(authority_found[0]) - query = [value for param, value in _QUERY_FRAGMENT_REGEXP.findall(text)] - ret[vuln_hash] = { - "tokens": set(authority + query), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter - if not asm_config._iast_redaction_enabled: - return report - - # See if there is a match on either any of the sources or value parts of the report - found = False - - for source in report.sources: - # Join them so we only run the regexps once for each source - joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(joined_fields): - found = True - break - - vulns_to_text = {} - - if not found: - # Check the evidence's value/s - for vuln in report.vulnerabilities: - vulnerability_text = cls._get_vulnerability_text(vuln) - if _has_to_scrub(vulnerability_text) or _AUTHORITY_REGEXP.match(vulnerability_text): - vulns_to_text[vuln] = vulnerability_text - found = True - break - - if not found: - return report - - if not vulns_to_text: - vulns_to_text = {vuln: cls._get_vulnerability_text(vuln) for vuln in report.vulnerabilities} - - # If we're here, some potentially sensitive information was found, we delegate on - # the specific subclass the task of extracting the variable tokens (e.g. literals inside - # quotes for SQL Injection). Note that by just having one potentially sensitive match - # we need to then scrub all the tokens, thus why we do it in two steps instead of one - vulns_to_tokens = cls._extract_sensitive_tokens(vulns_to_text) - - if not vulns_to_tokens: - return report - - all_tokens = set() # type: Set[str] - for _, value_dict in vulns_to_tokens.items(): - all_tokens.update(value_dict["tokens"]) - - # Iterate over all the sources, if one of the tokens match it, redact it - for source in report.sources: - if source.name in "".join(all_tokens) or source.value in "".join(all_tokens): - source.pattern = _scrub(source.value, has_range=True) - source.redacted = True - source.value = None - - # Same for all the evidence values - for vuln in report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - vuln_hash = hash(vuln) - if vuln.evidence.value is not None: - pattern, replaced = cls.replace_tokens(vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source")) - if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True - vuln.evidence.value = None - elif vuln.evidence.valueParts is not None: - idx = 0 - new_value_parts = [] - for part in vuln.evidence.valueParts: - value = part["value"] - part_len = len(value) - part_start = idx - part_end = idx + part_len - pattern_list = [] - - for positions in vulns_to_tokens[vuln_hash]["token_positions"]: - if _check_positions_contained(positions, (part_start, part_end)): - part_scrub_start = max(positions[0] - idx, 0) - part_scrub_end = positions[1] - idx - pattern_list.append(value[:part_scrub_start] + "" + value[part_scrub_end:]) - if part.get("source", False) is not False: - source = report.sources[part["source"]] - if source.redacted: - part["redacted"] = source.redacted - part["pattern"] = source.pattern - del part["value"] - new_value_parts.append(part) - break - else: - part["value"] = "".join(pattern_list) - new_value_parts.append(part) - new_value_parts.append({"redacted": True}) - break - else: - new_value_parts.append(part) - pattern_list.append(value[part_start:part_end]) - break - - idx += part_len - vuln.evidence.valueParts = new_value_parts - return report + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False def _iast_report_ssrf(func: Callable, *args, **kwargs): - from .._metrics import _set_metric_iast_executed_sink - report_ssrf = kwargs.get("url", False) - increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SSRF.vulnerability_type) - _set_metric_iast_executed_sink(SSRF.vulnerability_type) if report_ssrf: + from .._metrics import _set_metric_iast_executed_sink + + _set_metric_iast_executed_sink(SSRF.vulnerability_type) + increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SSRF.vulnerability_type) if AppSecIastSpanProcessor.is_span_analyzed() and SSRF.has_quota(): try: from .._taint_tracking import is_pyobject_tainted diff --git a/ddtrace/appsec/_metrics.py b/ddtrace/appsec/_metrics.py index 28644978a0f..28d712cebf7 100644 --- a/ddtrace/appsec/_metrics.py +++ b/ddtrace/appsec/_metrics.py @@ -105,6 +105,21 @@ def _set_waf_request_metrics(*args): 1.0, tags=tags_request, ) + rasp = result["rasp"] + if rasp["called"]: + for t, n in [("eval", "rasp.rule.eval"), ("match", "rasp.rule.match"), ("timeout", "rasp.timeout")]: + for rule_type, value in rasp[t].items(): + if value: + telemetry.telemetry_writer.add_count_metric( + TELEMETRY_NAMESPACE_TAG_APPSEC, + n, + float(value), + tags=( + ("rule_type", rule_type), + ("waf_version", DDWAF_VERSION), + ), + ) + except Exception: log.warning("Error reporting ASM WAF requests metrics", exc_info=True) finally: diff --git a/ddtrace/appsec/_processor.py b/ddtrace/appsec/_processor.py index 4d98ab486b4..a3d0518b6a4 100644 --- a/ddtrace/appsec/_processor.py +++ b/ddtrace/appsec/_processor.py @@ -260,7 +260,12 @@ def waf_callable(custom_data=None, **kwargs): _asm_request_context.call_waf_callback({"REQUEST_HTTP_IP": None}) def _waf_action( - self, span: Span, ctx: ddwaf_context_capsule, custom_data: Optional[Dict[str, Any]] = None, **kwargs + self, + span: Span, + ctx: ddwaf_context_capsule, + custom_data: Optional[Dict[str, Any]] = None, + crop_trace: Optional[str] = None, + rule_type: Optional[str] = None, ) -> Optional[DDWaf_result]: """ Call the `WAF` with the given parameters. If `custom_data_names` is specified as @@ -327,7 +332,7 @@ def _waf_action( from ddtrace.appsec._exploit_prevention.stack_traces import report_stack stack_trace_id = parameters["stack_id"] - report_stack("exploit detected", span, kwargs.get("crop_trace"), stack_id=stack_trace_id) + report_stack("exploit detected", span, crop_trace, stack_id=stack_trace_id) for rule in waf_results.data: rule[EXPLOIT_PREVENTION.STACK_TRACE_ID] = stack_trace_id @@ -335,7 +340,11 @@ def _waf_action( log.debug("[DDAS-011-00] ASM In-App WAF returned: %s. Timeout %s", waf_results.data, waf_results.timeout) _asm_request_context.set_waf_telemetry_results( - self._ddwaf.info.version, bool(waf_results.data), bool(blocked), waf_results.timeout + self._ddwaf.info.version, + bool(waf_results.data), + bool(blocked), + waf_results.timeout, + rule_type, ) if blocked: core.set_item(WAF_CONTEXT_NAMES.BLOCKED, blocked, span=span) diff --git a/ddtrace/internal/flare.py b/ddtrace/internal/flare.py index 9a11223b221..7cf850e7656 100644 --- a/ddtrace/internal/flare.py +++ b/ddtrace/internal/flare.py @@ -1,4 +1,5 @@ import binascii +import dataclasses import io import json import logging @@ -7,9 +8,7 @@ import pathlib import shutil import tarfile -from typing import Any from typing import Dict -from typing import List from typing import Optional from typing import Tuple @@ -19,7 +18,7 @@ from ddtrace.internal.utils.http import get_connection -TRACER_FLARE_DIRECTORY = pathlib.Path("tracer_flare") +TRACER_FLARE_DIRECTORY = "tracer_flare" TRACER_FLARE_TAR = pathlib.Path("tracer_flare.tar") TRACER_FLARE_ENDPOINT = "/tracer_flare/v1" TRACER_FLARE_FILE_HANDLER_NAME = "tracer_flare_file_handler" @@ -29,111 +28,99 @@ log = get_logger(__name__) +@dataclasses.dataclass +class FlareSendRequest: + case_id: str + hostname: str + email: str + source: str = "tracer_python" + + class Flare: - def __init__(self, timeout_sec: int = DEFAULT_TIMEOUT_SECONDS): - self.original_log_level = 0 # NOTSET - self.timeout = timeout_sec + def __init__(self, timeout_sec: int = DEFAULT_TIMEOUT_SECONDS, flare_dir: str = TRACER_FLARE_DIRECTORY): + self.original_log_level: int = logging.NOTSET + self.timeout: int = timeout_sec + self.flare_dir: pathlib.Path = pathlib.Path(flare_dir) self.file_handler: Optional[RotatingFileHandler] = None - def prepare(self, configs: List[dict]): + def prepare(self, log_level: str): """ Update configurations to start sending tracer logs to a file to be sent in a flare later. """ - if not os.path.exists(TRACER_FLARE_DIRECTORY): - try: - os.makedirs(TRACER_FLARE_DIRECTORY) - log.info("Tracer logs will now be sent to the %s directory", TRACER_FLARE_DIRECTORY) - except Exception as e: - log.error("Failed to create %s directory: %s", TRACER_FLARE_DIRECTORY, e) - return - for agent_config in configs: - # AGENT_CONFIG is currently being used for multiple purposes - # We only want to prepare for a tracer flare if the config name - # starts with 'flare-log-level' - if not agent_config.get("name", "").startswith("flare-log-level"): - return + try: + self.flare_dir.mkdir(exist_ok=True) + except Exception as e: + log.error("Failed to create %s directory: %s", self.flare_dir, e) + return + + flare_log_level_int = logging.getLevelName(log_level) + if type(flare_log_level_int) != int: + raise TypeError("Invalid log level provided: %s", log_level) - # Validate the flare log level - flare_log_level = agent_config.get("config", {}).get("log_level").upper() - flare_log_level_int = logging.getLevelName(flare_log_level) - if type(flare_log_level_int) != int: - raise TypeError("Invalid log level provided: %s", flare_log_level_int) - - ddlogger = get_logger("ddtrace") - pid = os.getpid() - flare_file_path = TRACER_FLARE_DIRECTORY / pathlib.Path(f"tracer_python_{pid}.log") - self.original_log_level = ddlogger.level - - # Set the logger level to the more verbose between original and flare - # We do this valid_original_level check because if the log level is NOTSET, the value is 0 - # which is the minimum value. In this case, we just want to use the flare level, but still - # retain the original state as NOTSET/0 - valid_original_level = 100 if self.original_log_level == 0 else self.original_log_level - logger_level = min(valid_original_level, flare_log_level_int) - ddlogger.setLevel(logger_level) - self.file_handler = _add_file_handler( - ddlogger, flare_file_path.__str__(), flare_log_level, TRACER_FLARE_FILE_HANDLER_NAME - ) - - # Create and add config file - self._generate_config_file(pid) - - def send(self, configs: List[Any]): + ddlogger = get_logger("ddtrace") + pid = os.getpid() + flare_file_path = self.flare_dir / f"tracer_python_{pid}.log" + self.original_log_level = ddlogger.level + + # Set the logger level to the more verbose between original and flare + # We do this valid_original_level check because if the log level is NOTSET, the value is 0 + # which is the minimum value. In this case, we just want to use the flare level, but still + # retain the original state as NOTSET/0 + valid_original_level = ( + logging.CRITICAL if self.original_log_level == logging.NOTSET else self.original_log_level + ) + logger_level = min(valid_original_level, flare_log_level_int) + ddlogger.setLevel(logger_level) + self.file_handler = _add_file_handler( + ddlogger, flare_file_path.__str__(), flare_log_level_int, TRACER_FLARE_FILE_HANDLER_NAME + ) + + # Create and add config file + self._generate_config_file(pid) + + def send(self, flare_send_req: FlareSendRequest): """ Revert tracer flare configurations back to original state before sending the flare. """ - for agent_task in configs: - # AGENT_TASK is currently being used for multiple purposes - # We only want to generate the tracer flare if the task_type is - # 'tracer_flare' - if type(agent_task) != dict or agent_task.get("task_type") != "tracer_flare": - continue - args = agent_task.get("args", {}) - - self.revert_configs() - - # We only want the flare to be sent once, even if there are - # multiple tracer instances - lock_path = TRACER_FLARE_DIRECTORY / TRACER_FLARE_LOCK - if not os.path.exists(lock_path): - try: - open(lock_path, "w").close() - except Exception as e: - log.error("Failed to create %s file", lock_path) - raise e - data = { - "case_id": args.get("case_id"), - "source": "tracer_python", - "hostname": args.get("hostname"), - "email": args.get("user_handle"), - } - try: - client = get_connection(config._trace_agent_url, timeout=self.timeout) - headers, body = self._generate_payload(data) - client.request("POST", TRACER_FLARE_ENDPOINT, body, headers) - response = client.getresponse() - if response.status == 200: - log.info("Successfully sent the flare") - else: - log.error( - "Upload failed with %s status code:(%s) %s", - response.status, - response.reason, - response.read().decode(), - ) - except Exception as e: - log.error("Failed to send tracer flare") - raise e - finally: - client.close() - # Clean up files regardless of success/failure - self.clean_up_files() - return + self.revert_configs() + + # We only want the flare to be sent once, even if there are + # multiple tracer instances + lock_path = self.flare_dir / TRACER_FLARE_LOCK + if not os.path.exists(lock_path): + try: + open(lock_path, "w").close() + except Exception as e: + log.error("Failed to create %s file", lock_path) + raise e + try: + client = get_connection(config._trace_agent_url, timeout=self.timeout) + headers, body = self._generate_payload(flare_send_req.__dict__) + client.request("POST", TRACER_FLARE_ENDPOINT, body, headers) + response = client.getresponse() + if response.status == 200: + log.info("Successfully sent the flare to Zendesk ticket %s", flare_send_req.case_id) + else: + log.error( + "Tracer flare upload to Zendesk ticket %s failed with %s status code:(%s) %s", + flare_send_req.case_id, + response.status, + response.reason, + response.read().decode(), + ) + except Exception as e: + log.error("Failed to send tracer flare to Zendesk ticket %s", flare_send_req.case_id) + raise e + finally: + client.close() + # Clean up files regardless of success/failure + self.clean_up_files() + return def _generate_config_file(self, pid: int): - config_file = TRACER_FLARE_DIRECTORY / pathlib.Path(f"tracer_config_{pid}.json") + config_file = self.flare_dir / f"tracer_config_{pid}.json" try: with open(config_file, "w") as f: tracer_configs = { @@ -162,8 +149,7 @@ def revert_configs(self): def _generate_payload(self, params: Dict[str, str]) -> Tuple[dict, bytes]: tar_stream = io.BytesIO() with tarfile.open(fileobj=tar_stream, mode="w") as tar: - for file_name in os.listdir(TRACER_FLARE_DIRECTORY): - flare_file_name = TRACER_FLARE_DIRECTORY / pathlib.Path(file_name) + for flare_file_name in self.flare_dir.iterdir(): tar.add(flare_file_name) tar_stream.seek(0) @@ -197,6 +183,6 @@ def _get_valid_logger_level(self, flare_log_level: int) -> int: def clean_up_files(self): try: - shutil.rmtree(TRACER_FLARE_DIRECTORY) + shutil.rmtree(self.flare_dir) except Exception as e: log.warning("Failed to clean up tracer flare files: %s", e) diff --git a/tests/appsec/contrib_appsec/utils.py b/tests/appsec/contrib_appsec/utils.py index dae02eb8f21..1a193b47a04 100644 --- a/tests/appsec/contrib_appsec/utils.py +++ b/tests/appsec/contrib_appsec/utils.py @@ -1186,8 +1186,11 @@ def test_stream_response( def test_exploit_prevention( self, interface, root_span, get_tag, asm_enabled, ep_enabled, endpoint, parameters, rule, top_functions ): + from unittest.mock import patch as mock_patch + from ddtrace.appsec._common_module_patches import patch_common_modules from ddtrace.appsec._common_module_patches import unpatch_common_modules + from ddtrace.appsec._metrics import DDWAF_VERSION from ddtrace.contrib.requests import patch as patch_requests from ddtrace.contrib.requests import unpatch as unpatch_requests from ddtrace.ext import http @@ -1196,7 +1199,7 @@ def test_exploit_prevention( patch_requests() with override_global_config(dict(_asm_enabled=asm_enabled, _ep_enabled=ep_enabled)), override_env( dict(DD_APPSEC_RULES=rules.RULES_EXPLOIT_PREVENTION) - ): + ), mock_patch("ddtrace.internal.telemetry.metrics_namespaces.MetricNamespace.add_metric") as mocked: patch_common_modules() self.update_tracer(interface) response = interface.client.get(f"/rasp/{endpoint}/?{parameters}") @@ -1212,6 +1215,20 @@ def test_exploit_prevention( assert any( function.endswith(top_function) for top_function in top_functions ), f"unknown top function {function}" + # assert mocked.call_args_list == [] + telemetry_calls = { + (c.__name__, f"{ns}.{nm}", t): v for (c, ns, nm, v, t), _ in mocked.call_args_list + } + assert ( + "CountMetric", + "appsec.rasp.rule.match", + (("rule_type", endpoint), ("waf_version", DDWAF_VERSION)), + ) in telemetry_calls + assert ( + "CountMetric", + "appsec.rasp.rule.eval", + (("rule_type", endpoint), ("waf_version", DDWAF_VERSION)), + ) in telemetry_calls else: assert get_triggers(root_span()) is None assert self.check_for_stack_trace(root_span) == [] diff --git a/tests/appsec/iast/taint_sinks/test_command_injection.py b/tests/appsec/iast/taint_sinks/test_command_injection.py index 394a1a5ef4d..0100756dd41 100644 --- a/tests/appsec/iast/taint_sinks/test_command_injection.py +++ b/tests/appsec/iast/taint_sinks/test_command_injection.py @@ -40,12 +40,11 @@ def setup(): def test_ossystem(tracer, iast_span_defaults): with override_global_config(dict(_iast_enabled=True)): patch() - _BAD_DIR = "forbidden_dir/" + _BAD_DIR = "mytest/folder/" _BAD_DIR = taint_pyobject( pyobject=_BAD_DIR, source_name="test_ossystem", source_value=_BAD_DIR, - source_origin=OriginType.PARAMETER, ) assert is_pyobject_tainted(_BAD_DIR) with tracer.trace("ossystem_test"): @@ -54,26 +53,26 @@ def test_ossystem(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + data = span_report.build_and_scrub_value_parts() + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_ossystem" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_ossystem" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_ossystem", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_communicate(tracer, iast_span_defaults): @@ -94,26 +93,27 @@ def test_communicate(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_communicate" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_communicate" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_communicate", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_run(tracer, iast_span_defaults): @@ -132,26 +132,27 @@ def test_run(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_run" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_run" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_run", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_popen_wait(tracer, iast_span_defaults): @@ -171,26 +172,27 @@ def test_popen_wait(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_popen_wait" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_popen_wait" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_popen_wait", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_popen_wait_shell_true(tracer, iast_span_defaults): @@ -210,26 +212,27 @@ def test_popen_wait_shell_true(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_popen_wait_shell_true" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_popen_wait_shell_true" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_popen_wait_shell_true", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -275,22 +278,23 @@ def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments, span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [{"value": "/bin/ls -l "}, {"source": 0, "value": _BAD_DIR}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_osspawn_variants" - assert source.origin == OriginType.PARAMETER - assert source.value == _BAD_DIR + data = span_report.build_and_scrub_value_parts() + + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [{"value": "/bin/ls -l "}, {"source": 0, "value": _BAD_DIR}] + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_osspawn_variants" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == _BAD_DIR line, hash_value = get_line_and_hash(tag, VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -315,8 +319,9 @@ def test_multiple_cmdi(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - assert len(list(span_report.vulnerabilities)) == 2 + assert len(list(data["vulnerabilities"])) == 2 @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -334,8 +339,9 @@ def test_string_cmdi(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - assert len(list(span_report.vulnerabilities)) == 1 + assert len(list(data["vulnerabilities"])) == 1 @pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) @@ -360,5 +366,5 @@ def test_cmdi_deduplication(num_vuln_expected, tracer, iast_span_deduplication_e assert span_report is None else: assert span_report - - assert len(span_report.vulnerabilities) == num_vuln_expected + data = span_report.build_and_scrub_value_parts() + assert len(data["vulnerabilities"]) == num_vuln_expected diff --git a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py index 27cd030b219..4cb6a962c7d 100644 --- a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py @@ -2,12 +2,14 @@ import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_CMDI from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from ddtrace.appsec._iast.taint_sinks.command_injection import CommandInjection from ddtrace.internal import core @@ -36,10 +38,14 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_ span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_CMDI + assert source == sources_expected @pytest.mark.parametrize( @@ -72,24 +78,52 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_ "/mytest/../folder/file.txt", ], ) -def test_cmdi_redact_rel_paths(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, - {"value": "ls "}, - {"value": file_path, "source": 0}, +def test_cmdi_redact_rel_paths_and_sudo(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("sudo ", add_aspect("ls ", file_path))) + loc = Location(path="foobar.py", line=35, spanId=123) + v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "sudo ls "}, + {"redacted": True, "pattern": ANY, "source": 0}, ] - ) + + +@pytest.mark.parametrize( + "file_path", + [ + "2 > /mytest/folder/", + "2 > mytest/folder/", + "-p mytest/folder", + "--path=../mytest/folder/", + "--path=../mytest/folder/", + "--options ../mytest/folder", + "-a /mytest/folder/", + "-b /mytest/folder/", + "-c /mytest/folder", + ], +) +def test_cmdi_redact_sudo_command_with_options(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("sudo ", add_aspect("ls ", file_path))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="file", name="SomeName", value=file_path) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "sudo "}, - {"value": "ls "}, + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "sudo ls "}, {"redacted": True, "pattern": ANY, "source": 0}, ] @@ -108,24 +142,69 @@ def test_cmdi_redact_rel_paths(file_path): "-c /mytest/folder", ], ) -def test_cmdi_redact_options(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, +def test_cmdi_redact_command_with_options(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("ls ", file_path)) + loc = Location(path="foobar.py", line=35, spanId=123) + v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "ls "}, - {"value": file_path, "source": 0}, + {"redacted": True, "pattern": ANY, "source": 0}, ] - ) + + +@pytest.mark.parametrize( + "file_path", + [ + "/mytest/folder/", + "mytest/folder/", + "mytest/folder", + "../mytest/folder/", + "../mytest/folder/", + "../mytest/folder", + "/mytest/folder/", + "/mytest/folder/", + "/mytest/folder", + "/mytest/../folder/", + "mytest/../folder/", + "mytest/../folder", + "../mytest/../folder/", + "../mytest/../folder/", + "../mytest/../folder", + "/mytest/../folder/", + "/mytest/../folder/", + "/mytest/../folder", + "/mytest/folder/file.txt", + "mytest/folder/file.txt", + "../mytest/folder/file.txt", + "/mytest/folder/file.txt", + "mytest/../folder/file.txt", + "../mytest/../folder/file.txt", + "/mytest/../folder/file.txt", + ], +) +def test_cmdi_redact_rel_paths(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("dir -l ", file_path)) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="file", name="SomeName", value=file_path) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "sudo "}, - {"value": "ls "}, + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "dir "}, + {"redacted": True}, {"redacted": True, "pattern": ANY, "source": 0}, ] @@ -145,23 +224,19 @@ def test_cmdi_redact_options(file_path): ], ) def test_cmdi_redact_source_command(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, - {"value": "ls ", "source": 0}, - {"value": file_path}, - ] - ) + Ls_cmd = taint_pyobject(pyobject="ls ", source_name="test_ossystem", source_value="ls ") + + ev = Evidence(value=add_aspect("sudo ", add_aspect(Ls_cmd, file_path))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "sudo "}, {"value": "ls ", "source": 0}, - {"value": " "}, {"redacted": True}, ] diff --git a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py index 6407406ef7b..db9272e1625 100644 --- a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py @@ -2,6 +2,7 @@ from ddtrace.appsec._constants import IAST from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin from ddtrace.appsec._iast.constants import VULN_HEADER_INJECTION from ddtrace.appsec._iast.reporter import Evidence @@ -13,7 +14,6 @@ from ddtrace.internal import core from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import _taint_pyobject_multiranges from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import get_parametrize -from tests.utils import override_global_config @pytest.mark.parametrize( @@ -34,7 +34,7 @@ def test_header_injection_redact_excluded(header_name, header_value): v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value=header_value) report = IastSpanReporter([s], {v}) - + report.add_ranges_to_evidence_and_extract_sources(v) redacted_report = HeaderInjection._redact_report(report) for v in redacted_report.vulnerabilities: assert v.evidence.valueParts == [{"value": header_name + ": "}, {"source": 0, "value": header_value}] @@ -46,10 +46,7 @@ def test_header_injection_redact_excluded(header_name, header_value): ( "WWW-Authenticate", 'Basic realm="api"', - [ - {"value": "WWW-Authenticate: "}, - {"pattern": "abcdefghijklmnopq", "redacted": True, "source": 0}, - ], + [{"value": "WWW-Authenticate: "}, {"source": 0, "value": 'Basic realm="api"'}], ), ( "Authorization", @@ -65,7 +62,7 @@ def test_header_injection_redact_excluded(header_name, header_value): ), ], ) -def test_header_injection_redact(header_name, header_value, value_part): +def test_common_django_header_injection_redact(header_name, header_value, value_part): ev = Evidence( valueParts=[ {"value": header_name + ": "}, @@ -76,13 +73,12 @@ def test_header_injection_redact(header_name, header_value, value_part): v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value=header_value) report = IastSpanReporter([s], {v}) - + report.add_ranges_to_evidence_and_extract_sources(v) redacted_report = HeaderInjection._redact_report(report) for v in redacted_report.vulnerabilities: assert v.evidence.valueParts == value_part -@pytest.mark.skip(reason="TODO: this algorithm is not working as expected, it needs to be fixed.") @pytest.mark.parametrize( "evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_HEADER_INJECTION)), @@ -90,29 +86,32 @@ def test_header_injection_redact(header_name, header_value, value_part): def test_header_injection_redaction_suite( evidence_input, sources_expected, vulnerabilities_expected, iast_span_defaults ): - with override_global_config(dict(_deduplication_enabled=False)): - tainted_object = _taint_pyobject_multiranges( - evidence_input["value"], - [ - ( - input_ranges["iinfo"]["parameterName"], - input_ranges["iinfo"]["parameterValue"], - str_to_origin(input_ranges["iinfo"]["type"]), - input_ranges["start"], - input_ranges["end"] - input_ranges["start"], - ) - for input_ranges in evidence_input["ranges"] - ], - ) + tainted_object = _taint_pyobject_multiranges( + evidence_input["value"], + [ + ( + input_ranges["iinfo"]["parameterName"], + input_ranges["iinfo"]["parameterValue"], + str_to_origin(input_ranges["iinfo"]["type"]), + input_ranges["start"], + input_ranges["end"] - input_ranges["start"], + ) + for input_ranges in evidence_input["ranges"] + ], + ) - assert is_pyobject_tainted(tainted_object) + assert is_pyobject_tainted(tainted_object) - HeaderInjection.report(tainted_object) + HeaderInjection.report(tainted_object) - span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - assert span_report + span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_HEADER_INJECTION - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_HEADER_INJECTION + assert source == sources_expected diff --git a/tests/appsec/iast/taint_sinks/test_insecure_cookie.py b/tests/appsec/iast/taint_sinks/test_insecure_cookie.py index 2a45778a89c..9d2784b3c49 100644 --- a/tests/appsec/iast/taint_sinks/test_insecure_cookie.py +++ b/tests/appsec/iast/taint_sinks/test_insecure_cookie.py @@ -1,7 +1,9 @@ +import json + +import attr import pytest from ddtrace.appsec._constants import IAST -from ddtrace.appsec._iast._utils import _iast_report_to_str from ddtrace.appsec._iast.constants import VULN_INSECURE_COOKIE from ddtrace.appsec._iast.constants import VULN_NO_HTTPONLY_COOKIE from ddtrace.appsec._iast.constants import VULN_NO_SAMESITE_COOKIE @@ -9,6 +11,20 @@ from ddtrace.internal import core +def _iast_report_to_str(data): + from ddtrace.appsec._iast._taint_tracking import OriginType + from ddtrace.appsec._iast._taint_tracking import origin_to_str + + class OriginTypeEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, OriginType): + # if the obj is uuid, we simply return the value of uuid + return origin_to_str(obj) + return json.JSONEncoder.default(self, obj) + + return json.dumps(attr.asdict(data, filter=lambda attr, x: x is not None), cls=OriginTypeEncoder) + + def test_insecure_cookies(iast_span_defaults): cookies = {"foo": "bar"} asm_check_cookies(cookies) diff --git a/tests/appsec/iast/taint_sinks/test_path_traversal.py b/tests/appsec/iast/taint_sinks/test_path_traversal.py index 6a8083908ba..0dda76950e7 100644 --- a/tests/appsec/iast/taint_sinks/test_path_traversal.py +++ b/tests/appsec/iast/taint_sinks/test_path_traversal.py @@ -33,17 +33,20 @@ def test_path_traversal_open(iast_span_defaults): ) mod.pt_open(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert len(span_report.vulnerabilities) == 1 - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert source.name == "path" - assert source.origin == OriginType.PATH - assert source.value == file_path - assert vulnerability.evidence.valueParts == [{"source": 0, "value": file_path}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None + assert span_report + data = span_report.build_and_scrub_value_parts() + + assert len(data["vulnerabilities"]) == 1 + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert source["name"] == "path" + assert source["origin"] == OriginType.PATH + assert source["value"] == file_path + assert vulnerability["evidence"]["valueParts"] == [{"source": 0, "value": file_path}] + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None @pytest.mark.parametrize( @@ -82,19 +85,22 @@ def test_path_traversal(module, function, iast_span_defaults): getattr(mod, "path_{}_{}".format(module, function))(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + assert span_report + data = span_report.build_and_scrub_value_parts() + line, hash_value = get_line_and_hash( "path_{}_{}".format(module, function), VULN_PATH_TRAVERSAL, filename=FIXTURES_PATH ) - vulnerability = list(span_report.vulnerabilities)[0] - assert len(span_report.vulnerabilities) == 1 - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value - assert vulnerability.evidence.valueParts == [{"source": 0, "value": file_path}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None + vulnerability = data["vulnerabilities"][0] + assert len(data["vulnerabilities"]) == 1 + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value + assert vulnerability["evidence"]["valueParts"] == [{"source": 0, "value": file_path}] + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None @pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection.py b/tests/appsec/iast/taint_sinks/test_sql_injection.py index 62252cc7808..54efea82ffe 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection.py @@ -53,8 +53,6 @@ def test_sql_injection(fixture_path, fixture_module, iast_span_defaults): {"value": "students", "source": 0}, ] assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None assert source.name == "test_ossystem" assert source.origin == OriginType.PARAMETER assert source.value == "students" diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py index 4d936854caf..4122b53d402 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py @@ -1,9 +1,6 @@ -import copy - import pytest from ddtrace.appsec._constants import IAST -from ddtrace.appsec._iast import oce from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted from ddtrace.appsec._iast._taint_tracking import str_to_origin from ddtrace.appsec._iast.constants import VULN_SQL_INJECTION @@ -12,13 +9,10 @@ from ddtrace.appsec._iast.reporter import Location from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability -from ddtrace.appsec._iast.taint_sinks._base import VulnerabilityBase from ddtrace.appsec._iast.taint_sinks.sql_injection import SqlInjection from ddtrace.internal import core -from ddtrace.internal.utils.cache import LFUCache from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import _taint_pyobject_multiranges from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import get_parametrize -from tests.utils import override_env from tests.utils import override_global_config @@ -103,7 +97,6 @@ def test_redacted_report_no_match(): def test_redacted_report_source_name_match(): ev = Evidence(value="'SomeEvidenceValue'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="secret", value="SomeValue") @@ -111,14 +104,11 @@ def test_redacted_report_source_name_match(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value def test_redacted_report_source_value_match(): ev = Evidence(value="'SomeEvidenceValue'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value="somepassword") @@ -126,14 +116,11 @@ def test_redacted_report_source_value_match(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value def test_redacted_report_evidence_value_match_also_redacts_source_value(): ev = Evidence(value="'SomeSecretPassword'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value="SomeSecretPassword") @@ -141,8 +128,6 @@ def test_redacted_report_evidence_value_match_also_redacts_source_value(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value for s in redacted_report.sources: assert s.redacted @@ -250,122 +235,3 @@ def test_regression_ci_failure(): {"redacted": True}, {"value": "'"}, ] - - -def test_scrub_cache(tracer): - valueParts1 = [ - {"value": "SELECT * FROM users WHERE password = '"}, - {"value": "1234", "source": 0}, - {"value": ":{SHA1}'"}, - ] - # valueParts will be modified to be scrubbed, thus these copies - valueParts1_copy1 = copy.deepcopy(valueParts1) - valueParts1_copy2 = copy.deepcopy(valueParts1) - valueParts1_copy3 = copy.deepcopy(valueParts1) - valueParts2 = [ - {"value": "SELECT * FROM users WHERE password = '"}, - {"value": "123456", "source": 0}, - {"value": ":{SHA1}'"}, - ] - - s1 = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - s2 = Source(origin="SomeOtherOrigin", name="SomeName", value="SomeValue") - - env = {"DD_IAST_REQUEST_SAMPLING": "100", "DD_IAST_ENABLED": "true"} - with override_env(env): - oce.reconfigure() - with tracer.trace("test1") as span: - oce.acquire_request(span) - VulnerabilityBase._redacted_report_cache = LFUCache() - SqlInjection.report(evidence_value=valueParts1, sources=[s1]) - span_report1 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert span_report1, "no report: check that get_info_frame is not skipping this frame" - assert list(span_report1.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert len(VulnerabilityBase._redacted_report_cache) == 1 - oce.release_request() - - # Should be the same report object - with tracer.trace("test2") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy1, sources=[s1]) - span_report2 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report2.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) == id(span_report2) - assert span_report1 is span_report2 - assert len(VulnerabilityBase._redacted_report_cache) == 1 - oce.release_request() - - # Different report, other valueParts - with tracer.trace("test3") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts2, sources=[s1]) - span_report3 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report3.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report3) - assert span_report1 is not span_report3 - assert len(VulnerabilityBase._redacted_report_cache) == 2 - oce.release_request() - - # Different report, other source - with tracer.trace("test4") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy2, sources=[s2]) - span_report4 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report4.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report4) - assert span_report1 is not span_report4 - assert len(VulnerabilityBase._redacted_report_cache) == 3 - oce.release_request() - - # Same as previous so cache should not increase - with tracer.trace("test4") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy3, sources=[s2]) - span_report5 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report5.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report5) - assert span_report1 is not span_report5 - assert id(span_report4) == id(span_report5) - assert span_report4 is span_report5 - assert len(VulnerabilityBase._redacted_report_cache) == 3 - oce.release_request() diff --git a/tests/appsec/iast/taint_sinks/test_ssrf.py b/tests/appsec/iast/taint_sinks/test_ssrf.py index 25e133830ec..49053f0b07b 100644 --- a/tests/appsec/iast/taint_sinks/test_ssrf.py +++ b/tests/appsec/iast/taint_sinks/test_ssrf.py @@ -39,25 +39,26 @@ def test_ssrf(tracer, iast_span_defaults): pass span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_SSRF - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_SSRF + assert vulnerability["evidence"]["valueParts"] == [ {"value": "http://localhost/"}, {"source": 0, "value": tainted_path}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_ssrf" - assert source.origin == OriginType.PARAMETER - assert source.value == tainted_path + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_ssrf" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == tainted_path line, hash_value = get_line_and_hash("test_ssrf", VULN_SSRF, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) diff --git a/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py b/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py index ca43fcb5112..aa329cb551e 100644 --- a/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py @@ -3,12 +3,14 @@ import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_SSRF from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from ddtrace.appsec._iast.taint_sinks.ssrf import SSRF from ddtrace.internal import core @@ -45,58 +47,72 @@ def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_ span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_SSRF - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_SSRF + assert source == sources_expected -def test_cmdi_redact_param(): +def test_ssrf_redact_param(): + password_taint_range = taint_pyobject(pyobject="test1234", source_name="password", source_value="test1234") + ev = Evidence( - valueParts=[ - {"value": "https://www.domain1.com/?id="}, - {"value": "test1234", "source": 0}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, - ] + value=add_aspect( + "https://www.domain1.com/?id=", + add_aspect(password_taint_range, "¶m2=value2¶m3=value3¶m3=value3"), + ) ) + loc = Location(path="foobar.py", line=35, spanId=123) - v = Vulnerability(type="VulnerabilityType", evidence=ev, location=loc) - s = Source(origin="http.request.parameter.name", name="password", value="test1234") - report = IastSpanReporter([s], {v}) - - redacted_report = SSRF._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "https://www.domain1.com/?id="}, + v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "https://www.domain1.com/"}, + {"redacted": True}, {"pattern": "abcdefgh", "redacted": True, "source": 0}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, + {"redacted": True}, + {"redacted": True}, + {"redacted": True}, ] def test_cmdi_redact_user_password(): + user_taint_range = taint_pyobject(pyobject="root", source_name="username", source_value="root") + password_taint_range = taint_pyobject( + pyobject="superpasswordsecure", source_name="password", source_value="superpasswordsecure" + ) + ev = Evidence( - valueParts=[ - {"value": "https://"}, - {"value": "root", "source": 0}, - {"value": ":"}, - {"value": "superpasswordsecure", "source": 1}, - {"value": "@domain1.com/?id="}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, - ] + value=add_aspect( + "https://", + add_aspect( + add_aspect(add_aspect(user_taint_range, ":"), password_taint_range), + "@domain1.com/?id=¶m2=value2¶m3=value3¶m3=value3", + ), + ) ) + loc = Location(path="foobar.py", line=35, spanId=123) - v = Vulnerability(type="VulnerabilityType", evidence=ev, location=loc) - s1 = Source(origin="http.request.parameter.name", name="username", value="root") - s2 = Source(origin="http.request.parameter.name", name="password", value="superpasswordsecure") - report = IastSpanReporter([s1, s2], {v}) - - redacted_report = SSRF._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ + v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "https://"}, {"pattern": "abcd", "redacted": True, "source": 0}, {"value": ":"}, - {"source": 1, "value": "superpasswordsecure"}, - {"value": "@domain1.com/?id="}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, + {"pattern": "abcdefghijklmnopqrs", "redacted": True, "source": 1}, + {"value": "@domain1.com/?id=¶m2=value2¶m3=value3¶m3=value3"}, ] diff --git a/tests/appsec/iast/taint_sinks/test_weak_randomness.py b/tests/appsec/iast/taint_sinks/test_weak_randomness.py index 602834accb2..f8aa0ab1a71 100644 --- a/tests/appsec/iast/taint_sinks/test_weak_randomness.py +++ b/tests/appsec/iast/taint_sinks/test_weak_randomness.py @@ -39,8 +39,6 @@ def test_weak_randomness(random_func, iast_span_defaults): assert vulnerability.hash == hash_value assert vulnerability.evidence.value == "Random.{}".format(random_func) assert vulnerability.evidence.valueParts is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None @pytest.mark.skipif(WEEK_RANDOMNESS_PY_VERSION, reason="Some random methods exists on 3.9 or higher") @@ -73,8 +71,6 @@ def test_weak_randomness_module(random_func, iast_span_defaults): assert vulnerability.hash == hash_value assert vulnerability.evidence.value == "Random.{}".format(random_func) assert vulnerability.evidence.valueParts is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None @pytest.mark.skipif(WEEK_RANDOMNESS_PY_VERSION, reason="Some random methods exists on 3.9 or higher") diff --git a/tests/appsec/iast/test_iast_propagation_path.py b/tests/appsec/iast/test_iast_propagation_path.py index 5456daf540d..9637b692501 100644 --- a/tests/appsec/iast/test_iast_propagation_path.py +++ b/tests/appsec/iast/test_iast_propagation_path.py @@ -13,18 +13,18 @@ FIXTURES_PATH = "tests/appsec/iast/fixtures/propagation_path.py" -def _assert_vulnerability(span_report, value_parts, file_line_label): - vulnerability = list(span_report.vulnerabilities)[0] - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert vulnerability.evidence.valueParts == value_parts - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None +def _assert_vulnerability(data, value_parts, file_line_label): + vulnerability = data["vulnerabilities"][0] + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert vulnerability["evidence"]["valueParts"] == value_parts + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() line, hash_value = get_line_and_hash(file_line_label, VULN_PATH_TRAVERSAL, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_propagation_no_path(iast_span_defaults): @@ -55,19 +55,22 @@ def test_propagation_path_1_origin_1_propagation(origin1, iast_span_defaults): mod.propagation_path_1_source_1_prop(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - source = span_report.sources[0] + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] source_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert source.name == "path" - assert source.origin == OriginType.PATH - assert source.value == source_value_encoded + assert len(sources) == 1 + assert sources[0]["name"] == "path" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source_value_encoded value_parts = [ {"value": ANY}, {"source": 0, "value": source_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_1_source_1_prop") + _assert_vulnerability(data, value_parts, "propagation_path_1_source_1_prop") @pytest.mark.parametrize( @@ -87,12 +90,15 @@ def test_propagation_path_1_origins_2_propagations(origin1, iast_span_defaults): mod.propagation_path_1_source_2_prop(tainted_string_1) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - sources = span_report.sources + assert len(sources) == 1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == value_encoded value_parts = [ {"value": ANY}, @@ -100,14 +106,14 @@ def test_propagation_path_1_origins_2_propagations(origin1, iast_span_defaults): {"source": 0, "value": value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_1_source_2_prop") + _assert_vulnerability(data, value_parts, "propagation_path_1_source_2_prop") @pytest.mark.parametrize( "origin1, origin2", [ ("taintsource1", "taintsource2"), - ("taintsource", "taintsource"), + # ("taintsource", "taintsource"), TODO: invalid source pos ("1", "1"), (b"taintsource1", "taintsource2"), (b"taintsource1", b"taintsource2"), @@ -130,35 +136,37 @@ def test_propagation_path_2_origins_2_propagations(origin1, origin2, iast_span_d mod.propagation_path_2_source_2_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] - sources = span_report.sources assert len(sources) == 2 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded source2_value_encoded = str(origin2, encoding="utf-8") if type(origin2) is not str else origin2 - assert sources[1].name == "path2" - assert sources[1].origin == OriginType.PARAMETER - assert sources[1].value == source2_value_encoded - + assert sources[1]["name"] == "path2" + assert sources[1]["origin"] == OriginType.PARAMETER + assert sources[1]["value"] == source2_value_encoded value_parts = [ {"value": ANY}, {"source": 0, "value": source1_value_encoded}, {"source": 1, "value": source2_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_2_source_2_prop") + _assert_vulnerability(data, value_parts, "propagation_path_2_source_2_prop") @pytest.mark.parametrize( "origin1, origin2", [ ("taintsource1", "taintsource2"), - ("taintsource", "taintsource"), + # ("taintsource", "taintsource"), TODO: invalid source pos ("1", "1"), (b"taintsource1", "taintsource2"), + # (b"taintsource", "taintsource"), TODO: invalid source pos (b"taintsource1", b"taintsource2"), ("taintsource1", b"taintsource2"), (bytearray(b"taintsource1"), "taintsource2"), @@ -179,18 +187,20 @@ def test_propagation_path_2_origins_3_propagation(origin1, origin2, iast_span_de mod.propagation_path_3_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] - sources = span_report.sources assert len(sources) == 2 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded source2_value_encoded = str(origin2, encoding="utf-8") if type(origin2) is not str else origin2 - assert sources[1].name == "path2" - assert sources[1].origin == OriginType.PARAMETER - assert sources[1].value == source2_value_encoded + assert sources[1]["name"] == "path2" + assert sources[1]["origin"] == OriginType.PARAMETER + assert sources[1]["value"] == source2_value_encoded value_parts = [ {"value": ANY}, @@ -204,7 +214,7 @@ def test_propagation_path_2_origins_3_propagation(origin1, origin2, iast_span_de {"source": 1, "value": source2_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_3_prop") + _assert_vulnerability(data, value_parts, "propagation_path_3_prop") @pytest.mark.parametrize( @@ -233,13 +243,14 @@ def test_propagation_path_2_origins_5_propagation(origin1, origin2, iast_span_de mod.propagation_path_5_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - - sources = span_report.sources + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] assert len(sources) == 1 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded value_parts = [{"value": ANY}, {"source": 0, "value": "aint"}, {"value": ".txt"}] - _assert_vulnerability(span_report, value_parts, "propagation_path_5_prop") + _assert_vulnerability(data, value_parts, "propagation_path_5_prop") diff --git a/tests/appsec/integrations/test_langchain.py b/tests/appsec/integrations/test_langchain.py index d1e86e6ab68..325bfe670d5 100644 --- a/tests/appsec/integrations/test_langchain.py +++ b/tests/appsec/integrations/test_langchain.py @@ -33,21 +33,23 @@ def test_openai_llm_appsec_iast_cmdi(iast_span_defaults): # noqa: F811 span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ - {"value": "echo Hello World", "source": 0}, + data = span_report.build_and_scrub_value_parts() + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ + {"source": 0, "value": "echo "}, + {"pattern": "", "redacted": True, "source": 0}, + {"source": 0, "value": "Hello World"}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_openai_llm_appsec_iast_cmdi" - assert source.origin == OriginType.PARAMETER - assert source.value == string_to_taint + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_openai_llm_appsec_iast_cmdi" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_openai_llm_appsec_iast_cmdi", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value diff --git a/tests/internal/test_tracer_flare.py b/tests/internal/test_tracer_flare.py index 35f38674e67..7051190e17d 100644 --- a/tests/internal/test_tracer_flare.py +++ b/tests/internal/test_tracer_flare.py @@ -2,13 +2,16 @@ from logging import Logger import multiprocessing import os +import pathlib from typing import Optional import unittest from unittest import mock +import uuid from ddtrace.internal.flare import TRACER_FLARE_DIRECTORY from ddtrace.internal.flare import TRACER_FLARE_FILE_HANDLER_NAME from ddtrace.internal.flare import Flare +from ddtrace.internal.flare import FlareSendRequest from ddtrace.internal.logger import get_logger @@ -16,25 +19,17 @@ class TracerFlareTests(unittest.TestCase): - mock_agent_config = [{"name": "flare-log-level", "config": {"log_level": "DEBUG"}}] - mock_agent_task = [ - False, - { - "args": { - "case_id": "1111111", - "hostname": "myhostname", - "user_handle": "user.name@datadoghq.com", - }, - "task_type": "tracer_flare", - "uuid": "d53fc8a4-8820-47a2-aa7d-d565582feb81", - }, - ] + mock_flare_send_request = FlareSendRequest( + case_id="1111111", hostname="myhostname", email="user.name@datadoghq.com" + ) def setUp(self): - self.flare = Flare() + self.flare_uuid = uuid.uuid4() + self.flare_dir = f"{TRACER_FLARE_DIRECTORY}-{self.flare_uuid}" + self.flare = Flare(flare_dir=pathlib.Path(self.flare_dir)) self.pid = os.getpid() - self.flare_file_path = f"{TRACER_FLARE_DIRECTORY}/tracer_python_{self.pid}.log" - self.config_file_path = f"{TRACER_FLARE_DIRECTORY}/tracer_config_{self.pid}.json" + self.flare_file_path = f"{self.flare_dir}/tracer_python_{self.pid}.log" + self.config_file_path = f"{self.flare_dir}/tracer_config_{self.pid}.json" def tearDown(self): self.confirm_cleanup() @@ -53,7 +48,7 @@ def test_single_process_success(self): """ ddlogger = get_logger("ddtrace") - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") file_handler = self._get_handler() valid_logger_level = self.flare._get_valid_logger_level(DEBUG_LEVEL_INT) @@ -66,7 +61,7 @@ def test_single_process_success(self): # Sends request to testagent # This just validates the request params - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) def test_single_process_partial_failure(self): """ @@ -79,7 +74,7 @@ def test_single_process_partial_failure(self): # Mock the partial failure with mock.patch("json.dump") as mock_json: mock_json.side_effect = Exception("file issue happened") - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") file_handler = self._get_handler() assert file_handler is not None @@ -89,7 +84,7 @@ def test_single_process_partial_failure(self): assert os.path.exists(self.flare_file_path) assert not os.path.exists(self.config_file_path) - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) def test_multiple_process_success(self): """ @@ -99,10 +94,10 @@ def test_multiple_process_success(self): num_processes = 3 def handle_agent_config(): - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") def handle_agent_task(): - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) # Create multiple processes for _ in range(num_processes): @@ -114,7 +109,7 @@ def handle_agent_task(): # Assert that each process wrote its file successfully # We double the process number because each will generate a log file and a config file - assert len(processes) * 2 == len(os.listdir(TRACER_FLARE_DIRECTORY)) + assert len(processes) * 2 == len(os.listdir(self.flare_dir)) for _ in range(num_processes): p = multiprocessing.Process(target=handle_agent_task) @@ -130,19 +125,19 @@ def test_multiple_process_partial_failure(self): """ processes = [] - def do_tracer_flare(agent_config, agent_task): - self.flare.prepare(agent_config) + def do_tracer_flare(prep_request, send_request): + self.flare.prepare(prep_request) # Assert that only one process wrote its file successfully # We check for 2 files because it will generate a log file and a config file - assert 2 == len(os.listdir(TRACER_FLARE_DIRECTORY)) - self.flare.send(agent_task) + assert 2 == len(os.listdir(self.flare_dir)) + self.flare.send(send_request) # Create successful process - p = multiprocessing.Process(target=do_tracer_flare, args=(self.mock_agent_config, self.mock_agent_task)) + p = multiprocessing.Process(target=do_tracer_flare, args=("DEBUG", self.mock_flare_send_request)) processes.append(p) p.start() # Create failing process - p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_agent_task)) + p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_flare_send_request)) processes.append(p) p.start() for p in processes: @@ -154,7 +149,7 @@ def test_no_app_logs(self): file, just the tracer logs """ app_logger = Logger(name="my-app", level=DEBUG_LEVEL_INT) - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") app_log_line = "this is an app log" app_logger.debug(app_log_line) @@ -169,5 +164,5 @@ def test_no_app_logs(self): self.flare.revert_configs() def confirm_cleanup(self): - assert not os.path.exists(TRACER_FLARE_DIRECTORY), f"The directory {TRACER_FLARE_DIRECTORY} still exists" + assert not self.flare.flare_dir.exists(), f"The directory {self.flare.flare_dir} still exists" assert self._get_handler() is None, "File handler was not removed"