diff --git a/.github/workflows/build_deploy.yml b/.github/workflows/build_deploy.yml index 018cc7b2ac4..4a6775f33bf 100644 --- a/.github/workflows/build_deploy.yml +++ b/.github/workflows/build_deploy.yml @@ -9,18 +9,6 @@ on: # before merging/releasing - build_deploy* pull_request: - paths: - - ".github/workflows/build_deploy.yml" - - ".github/workflows/build_python_3.yml" - - "setup.py" - - "setup.cfg" - - "pyproject.toml" - - "**.c" - - "**.h" - - "**.cpp" - - "**.hpp" - - "**.pyx" - - "ddtrace/vendor/**" release: types: - published diff --git a/benchmarks/appsec_iast_propagation/scenario.py b/benchmarks/appsec_iast_propagation/scenario.py index 56ac67128b7..ec827b7bb21 100644 --- a/benchmarks/appsec_iast_propagation/scenario.py +++ b/benchmarks/appsec_iast_propagation/scenario.py @@ -1,8 +1,7 @@ from typing import Any # noqa:F401 import bm - -from tests.utils import override_env +from bm.utils import override_env with override_env({"DD_IAST_ENABLED": "True"}): @@ -42,7 +41,7 @@ def aspect_function(internal_loop, tainted): value = "" res = value for _ in range(internal_loop): - res = add_aspect(res, join_aspect(str.join, 1, "_", (tainted, "_", tainted))) + res = add_aspect(res, join_aspect("_".join, 1, "_", (tainted, "_", tainted))) value = res res = add_aspect(res, tainted) value = res diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py index f68bf25bd94..de4b4686828 100644 --- a/ddtrace/_trace/trace_handlers.py +++ b/ddtrace/_trace/trace_handlers.py @@ -636,8 +636,9 @@ def _on_botocore_patched_bedrock_api_call_exception(ctx, exc_info): span = ctx[ctx["call_key"]] span.set_exc_info(*exc_info) prompt = ctx["prompt"] + model_name = ctx["model_name"] integration = ctx["bedrock_integration"] - if integration.is_pc_sampled_llmobs(span): + if integration.is_pc_sampled_llmobs(span) and "embed" not in model_name: integration.llmobs_set_tags(span, formatted_response=None, prompt=prompt, err=True) span.finish() @@ -659,6 +660,7 @@ def 
def set_waf_telemetry_results(
    rules_version: Optional[str],
    is_triggered: bool,
    is_blocked: bool,
    is_timeout: bool,
    rule_type: Optional[str] = None,
) -> None:
    """Accumulate the outcome of one WAF call into the per-request telemetry results.

    Args:
        rules_version: Version of the rules that produced the result; stored only
            for request-blocking calls and only when not ``None``.
        is_triggered: Whether the WAF call matched a rule.
        is_blocked: Whether the WAF call resulted in a block.
        is_timeout: Whether the WAF call timed out.
        rule_type: ``None`` for request-blocking telemetry, otherwise the exploit
            prevention rule type whose counters must be updated. Defaults to
            ``None`` so that callers using the previous four-argument form keep
            their request-blocking behaviour.
    """
    result = get_value(_TELEMETRY, _TELEMETRY_WAF_RESULTS)
    if result is None:
        # No telemetry results are available in the current context.
        return

    if rule_type is None:
        # Request Blocking telemetry: flags are sticky for the whole request.
        result["triggered"] |= is_triggered
        result["blocked"] |= is_blocked
        result["timeout"] |= is_timeout
        if rules_version is not None:
            result["version"] = rules_version
        return

    # Exploit Prevention telemetry: per rule type counters.
    rasp = result["rasp"]
    rasp["called"] = True
    # `.get(..., 0)` keeps telemetry bookkeeping from raising KeyError if a rule
    # type that was not pre-populated in the counters is reported.
    rasp["eval"][rule_type] = rasp["eval"].get(rule_type, 0) + 1
    rasp["match"][rule_type] = rasp["match"].get(rule_type, 0) + int(is_triggered)
    rasp["timeout"][rule_type] = rasp["timeout"].get(rule_type, 0) + int(is_timeout)
the exploit prevention feature: add block here return original_open_callable(*args, **kwargs) @@ -72,6 +82,7 @@ def wrapped_open_ED4CF71136E15EBF(original_open_callable, instance, args, kwargs try: from ddtrace.appsec._asm_request_context import call_waf_callback from ddtrace.appsec._asm_request_context import in_context + from ddtrace.appsec._constants import EXPLOIT_PREVENTION except ImportError: # open is used during module initialization # and shouldn't be changed at that time @@ -82,7 +93,11 @@ def wrapped_open_ED4CF71136E15EBF(original_open_callable, instance, args, kwargs if url.__class__.__name__ == "Request": url = url.get_full_url() if isinstance(url, str): - call_waf_callback({"SSRF_ADDRESS": url}, crop_trace="wrapped_open_ED4CF71136E15EBF") + call_waf_callback( + {EXPLOIT_PREVENTION.ADDRESS.SSRF: url}, + crop_trace="wrapped_open_ED4CF71136E15EBF", + rule_type=EXPLOIT_PREVENTION.TYPE.SSRF, + ) # DEV: Next part of the exploit prevention feature: add block here return original_open_callable(*args, **kwargs) @@ -100,6 +115,7 @@ def wrapped_request_D8CB81E472AF98A2(original_request_callable, instance, args, try: from ddtrace.appsec._asm_request_context import call_waf_callback from ddtrace.appsec._asm_request_context import in_context + from ddtrace.appsec._constants import EXPLOIT_PREVENTION except ImportError: # open is used during module initialization # and shouldn't be changed at that time @@ -108,7 +124,11 @@ def wrapped_request_D8CB81E472AF98A2(original_request_callable, instance, args, url = args[1] if len(args) > 1 else kwargs.get("url", None) if url and in_context(): if isinstance(url, str): - call_waf_callback({"SSRF_ADDRESS": url}, crop_trace="wrapped_request_D8CB81E472AF98A2") + call_waf_callback( + {EXPLOIT_PREVENTION.ADDRESS.SSRF: url}, + crop_trace="wrapped_request_D8CB81E472AF98A2", + rule_type=EXPLOIT_PREVENTION.TYPE.SSRF, + ) # DEV: Next part of the exploit prevention feature: add block here return original_request_callable(*args, 
class SensitiveHandler:
    """
    Redacts sensitive fragments from IAST vulnerability evidence.

    Ranges handled by this class are dictionaries with integer ``"start"`` and
    ``"end"`` keys (``end`` exclusive). The ``is_sensible_*`` method names are kept
    for API compatibility; they test whether something is *sensitive*.
    """

    def __init__(self):
        self._name_pattern = re.compile(asm_config._iast_redaction_name_pattern, re.IGNORECASE | re.MULTILINE)
        self._value_pattern = re.compile(asm_config._iast_redaction_value_pattern, re.IGNORECASE | re.MULTILINE)

        # Vulnerability type -> function returning the sensitive ranges of an evidence.
        self._sensitive_analyzers = {
            VULN_CMDI: command_injection_sensitive_analyzer,
            # SQL_INJECTION: sql_sensitive_analyzer,
            VULN_SSRF: url_sensitive_analyzer,
            VULN_HEADER_INJECTION: header_injection_sensitive_analyzer,
        }

    @staticmethod
    def _contains(range_container, range_contained):
        """
        Checks if a range_container contains another range_contained.

        Args:
        - range_container (dict): The container range.
        - range_contained (dict): The contained range.

        Returns:
        - bool: True if range_container contains range_contained, False otherwise.
        """
        return (
            range_container["start"] <= range_contained["start"]
            and range_container["end"] >= range_contained["end"]
        )

    @staticmethod
    def _intersects(range_a, range_b):
        """
        Checks if two ranges intersect.

        Args:
        - range_a (dict): First range.
        - range_b (dict): Second range.

        Returns:
        - bool: True if the ranges intersect, False otherwise.
        """
        return range_b["start"] < range_a["end"] and range_b["end"] > range_a["start"]

    def _remove(self, range_, range_to_remove):
        """
        Removes a range_to_remove from a range_.

        Args:
        - range_ (dict): The range to remove from.
        - range_to_remove (dict): The range to remove.

        Returns:
        - list: The parts of range_ left after the removal (zero, one or two ranges,
          ordered by position). range_ itself is returned when the ranges do not
          intersect.
        """
        if not self._intersects(range_, range_to_remove):
            return [range_]
        if self._contains(range_to_remove, range_):
            return []

        remaining = []
        if range_to_remove["start"] > range_["start"]:
            # Part of range_ located before the removed range.
            remaining.append({"start": range_["start"], "end": range_to_remove["start"]})
        if range_to_remove["end"] < range_["end"]:
            # Part of range_ located after the removed range.
            remaining.append({"start": range_to_remove["end"], "end": range_["end"]})
        return remaining

    def is_sensible_name(self, name):
        """
        Checks if a name is sensitive based on the name pattern.

        Args:
        - name (str): The name to check.

        Returns:
        - bool: True if the name pattern is found in the name, False otherwise.
        """
        return bool(self._name_pattern.search(name))

    def is_sensible_value(self, value):
        """
        Checks if a value is sensitive based on the value pattern.

        Args:
        - value (str): The value to check.

        Returns:
        - bool: True if the value pattern is found in the value, False otherwise.
        """
        return bool(self._value_pattern.search(value))

    def is_sensible_source(self, source):
        """
        Checks if a source is sensitive.

        Args:
        - source: Object exposing ``name`` and ``value`` attributes, or None.

        Returns:
        - bool: True if the source has a value and either its name or its value
          is sensitive, False otherwise.
        """
        return (
            source is not None
            and source.value is not None
            and (self.is_sensible_name(source.name) or self.is_sensible_value(source.value))
        )

    def scrub_evidence(self, vulnerability_type, evidence, tainted_ranges, sources):
        """
        Scrubs evidence based on the given vulnerability type.

        Args:
        - vulnerability_type (str): The vulnerability type.
        - evidence: The evidence to scrub; its ``value`` attribute is read.
        - tainted_ranges (list): List of tainted ranges.
        - sources (list): List of sources.

        Returns:
        - dict: The redacted evidence (see to_redacted_json), or None when redaction
          is disabled, no analyzer is registered for the vulnerability type, or the
          evidence has no value.
        """
        if asm_config._iast_redaction_enabled:
            sensitive_analyzer = self._sensitive_analyzers.get(vulnerability_type)
            if sensitive_analyzer:
                if not evidence.value:
                    log.debug("No evidence value found in evidence %s", evidence)
                    return None
                sensitive_ranges = sensitive_analyzer(evidence, self._name_pattern, self._value_pattern)
                return self.to_redacted_json(evidence.value, sensitive_ranges, tainted_ranges, sources)
        return None

    def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources):
        """
        Converts evidence value to redacted JSON format.

        The ``sensitive`` and ``tainted_ranges`` sequences are only read: they are
        consumed through iterators, so the caller's lists are left untouched.
        Entries of ``sources`` may be updated in place (``pattern`` and ``redacted``).

        Args:
        - evidence_value (str): The evidence value.
        - sensitive (list): List of sensitive ranges.
        - tainted_ranges (list): List of tainted ranges.
        - sources (list): List of sources.

        Returns:
        - dict: ``{"redacted_value_parts": [...], "redacted_sources": [...]}``.
        """
        value_parts = []
        redacted_sources = []
        redacted_sources_context = dict()

        start = 0
        next_tainted_index = 0
        source_index = None

        # Iterators replace the former destructive `list.pop(0)` consumption.
        pending_tainted = iter(tainted_ranges or ())
        pending_sensitive = iter(sensitive or ())
        next_tainted = next(pending_tainted, None)
        next_sensitive = next(pending_sensitive, None)

        i = 0
        while i < len(evidence_value):
            if next_tainted and next_tainted["start"] == i:
                # Flush the plain text accumulated before this tainted range.
                self.write_value_part(value_parts, evidence_value[start:i], source_index)

                source_index = next_tainted_index

                # Sensitive ranges fully inside the tainted range redact (part of) its source.
                while next_sensitive and self._contains(next_tainted, next_sensitive):
                    redaction_start = next_sensitive["start"] - next_tainted["start"]
                    redaction_end = next_sensitive["end"] - next_tainted["start"]
                    if redaction_start == redaction_end:
                        self.write_redacted_value_part(value_parts, 0)
                    else:
                        self.redact_source(
                            sources,
                            redacted_sources,
                            redacted_sources_context,
                            source_index,
                            redaction_start,
                            redaction_end,
                        )
                    next_sensitive = next(pending_sensitive, None)

                # A sensitive range that only overlaps: redact the overlap and keep the remainder.
                if next_sensitive and self._intersects(next_sensitive, next_tainted):
                    redaction_start = next_sensitive["start"] - next_tainted["start"]
                    redaction_end = next_sensitive["end"] - next_tainted["start"]

                    self.redact_source(
                        sources,
                        redacted_sources,
                        redacted_sources_context,
                        source_index,
                        redaction_start,
                        redaction_end,
                    )

                    entries = self._remove(next_sensitive, next_tainted)
                    next_sensitive = entries[0] if entries else None

                if source_index < len(sources):
                    if not sources[source_index].redacted and self.is_sensible_source(sources[source_index]):
                        redacted_sources.append(source_index)
                        sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)]
                        sources[source_index].redacted = True

                if source_index in redacted_sources:
                    part_value = evidence_value[i : i + (next_tainted["end"] - next_tainted["start"])]

                    self.write_redacted_value_part(
                        value_parts,
                        len(part_value),
                        source_index,
                        part_value,
                        sources[source_index],
                        redacted_sources_context.get(source_index),
                        self.is_sensible_source(sources[source_index]),
                    )
                    redacted_sources_context[source_index] = []
                else:
                    substring_end = min(next_tainted["end"], len(evidence_value))
                    self.write_value_part(
                        value_parts, evidence_value[next_tainted["start"] : substring_end], source_index
                    )

                # Rewind to the last index of the consumed range; the next pass
                # normally falls through to `i += 1` and resumes at `start`.
                start = i + (next_tainted["end"] - next_tainted["start"])
                i = start - 1
                next_tainted = next(pending_tainted, None)
                next_tainted_index += 1
                source_index = None
                continue
            elif next_sensitive and next_sensitive["start"] == i:
                self.write_value_part(value_parts, evidence_value[start:i], source_index)
                if next_tainted and self._intersects(next_sensitive, next_tainted):
                    source_index = next_tainted_index

                    redaction_start = next_sensitive["start"] - next_tainted["start"]
                    redaction_end = next_sensitive["end"] - next_tainted["start"]
                    self.redact_source(
                        sources,
                        redacted_sources,
                        redacted_sources_context,
                        next_tainted_index,
                        redaction_start,
                        redaction_end,
                    )

                    entries = self._remove(next_sensitive, next_tainted)
                    next_sensitive = entries[0] if entries else None

                length = next_sensitive["end"] - next_sensitive["start"]
                self.write_redacted_value_part(value_parts, length)

                start = i + length
                i = start - 1
                next_sensitive = next(pending_sensitive, None)
                continue
            i += 1

        if start < len(evidence_value):
            self.write_value_part(value_parts, evidence_value[start:])

        return {"redacted_value_parts": value_parts, "redacted_sources": redacted_sources}

    def redact_source(self, sources, redacted_sources, redacted_sources_context, source_index, start, end):
        """
        Marks a source as redacted and records which part of it is sensitive.

        Args:
        - sources (list): List of sources; the entry at source_index may be updated.
        - redacted_sources (list): Indexes of sources redacted so far (appended to).
        - redacted_sources_context (dict): Source index -> list of recorded ranges.
        - source_index (int): Index of the source; nothing happens when None.
        - start (int): Start of the sensitive part.
        - end (int): End of the sensitive part.
        """
        if source_index is None:
            return

        source = sources[source_index]
        if not source.redacted:
            redacted_sources.append(source_index)
            source.pattern = REDACTED_SOURCE_BUFFER[: len(source.value)]
            source.redacted = True

        redacted_sources_context.setdefault(source_index, []).append({"start": start, "end": end})

    def write_value_part(self, value_parts, value, source_index=None):
        """
        Appends a non-redacted part to value_parts; empty values are skipped.

        Args:
        - value_parts (list): Output list of parts (appended to).
        - value (str): The text of the part.
        - source_index (int): Index of the source of the part, if any.
        """
        if value:
            if source_index is not None:
                value_parts.append({"value": value, "source": source_index})
            else:
                value_parts.append({"value": value})

    def write_redacted_value_part(
        self,
        value_parts,
        length,
        source_index=None,
        part_value=None,
        source=None,
        source_redaction_context=None,
        is_sensible_source=False,
    ):
        """
        Appends the redacted representation of a part to value_parts.

        Args:
        - value_parts (list): Output list of parts (appended to).
        - length (int): Length of the redacted text.
        - source_index (int): Index of the source; when None a bare
          ``{"redacted": True}`` part is written.
        - part_value (str): The original text of the part.
        - source: The source object (``pattern`` and ``value`` are read).
        - source_redaction_context (list): Sensitive ranges recorded for the source;
          None is treated as no ranges.
        - is_sensible_source (bool): True when the whole source is sensitive.
        """
        if source_index is not None:
            placeholder = source.pattern if part_value and part_value in source.value else "*" * length

            if is_sensible_source:
                value_parts.append({"redacted": True, "source": source_index, "pattern": placeholder})
            else:
                _value = part_value
                deduped_source_redaction_contexts = []

                # Ranges are dicts (unhashable), hence the list based de-duplication.
                for _source_redaction_context in source_redaction_context or ():
                    if _source_redaction_context not in deduped_source_redaction_contexts:
                        deduped_source_redaction_contexts.append(_source_redaction_context)

                offset = 0
                for _source_redaction_context in deduped_source_redaction_contexts:
                    if _source_redaction_context["start"] > 0:
                        value_parts.append(
                            {"source": source_index, "value": _value[: _source_redaction_context["start"] - offset]}
                        )
                        _value = _value[_source_redaction_context["start"] - offset :]
                        offset = _source_redaction_context["start"]

                    sensitive_start = _source_redaction_context["start"] - offset
                    if sensitive_start < 0:
                        sensitive_start = 0
                    sensitive = _value[sensitive_start : _source_redaction_context["end"] - offset]
                    index_of_part_value_in_pattern = source.value.find(sensitive)
                    pattern = (
                        placeholder[index_of_part_value_in_pattern : index_of_part_value_in_pattern + len(sensitive)]
                        if index_of_part_value_in_pattern > -1
                        else placeholder[_source_redaction_context["start"] : _source_redaction_context["end"]]
                    )

                    value_parts.append({"redacted": True, "source": source_index, "pattern": pattern})
                    _value = _value[len(pattern) :]
                    offset += len(pattern)
                if _value:
                    value_parts.append({"source": source_index, "value": _value})

        else:
            value_parts.append({"redacted": True})

    def set_redaction_patterns(self, redaction_name_pattern=None, redaction_value_pattern=None):
        """
        Replaces the name and/or value redaction patterns.

        A pattern that is falsy is ignored; a pattern that fails to compile is
        reported with a warning and the previous pattern is kept.

        Args:
        - redaction_name_pattern (str): New regular expression for sensitive names.
        - redaction_value_pattern (str): New regular expression for sensitive values.
        """
        if redaction_name_pattern:
            try:
                self._name_pattern = re.compile(redaction_name_pattern, re.IGNORECASE | re.MULTILINE)
            except re.error:
                log.warning("Redaction name pattern is not valid")

        if redaction_value_pattern:
            try:
                self._value_pattern = re.compile(redaction_value_pattern, re.IGNORECASE | re.MULTILINE)
            except re.error:
                log.warning("Redaction value pattern is not valid")
def header_injection_sensitive_analyzer(evidence, name_pattern, value_pattern):
    """Return the sensitive range of a ``<name><separator><value>`` header evidence.

    Args:
        evidence: Object whose ``value`` attribute holds the header text.
        name_pattern: Compiled regular expression searched in the header name.
        value_pattern: Compiled regular expression searched in the header value.

    Returns:
        A list with one ``{"start", "end"}`` range covering the header value when
        the name or the value is sensitive, otherwise an empty list. An evidence
        without ``HEADER_NAME_VALUE_SEPARATOR`` has no value to redact and yields
        an empty list (previously this produced a range whose start was past its
        end).
    """
    evidence_value = evidence.value
    # partition() splits on the first separator only; any later separator stays in the value.
    header_name, separator, header_value = evidence_value.partition(HEADER_NAME_VALUE_SEPARATOR)
    if not separator:
        return []

    if name_pattern.search(header_name) or value_pattern.search(header_value):
        return [{"start": len(header_name) + len(separator), "end": len(evidence_value)}]

    return []
AUTHORITY = r"^(?:[^:]+:)?//([^@]+)@"
QUERY_FRAGMENT = r"[?#&]([^=&;]+)=([^?#&]+)"
pattern = re.compile(f"({AUTHORITY})|({QUERY_FRAGMENT})", re.IGNORECASE | re.MULTILINE)

# Group layout of `pattern`. Each alternative is wrapped in its own group, so the
# inner captures of AUTHORITY and QUERY_FRAGMENT are shifted:
#   1 -> whole authority match      2 -> user info (text between "//" and "@")
#   3 -> whole query/fragment match 4 -> parameter name   5 -> parameter value
_USERINFO_GROUP = 2
_PARAM_VALUE_GROUP = 5


def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
    """Return the sensitive ranges of a URL evidence.

    The user info of the authority (``user:password`` in ``//user:password@host``)
    and the value of every query/fragment parameter are reported.

    Args:
        evidence: Object whose ``value`` attribute holds the URL.
        name_pattern: Unused; accepted so all analyzers share one signature.
        value_pattern: Unused; accepted so all analyzers share one signature.

    Returns:
        A list of ``{"start", "end"}`` ranges (``end`` exclusive) in order of
        appearance, or an empty list when nothing matches or analysis fails.
    """
    try:
        ranges = []
        for regex_result in pattern.finditer(evidence.value):
            # Exactly one alternative matched; use the span of its inner capture.
            # Group 1 / group 3 are whole matches: deriving the range from their
            # length gave a start of -1 for the authority and included "?name="
            # in the query range.
            if regex_result.group(_USERINFO_GROUP) is not None:
                ranges.append(
                    {"start": regex_result.start(_USERINFO_GROUP), "end": regex_result.end(_USERINFO_GROUP)}
                )

            if regex_result.group(_PARAM_VALUE_GROUP) is not None:
                ranges.append(
                    {"start": regex_result.start(_PARAM_VALUE_GROUP), "end": regex_result.end(_PARAM_VALUE_GROUP)}
                )

        return ranges
    except Exception as e:
        # Best effort: a failure to analyze must not break vulnerability reporting.
        log.debug(e)

    return []
value_parts = [] - sources = [] + sources = list() current_pos = 0 tainted_ranges = get_tainted_ranges(pyobject) if not len(tainted_ranges): - return ([{"value": pyobject}], []) + return ([{"value": pyobject}], list()) for _range in tainted_ranges: if _range.start > current_pos: @@ -192,7 +195,10 @@ def taint_ranges_as_evidence_info(pyobject: Any) -> Tuple[List[Dict[str, Union[A sources.append(_range.source) value_parts.append( - {"value": pyobject[_range.start : _range.start + _range.length], "source": sources.index(_range.source)} + { + "value": pyobject[_range.start : _range.start + _range.length], + "source": sources.index(_range.source), + } ) current_pos = _range.start + _range.length diff --git a/ddtrace/appsec/_iast/_utils.py b/ddtrace/appsec/_iast/_utils.py index e2e26e291fa..7272abb9016 100644 --- a/ddtrace/appsec/_iast/_utils.py +++ b/ddtrace/appsec/_iast/_utils.py @@ -1,11 +1,8 @@ -import json import re import string import sys from typing import TYPE_CHECKING # noqa:F401 -import attr - from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config @@ -41,6 +38,9 @@ def _is_iast_enabled(): def _has_to_scrub(s): # type: (str) -> bool + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. global _SOURCE_NAME_SCRUB global _SOURCE_VALUE_SCRUB global _SOURCE_NUMERAL_SCRUB @@ -58,6 +58,9 @@ def _has_to_scrub(s): # type: (str) -> bool def _is_numeric(s): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. global _SOURCE_NUMERAL_SCRUB if _SOURCE_NUMERAL_SCRUB is None: @@ -71,17 +74,26 @@ def _is_numeric(s): def _scrub(s, has_range=False): # type: (str, bool) -> str + # TODO: This function is deprecated. 
+ # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. if has_range: return "".join([_REPLACEMENTS[i % _LEN_REPLACEMENTS] for i in range(len(s))]) return "*" * len(s) def _is_evidence_value_parts(value): # type: (Any) -> bool + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. return isinstance(value, (set, list)) def _scrub_get_tokens_positions(text, tokens): # type: (str, Set[str]) -> List[Tuple[int, int]] + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. token_positions = [] for token in tokens: @@ -93,20 +105,6 @@ def _scrub_get_tokens_positions(text, tokens): return token_positions -def _iast_report_to_str(data): - from ._taint_tracking import OriginType - from ._taint_tracking import origin_to_str - - class OriginTypeEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, OriginType): - # if the obj is uuid, we simply return the value of uuid - return origin_to_str(obj) - return json.JSONEncoder.default(self, obj) - - return json.dumps(attr.asdict(data, filter=lambda attr, x: x is not None), cls=OriginTypeEncoder) - - def _get_patched_code(module_path, module_name): # type: (str, str) -> str """ Print the patched code to stdout, for debugging purposes. 
diff --git a/ddtrace/appsec/_iast/constants.py b/ddtrace/appsec/_iast/constants.py index ff165af405f..17981bccbcc 100644 --- a/ddtrace/appsec/_iast/constants.py +++ b/ddtrace/appsec/_iast/constants.py @@ -25,6 +25,8 @@ EVIDENCE_HEADER_INJECTION = "HEADER_INJECTION" EVIDENCE_SSRF = "SSRF" +HEADER_NAME_VALUE_SEPARATOR = ": " + MD5_DEF = "md5" SHA1_DEF = "sha1" diff --git a/ddtrace/appsec/_iast/processor.py b/ddtrace/appsec/_iast/processor.py index 8deee2a1846..8d0adffdb90 100644 --- a/ddtrace/appsec/_iast/processor.py +++ b/ddtrace/appsec/_iast/processor.py @@ -16,6 +16,7 @@ from ._metrics import _set_span_tag_iast_executed_sink from ._metrics import _set_span_tag_iast_request_tainted from ._utils import _is_iast_enabled +from .reporter import IastSpanReporter if TYPE_CHECKING: # pragma: no cover @@ -75,14 +76,14 @@ def on_span_finish(self, span): return from ._taint_tracking import reset_context # noqa: F401 - from ._utils import _iast_report_to_str span.set_metric(IAST.ENABLED, 1.0) - data = core.get_item(IAST.CONTEXT_KEY, span=span) + report_data: IastSpanReporter = core.get_item(IAST.CONTEXT_KEY, span=span) # type: ignore - if data: - span.set_tag_str(IAST.JSON, _iast_report_to_str(data)) + if report_data: + report_data.build_and_scrub_value_parts() + span.set_tag_str(IAST.JSON, report_data._to_str()) _asm_manual_keep(span) _set_metric_iast_request_tainted() diff --git a/ddtrace/appsec/_iast/reporter.py b/ddtrace/appsec/_iast/reporter.py index 5a95aa1272d..fa2cc8ae96c 100644 --- a/ddtrace/appsec/_iast/reporter.py +++ b/ddtrace/appsec/_iast/reporter.py @@ -3,17 +3,23 @@ import operator import os from typing import TYPE_CHECKING +from typing import Any +from typing import Dict from typing import List from typing import Set +from typing import Tuple import zlib import attr +from ddtrace.appsec._iast._evidence_redaction import sensitive_handler +from ddtrace.appsec._iast.constants import VULN_INSECURE_HASHING_TYPE +from ddtrace.appsec._iast.constants import 
VULN_WEAK_CIPHER_TYPE +from ddtrace.appsec._iast.constants import VULN_WEAK_RANDOMNESS -if TYPE_CHECKING: - import Any # noqa:F401 - import Dict # noqa:F401 - import Optional # noqa:F401 + +if TYPE_CHECKING: # pragma: no cover + from typing import Optional # noqa:F401 def _only_if_true(value): @@ -23,9 +29,8 @@ def _only_if_true(value): @attr.s(eq=False, hash=False) class Evidence(object): value = attr.ib(type=str, default=None) # type: Optional[str] - pattern = attr.ib(type=str, default=None) # type: Optional[str] - valueParts = attr.ib(type=list, default=None) # type: Optional[List[Dict[str, Any]]] - redacted = attr.ib(type=bool, default=False, converter=_only_if_true) # type: bool + _ranges = attr.ib(type=dict, default={}) # type: Any + valueParts = attr.ib(type=list, default=None) # type: Any def _valueParts_hash(self): if not self.valueParts: @@ -40,15 +45,10 @@ def _valueParts_hash(self): return _hash def __hash__(self): - return hash((self.value, self.pattern, self._valueParts_hash(), self.redacted)) + return hash((self.value, self._valueParts_hash())) def __eq__(self, other): - return ( - self.value == other.value - and self.pattern == other.pattern - and self._valueParts_hash() == other._valueParts_hash() - and self.redacted == other.redacted - ) + return self.value == other.value and self._valueParts_hash() == other._valueParts_hash() @attr.s(eq=True, hash=True) @@ -69,7 +69,7 @@ def __attrs_post_init__(self): self.hash = zlib.crc32(repr(self).encode()) -@attr.s(eq=True, hash=True) +@attr.s(eq=True, hash=False) class Source(object): origin = attr.ib(type=str) # type: str name = attr.ib(type=str) # type: str @@ -77,11 +77,163 @@ class Source(object): value = attr.ib(type=str, default=None) # type: Optional[str] pattern = attr.ib(type=str, default=None) # type: Optional[str] + def __hash__(self): + """origin & name serve as hashes. 
This approach aims to mitigate false positives when searching for + identical sources in a list, especially when sources undergo changes. The provided example illustrates how + two sources with different attributes could actually represent the same source. For example: + Source(origin=, name='string1', redacted=False, value="password", pattern=None) + could be the same source as the one below: + Source(origin=, name='string1', redacted=True, value=None, pattern='ab') + :return: + """ + return hash((self.origin, self.name)) + @attr.s(eq=False, hash=False) class IastSpanReporter(object): + """ + Class representing an IAST span reporter. + """ + sources = attr.ib(type=List[Source], factory=list) # type: List[Source] vulnerabilities = attr.ib(type=Set[Vulnerability], factory=set) # type: Set[Vulnerability] + _evidences_with_no_sources = [VULN_INSECURE_HASHING_TYPE, VULN_WEAK_CIPHER_TYPE, VULN_WEAK_RANDOMNESS] - def __hash__(self): + def __hash__(self) -> int: + """ + Computes the hash value of the IAST span reporter. + + Returns: + - int: Hash value. + """ return reduce(operator.xor, (hash(obj) for obj in set(self.sources) | self.vulnerabilities)) + + def taint_ranges_as_evidence_info(self, pyobject: Any) -> Tuple[List[Source], List[Dict]]: + """ + Extracts tainted ranges as evidence information. + + Args: + - pyobject (Any): Python object. + + Returns: + - Tuple[List[Source], List[Dict]]: List of Source objects and list of tainted ranges as dictionaries.
+ """ + from ddtrace.appsec._iast._taint_tracking import get_tainted_ranges + + sources = list() + tainted_ranges = get_tainted_ranges(pyobject) + tainted_ranges_to_dict = list() + if not len(tainted_ranges): + return [], [] + + for _range in tainted_ranges: + source = Source(origin=_range.source.origin, name=_range.source.name, value=_range.source.value) + if source not in sources: + sources.append(source) + + tainted_ranges_to_dict.append( + {"start": _range.start, "end": _range.start + _range.length, "length": _range.length, "source": source} + ) + return sources, tainted_ranges_to_dict + + def add_ranges_to_evidence_and_extract_sources(self, vuln): + sources, tainted_ranges_to_dict = self.taint_ranges_as_evidence_info(vuln.evidence.value) + vuln.evidence._ranges = tainted_ranges_to_dict + for source in sources: + if source not in self.sources: + self.sources = self.sources + [source] + + def _get_source_index(self, sources: List[Source], source: Source) -> int: + i = 0 + for source_ in sources: + if hash(source_) == hash(source): + return i + i += 1 + return -1 + + def build_and_scrub_value_parts(self) -> Dict[str, Any]: + """ + Builds and scrubs value parts of vulnerabilities. + + Returns: + - Dict[str, Any]: Dictionary representation of the IAST span reporter. 
+ """ + for vuln in self.vulnerabilities: + scrubbing_result = sensitive_handler.scrub_evidence( + vuln.type, vuln.evidence, vuln.evidence._ranges, self.sources + ) + if scrubbing_result: + redacted_value_parts = scrubbing_result["redacted_value_parts"] + redacted_sources = scrubbing_result["redacted_sources"] + i = 0 + for source in self.sources: + if i in redacted_sources: + source.value = None + vuln.evidence.valueParts = redacted_value_parts + vuln.evidence.value = None + elif vuln.evidence.value is not None and vuln.type not in self._evidences_with_no_sources: + vuln.evidence.valueParts = self.get_unredacted_value_parts( + vuln.evidence.value, vuln.evidence._ranges, self.sources + ) + vuln.evidence.value = None + return self._to_dict() + + def get_unredacted_value_parts(self, evidence_value: str, ranges: List[dict], sources: List[Any]) -> List[dict]: + """ + Gets unredacted value parts of evidence. + + Args: + - evidence_value (str): Evidence value. + - ranges (List[Dict]): List of tainted ranges. + - sources (List[Any]): List of sources. + + Returns: + - List[Dict]: List of unredacted value parts. + """ + value_parts = [] + from_index = 0 + + for range_ in ranges: + if from_index < range_["start"]: + value_parts.append({"value": evidence_value[from_index : range_["start"]]}) + + source_index = self._get_source_index(sources, range_["source"]) + + value_parts.append( + {"value": evidence_value[range_["start"] : range_["end"]], "source": source_index} # type: ignore[dict-item] + ) + + from_index = range_["end"] + + if from_index < len(evidence_value): + value_parts.append({"value": evidence_value[from_index:]}) + + return value_parts + + def _to_dict(self) -> Dict[str, Any]: + """ + Converts the IAST span reporter to a dictionary. + + Returns: + - Dict[str, Any]: Dictionary representation of the IAST span reporter. 
+ """ + return attr.asdict(self, filter=lambda attr, x: x is not None and attr.name != "_ranges") + + def _to_str(self) -> str: + """ + Converts the IAST span reporter to a JSON string. + + Returns: + - str: JSON representation of the IAST span reporter. + """ + from ._taint_tracking import OriginType + from ._taint_tracking import origin_to_str + + class OriginTypeEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, OriginType): + # if the obj is uuid, we simply return the value of uuid + return origin_to_str(obj) + return json.JSONEncoder.default(self, obj) + + return json.dumps(self._to_dict(), cls=OriginTypeEncoder) diff --git a/ddtrace/appsec/_iast/taint_sinks/_base.py b/ddtrace/appsec/_iast/taint_sinks/_base.py index 43dc1f5cb53..7cba289d644 100644 --- a/ddtrace/appsec/_iast/taint_sinks/_base.py +++ b/ddtrace/appsec/_iast/taint_sinks/_base.py @@ -19,7 +19,6 @@ from ..reporter import Evidence from ..reporter import IastSpanReporter from ..reporter import Location -from ..reporter import Source from ..reporter import Vulnerability @@ -89,35 +88,16 @@ def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_num line_number = -1 report = core.get_item(IAST.CONTEXT_KEY, span=span) + vulnerability = Vulnerability( + type=vulnerability_type, + evidence=evidence, + location=Location(path=file_name, line=line_number, spanId=span.span_id), + ) if report: - report.vulnerabilities.add( - Vulnerability( - type=vulnerability_type, - evidence=evidence, - location=Location(path=file_name, line=line_number, spanId=span.span_id), - ) - ) - + report.vulnerabilities.add(vulnerability) else: - report = IastSpanReporter( - vulnerabilities={ - Vulnerability( - type=vulnerability_type, - evidence=evidence, - location=Location(path=file_name, line=line_number, spanId=span.span_id), - ) - } - ) - if sources: - - def cast_value(value): - if isinstance(value, (bytes, bytearray)): - value_decoded = value.decode("utf-8") - else: - value_decoded = 
value - return value_decoded - - report.sources = [Source(origin=x.origin, name=x.name, value=cast_value(x.value)) for x in sources] + report = IastSpanReporter(vulnerabilities={vulnerability}) + report.add_ranges_to_evidence_and_extract_sources(vulnerability) if getattr(cls, "redact_report", False): redacted_report = cls._redacted_report_cache.get( @@ -130,9 +110,10 @@ def cast_value(value): return True @classmethod - def report(cls, evidence_value="", sources=None): - # type: (Union[Text|List[Dict[str, Any]]], Optional[List[Source]]) -> None + def report(cls, evidence_value="", value_parts=None, sources=None): + # type: (Any, Any, Optional[List[Any]]) -> None """Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON""" + # TODO: type of evidence_value will be Text. We wait to finish the redaction refactor. if cls.acquire_quota(): if not tracer or not hasattr(tracer, "current_root_span"): log.debug( @@ -166,11 +147,12 @@ def report(cls, evidence_value="", sources=None): if not cls.is_not_reported(file_name, line_number): return - if _is_evidence_value_parts(evidence_value): - evidence = Evidence(valueParts=evidence_value) + # TODO: This function is deprecated, but we need to migrate all vulnerabilities first before deleting it + if _is_evidence_value_parts(evidence_value) or _is_evidence_value_parts(value_parts): + evidence = Evidence(value=evidence_value, valueParts=value_parts) # Evidence is a string in weak cipher, weak hash and weak randomness elif isinstance(evidence_value, (str, bytes, bytearray)): - evidence = Evidence(value=evidence_value) + evidence = Evidence(value=evidence_value) # type: ignore else: log.debug("Unexpected evidence_value type: %s", type(evidence_value)) evidence = Evidence(value="") @@ -184,11 +166,17 @@ def report(cls, evidence_value="", sources=None): @classmethod def _extract_sensitive_tokens(cls, report): # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] + # TODO: This function 
is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. log.debug("Base class VulnerabilityBase._extract_sensitive_tokens called") return {} @classmethod def _get_vulnerability_text(cls, vulnerability): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. if vulnerability and vulnerability.evidence.value is not None: return vulnerability.evidence.value @@ -209,6 +197,9 @@ def replace_tokens( vulns_to_tokens, has_range=False, ): + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. ret = vuln.evidence.value replaced = False @@ -222,10 +213,16 @@ def replace_tokens( def _custom_edit_valueparts(cls, vuln): # Subclasses could optionally implement this to add further processing to the # vulnerability valueParts + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. return @classmethod def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter + # TODO: This function is deprecated. + # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. 
if not asm_config._iast_redaction_enabled: return report @@ -239,8 +236,8 @@ def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter for source in report.sources: # Join them so we only run the regexps once for each source # joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(source.name) or _has_to_scrub(source.value): - scrubbed = _scrub(source.value, has_range=True) + if _has_to_scrub(source.name) or _has_to_scrub(source.value): # type: ignore + scrubbed = _scrub(source.value, has_range=True) # type: ignore already_scrubbed[source.value] = scrubbed source.redacted = True sources_values_to_scrubbed[source.value] = scrubbed @@ -252,8 +249,6 @@ def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter if vuln.evidence.value is not None: pattern, replaced = cls.replace_tokens(vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source")) if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True vuln.evidence.value = None if vuln.evidence.valueParts is None: diff --git a/ddtrace/appsec/_iast/taint_sinks/command_injection.py b/ddtrace/appsec/_iast/taint_sinks/command_injection.py index 0b11ffd12b0..8f123a2be4c 100644 --- a/ddtrace/appsec/_iast/taint_sinks/command_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/command_injection.py @@ -1,10 +1,7 @@ import os -import re import subprocess # nosec -from typing import TYPE_CHECKING # noqa:F401 -from typing import List # noqa:F401 -from typing import Set # noqa:F401 -from typing import Union # noqa:F401 +from typing import List +from typing import Union from ddtrace.contrib import trace_utils from ddtrace.internal import core @@ -14,30 +11,15 @@ from ..._constants import IAST_SPAN_TAGS from .. 
import oce from .._metrics import increment_iast_span_metric -from .._utils import _has_to_scrub -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_CMDI from ..constants import VULN_CMDI +from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase -from ._base import _check_positions_contained - - -if TYPE_CHECKING: - from typing import Any # noqa:F401 - from typing import Dict # noqa:F401 - - from ..reporter import IastSpanReporter # noqa:F401 - from ..reporter import Vulnerability # noqa:F401 log = get_logger(__name__) -_INSIDE_QUOTES_REGEXP = re.compile(r"^(?:\s*(?:sudo|doas)\s+)?\b\S+\b\s*(.*)") - -def get_version(): - # type: () -> str +def get_version() -> str: return "" @@ -61,8 +43,7 @@ def patch(): core.dispatch("exploit.prevention.ssrf.patch.urllib") -def unpatch(): - # type: () -> None +def unpatch() -> None: trace_utils.unwrap(os, "system") trace_utils.unwrap(os, "_spawnvef") trace_utils.unwrap(subprocess.Popen, "__init__") @@ -93,151 +74,29 @@ def _iast_cmdi_subprocess_init(wrapped, instance, args, kwargs): @oce.register class CommandInjection(VulnerabilityBase): vulnerability_type = VULN_CMDI - evidence_type = EVIDENCE_CMDI - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(CommandInjection, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text): - # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] - ret = {} # type: Dict[int, Dict[str, Any]] - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - ret[vuln_hash] = { - "tokens": set(_INSIDE_QUOTES_REGEXP.findall(text)), - } - ret[vuln_hash]["token_positions"] = 
_scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter - if not asm_config._iast_redaction_enabled: - return report - - # See if there is a match on either any of the sources or value parts of the report - found = False - - for source in report.sources: - # Join them so we only run the regexps once for each source - joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(joined_fields): - found = True - break - - vulns_to_text = {} - - if not found: - # Check the evidence's value/s - for vuln in report.vulnerabilities: - vulnerability_text = cls._get_vulnerability_text(vuln) - if _has_to_scrub(vulnerability_text) or _INSIDE_QUOTES_REGEXP.match(vulnerability_text): - vulns_to_text[vuln] = vulnerability_text - found = True - break + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False + - if not found: - return report - - if not vulns_to_text: - vulns_to_text = {vuln: cls._get_vulnerability_text(vuln) for vuln in report.vulnerabilities} - - # If we're here, some potentially sensitive information was found, we delegate on - # the specific subclass the task of extracting the variable tokens (e.g. literals inside - # quotes for SQL Injection). 
Note that by just having one potentially sensitive match - # we need to then scrub all the tokens, thus why we do it in two steps instead of one - vulns_to_tokens = cls._extract_sensitive_tokens(vulns_to_text) - - if not vulns_to_tokens: - return report - - all_tokens = set() # type: Set[str] - for _, value_dict in vulns_to_tokens.items(): - all_tokens.update(value_dict["tokens"]) - - # Iterate over all the sources, if one of the tokens match it, redact it - for source in report.sources: - if source.name in "".join(all_tokens) or source.value in "".join(all_tokens): - source.pattern = _scrub(source.value, has_range=True) - source.redacted = True - source.value = None - - # Same for all the evidence values - try: - for vuln in report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - vuln_hash = hash(vuln) - if vuln.evidence.value is not None: - pattern, replaced = cls.replace_tokens( - vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source") - ) - if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True - vuln.evidence.value = None - elif vuln.evidence.valueParts is not None: - idx = 0 - new_value_parts = [] - for part in vuln.evidence.valueParts: - value = part["value"] - part_len = len(value) - part_start = idx - part_end = idx + part_len - pattern_list = [] - - for positions in vulns_to_tokens[vuln_hash]["token_positions"]: - if _check_positions_contained(positions, (part_start, part_end)): - part_scrub_start = max(positions[0] - idx, 0) - part_scrub_end = positions[1] - idx - pattern_list.append(value[:part_scrub_start] + "" + value[part_scrub_end:]) - if part.get("source", False) is not False: - source = report.sources[part["source"]] - if source.redacted: - part["redacted"] = source.redacted - part["pattern"] = source.pattern - del part["value"] - new_value_parts.append(part) - break - else: - part["value"] = "".join(pattern_list) - new_value_parts.append(part) - 
new_value_parts.append({"redacted": True}) - break - else: - new_value_parts.append(part) - pattern_list.append(value[part_start:part_end]) - break - - idx += part_len - vuln.evidence.valueParts = new_value_parts - except (ValueError, KeyError): - log.debug("an error occurred while redacting cmdi", exc_info=True) - return report - - -def _iast_report_cmdi(shell_args): - # type: (Union[str, List[str]]) -> None +def _iast_report_cmdi(shell_args: Union[str, List[str]]) -> None: report_cmdi = "" from .._metrics import _set_metric_iast_executed_sink - from .._taint_tracking import is_pyobject_tainted - from .._taint_tracking.aspects import join_aspect - - if isinstance(shell_args, (list, tuple)): - for arg in shell_args: - if is_pyobject_tainted(arg): - report_cmdi = join_aspect(" ".join, 1, " ", shell_args) - break - elif is_pyobject_tainted(shell_args): - report_cmdi = shell_args increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, CommandInjection.vulnerability_type) _set_metric_iast_executed_sink(CommandInjection.vulnerability_type) - if report_cmdi: - CommandInjection.report(evidence_value=report_cmdi) + + if AppSecIastSpanProcessor.is_span_analyzed() and CommandInjection.has_quota(): + from .._taint_tracking import is_pyobject_tainted + from .._taint_tracking.aspects import join_aspect + + if isinstance(shell_args, (list, tuple)): + for arg in shell_args: + if is_pyobject_tainted(arg): + report_cmdi = join_aspect(" ".join, 1, " ", shell_args) + break + elif is_pyobject_tainted(shell_args): + report_cmdi = shell_args + + if report_cmdi: + CommandInjection.report(evidence_value=report_cmdi) diff --git a/ddtrace/appsec/_iast/taint_sinks/header_injection.py b/ddtrace/appsec/_iast/taint_sinks/header_injection.py index 6444fec627e..1ce8a52d5e4 100644 --- a/ddtrace/appsec/_iast/taint_sinks/header_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/header_injection.py @@ -1,6 +1,4 @@ import re -from typing import Any -from typing import Dict from 
ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config @@ -13,13 +11,9 @@ from .._patch import set_and_check_module_is_patched from .._patch import set_module_unpatched from .._patch import try_wrap_function_wrapper -from .._utils import _has_to_scrub -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_HEADER_INJECTION +from ..constants import HEADER_NAME_VALUE_SEPARATOR from ..constants import VULN_HEADER_INJECTION -from ..reporter import IastSpanReporter -from ..reporter import Vulnerability +from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase @@ -109,53 +103,9 @@ def _iast_h(wrapped, instance, args, kwargs): @oce.register class HeaderInjection(VulnerabilityBase): vulnerability_type = VULN_HEADER_INJECTION - evidence_type = EVIDENCE_HEADER_INJECTION - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(HeaderInjection, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text: Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]]: - ret = {} # type: Dict[int, Dict[str, Any]] - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - ret[vuln_hash] = { - "tokens": set(_HEADERS_NAME_REGEXP.findall(text) + _HEADERS_VALUE_REGEXP.findall(text)), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report: IastSpanReporter) -> IastSpanReporter: - """TODO: this algorithm is not working as expected, it needs to be fixed.""" - if not asm_config._iast_redaction_enabled: - return report - - try: - for vuln in 
report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - if vuln.type == VULN_HEADER_INJECTION: - scrub_the_following_elements = False - new_value_parts = [] - for value_part in vuln.evidence.valueParts: - if _HEADERS_VALUE_REGEXP.match(value_part["value"]) or scrub_the_following_elements: - value_part["pattern"] = _scrub(value_part["value"], has_range=True) - value_part["redacted"] = True - del value_part["value"] - elif _has_to_scrub(value_part["value"]) or _HEADERS_NAME_REGEXP.match(value_part["value"]): - scrub_the_following_elements = True - new_value_parts.append(value_part) - vuln.evidence.valueParts = new_value_parts - except (ValueError, KeyError): - log.debug("an error occurred while redacting cmdi", exc_info=True) - return report + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False def _iast_report_header_injection(headers_args) -> None: @@ -180,6 +130,7 @@ def _iast_report_header_injection(headers_args) -> None: increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, HeaderInjection.vulnerability_type) _set_metric_iast_executed_sink(HeaderInjection.vulnerability_type) - if is_pyobject_tainted(header_name) or is_pyobject_tainted(header_value): - header_evidence = add_aspect(add_aspect(header_name, ": "), header_value) - HeaderInjection.report(evidence_value=header_evidence) + if AppSecIastSpanProcessor.is_span_analyzed() and HeaderInjection.has_quota(): + if is_pyobject_tainted(header_name) or is_pyobject_tainted(header_value): + header_evidence = add_aspect(add_aspect(header_name, HEADER_NAME_VALUE_SEPARATOR), header_value) + HeaderInjection.report(evidence_value=header_evidence) diff --git a/ddtrace/appsec/_iast/taint_sinks/path_traversal.py b/ddtrace/appsec/_iast/taint_sinks/path_traversal.py index c7618000d05..e6fde3b40e2 100644 --- 
a/ddtrace/appsec/_iast/taint_sinks/path_traversal.py +++ b/ddtrace/appsec/_iast/taint_sinks/path_traversal.py @@ -8,7 +8,6 @@ from .._metrics import increment_iast_span_metric from .._patch import set_and_check_module_is_patched from .._patch import set_module_unpatched -from ..constants import EVIDENCE_PATH_TRAVERSAL from ..constants import VULN_PATH_TRAVERSAL from ..processor import AppSecIastSpanProcessor from ._base import VulnerabilityBase @@ -20,15 +19,6 @@ @oce.register class PathTraversal(VulnerabilityBase): vulnerability_type = VULN_PATH_TRAVERSAL - evidence_type = EVIDENCE_PATH_TRAVERSAL - - @classmethod - def report(cls, evidence_value=None, sources=None): - if isinstance(evidence_value, (str, bytes, bytearray)): - from .._taint_tracking import taint_ranges_as_evidence_info - - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(PathTraversal, cls).report(evidence_value=evidence_value, sources=sources) def get_version(): diff --git a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py index ee7bcfb2f8f..68d5a289c01 100644 --- a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py @@ -32,9 +32,10 @@ class SqlInjection(VulnerabilityBase): @classmethod def report(cls, evidence_value=None, sources=None): + value_parts = [] if isinstance(evidence_value, (str, bytes, bytearray)): - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(SqlInjection, cls).report(evidence_value=evidence_value, sources=sources) + value_parts, sources = taint_ranges_as_evidence_info(evidence_value) + super(SqlInjection, cls).report(evidence_value=evidence_value, value_parts=value_parts, sources=sources) @classmethod def _extract_sensitive_tokens(cls, vulns_to_text): diff --git a/ddtrace/appsec/_iast/taint_sinks/ssrf.py b/ddtrace/appsec/_iast/taint_sinks/ssrf.py index f114998605a..7a070cf5425 100644 --- 
a/ddtrace/appsec/_iast/taint_sinks/ssrf.py +++ b/ddtrace/appsec/_iast/taint_sinks/ssrf.py @@ -1,176 +1,33 @@ -import re -from typing import Callable # noqa:F401 -from typing import Dict # noqa:F401 -from typing import Set # noqa:F401 +from typing import Callable from ddtrace.internal.logger import get_logger -from ddtrace.settings.asm import config as asm_config from ..._constants import IAST_SPAN_TAGS from .. import oce from .._metrics import increment_iast_span_metric -from .._utils import _has_to_scrub -from .._utils import _is_iast_enabled -from .._utils import _scrub -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_SSRF from ..constants import VULN_SSRF -from ..constants import VULNERABILITY_TOKEN_TYPE from ..processor import AppSecIastSpanProcessor -from ..reporter import IastSpanReporter # noqa:F401 -from ..reporter import Vulnerability from ._base import VulnerabilityBase -from ._base import _check_positions_contained log = get_logger(__name__) -_AUTHORITY_REGEXP = re.compile(r"(?:\/\/([^:@\/]+)(?::([^@\/]+))?@).*") -_QUERY_FRAGMENT_REGEXP = re.compile(r"[?#&]([^=&;]+)=(?P[^?#&]+)") - - @oce.register class SSRF(VulnerabilityBase): vulnerability_type = VULN_SSRF - evidence_type = EVIDENCE_SSRF - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - if not _is_iast_enabled(): - return - - from .._taint_tracking import taint_ranges_as_evidence_info - - if isinstance(evidence_value, (str, bytes, bytearray)): - evidence_value, sources = taint_ranges_as_evidence_info(evidence_value) - super(SSRF, cls).report(evidence_value=evidence_value, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text: Dict[Vulnerability, str]) -> VULNERABILITY_TOKEN_TYPE: - ret = {} # type: VULNERABILITY_TOKEN_TYPE - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - authority = [] - authority_found = _AUTHORITY_REGEXP.findall(text) - if authority_found: - authority = 
list(authority_found[0]) - query = [value for param, value in _QUERY_FRAGMENT_REGEXP.findall(text)] - ret[vuln_hash] = { - "tokens": set(authority + query), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter - if not asm_config._iast_redaction_enabled: - return report - - # See if there is a match on either any of the sources or value parts of the report - found = False - - for source in report.sources: - # Join them so we only run the regexps once for each source - joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(joined_fields): - found = True - break - - vulns_to_text = {} - - if not found: - # Check the evidence's value/s - for vuln in report.vulnerabilities: - vulnerability_text = cls._get_vulnerability_text(vuln) - if _has_to_scrub(vulnerability_text) or _AUTHORITY_REGEXP.match(vulnerability_text): - vulns_to_text[vuln] = vulnerability_text - found = True - break - - if not found: - return report - - if not vulns_to_text: - vulns_to_text = {vuln: cls._get_vulnerability_text(vuln) for vuln in report.vulnerabilities} - - # If we're here, some potentially sensitive information was found, we delegate on - # the specific subclass the task of extracting the variable tokens (e.g. literals inside - # quotes for SQL Injection). 
Note that by just having one potentially sensitive match - # we need to then scrub all the tokens, thus why we do it in two steps instead of one - vulns_to_tokens = cls._extract_sensitive_tokens(vulns_to_text) - - if not vulns_to_tokens: - return report - - all_tokens = set() # type: Set[str] - for _, value_dict in vulns_to_tokens.items(): - all_tokens.update(value_dict["tokens"]) - - # Iterate over all the sources, if one of the tokens match it, redact it - for source in report.sources: - if source.name in "".join(all_tokens) or source.value in "".join(all_tokens): - source.pattern = _scrub(source.value, has_range=True) - source.redacted = True - source.value = None - - # Same for all the evidence values - for vuln in report.vulnerabilities: - # Use the initial hash directly as iteration key since the vuln itself will change - vuln_hash = hash(vuln) - if vuln.evidence.value is not None: - pattern, replaced = cls.replace_tokens(vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source")) - if replaced: - vuln.evidence.pattern = pattern - vuln.evidence.redacted = True - vuln.evidence.value = None - elif vuln.evidence.valueParts is not None: - idx = 0 - new_value_parts = [] - for part in vuln.evidence.valueParts: - value = part["value"] - part_len = len(value) - part_start = idx - part_end = idx + part_len - pattern_list = [] - - for positions in vulns_to_tokens[vuln_hash]["token_positions"]: - if _check_positions_contained(positions, (part_start, part_end)): - part_scrub_start = max(positions[0] - idx, 0) - part_scrub_end = positions[1] - idx - pattern_list.append(value[:part_scrub_start] + "" + value[part_scrub_end:]) - if part.get("source", False) is not False: - source = report.sources[part["source"]] - if source.redacted: - part["redacted"] = source.redacted - part["pattern"] = source.pattern - del part["value"] - new_value_parts.append(part) - break - else: - part["value"] = "".join(pattern_list) - new_value_parts.append(part) - 
new_value_parts.append({"redacted": True}) - break - else: - new_value_parts.append(part) - pattern_list.append(value[part_start:part_end]) - break - - idx += part_len - vuln.evidence.valueParts = new_value_parts - return report + # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate + # all vulnerabilities to use it first. + redact_report = False def _iast_report_ssrf(func: Callable, *args, **kwargs): - from .._metrics import _set_metric_iast_executed_sink - report_ssrf = kwargs.get("url", False) - increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SSRF.vulnerability_type) - _set_metric_iast_executed_sink(SSRF.vulnerability_type) if report_ssrf: + from .._metrics import _set_metric_iast_executed_sink + + _set_metric_iast_executed_sink(SSRF.vulnerability_type) + increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SSRF.vulnerability_type) if AppSecIastSpanProcessor.is_span_analyzed() and SSRF.has_quota(): try: from .._taint_tracking import is_pyobject_tainted diff --git a/ddtrace/appsec/_metrics.py b/ddtrace/appsec/_metrics.py index 28644978a0f..28d712cebf7 100644 --- a/ddtrace/appsec/_metrics.py +++ b/ddtrace/appsec/_metrics.py @@ -105,6 +105,21 @@ def _set_waf_request_metrics(*args): 1.0, tags=tags_request, ) + rasp = result["rasp"] + if rasp["called"]: + for t, n in [("eval", "rasp.rule.eval"), ("match", "rasp.rule.match"), ("timeout", "rasp.timeout")]: + for rule_type, value in rasp[t].items(): + if value: + telemetry.telemetry_writer.add_count_metric( + TELEMETRY_NAMESPACE_TAG_APPSEC, + n, + float(value), + tags=( + ("rule_type", rule_type), + ("waf_version", DDWAF_VERSION), + ), + ) + except Exception: log.warning("Error reporting ASM WAF requests metrics", exc_info=True) finally: diff --git a/ddtrace/appsec/_processor.py b/ddtrace/appsec/_processor.py index 4d98ab486b4..a3d0518b6a4 100644 --- a/ddtrace/appsec/_processor.py +++ b/ddtrace/appsec/_processor.py @@ -260,7 
+260,12 @@ def waf_callable(custom_data=None, **kwargs): _asm_request_context.call_waf_callback({"REQUEST_HTTP_IP": None}) def _waf_action( - self, span: Span, ctx: ddwaf_context_capsule, custom_data: Optional[Dict[str, Any]] = None, **kwargs + self, + span: Span, + ctx: ddwaf_context_capsule, + custom_data: Optional[Dict[str, Any]] = None, + crop_trace: Optional[str] = None, + rule_type: Optional[str] = None, ) -> Optional[DDWaf_result]: """ Call the `WAF` with the given parameters. If `custom_data_names` is specified as @@ -327,7 +332,7 @@ def _waf_action( from ddtrace.appsec._exploit_prevention.stack_traces import report_stack stack_trace_id = parameters["stack_id"] - report_stack("exploit detected", span, kwargs.get("crop_trace"), stack_id=stack_trace_id) + report_stack("exploit detected", span, crop_trace, stack_id=stack_trace_id) for rule in waf_results.data: rule[EXPLOIT_PREVENTION.STACK_TRACE_ID] = stack_trace_id @@ -335,7 +340,11 @@ def _waf_action( log.debug("[DDAS-011-00] ASM In-App WAF returned: %s. 
Timeout %s", waf_results.data, waf_results.timeout) _asm_request_context.set_waf_telemetry_results( - self._ddwaf.info.version, bool(waf_results.data), bool(blocked), waf_results.timeout + self._ddwaf.info.version, + bool(waf_results.data), + bool(blocked), + waf_results.timeout, + rule_type, ) if blocked: core.set_item(WAF_CONTEXT_NAMES.BLOCKED, blocked, span=span) diff --git a/ddtrace/contrib/botocore/services/bedrock.py b/ddtrace/contrib/botocore/services/bedrock.py index 0e13fecbf2d..e0896833e1c 100644 --- a/ddtrace/contrib/botocore/services/bedrock.py +++ b/ddtrace/contrib/botocore/services/bedrock.py @@ -42,19 +42,16 @@ def read(self, amt=None): self._body.append(json.loads(body)) if self.__wrapped__.tell() == int(self.__wrapped__._content_length): formatted_response = _extract_text_and_response_reason(self._execution_ctx, self._body[0]) + model_provider = self._execution_ctx["model_provider"] + model_name = self._execution_ctx["model_name"] + should_set_choice_ids = model_provider == _COHERE and "embed" not in model_name core.dispatch( "botocore.bedrock.process_response", - [ - self._execution_ctx, - formatted_response, - None, - self._body[0], - self._execution_ctx["model_provider"] == _COHERE, - ], + [self._execution_ctx, formatted_response, None, self._body[0], should_set_choice_ids], ) return body except Exception: - core.dispatch("botocore.patched_bedrock_api_call.exception", [self._execution_context, sys.exc_info()]) + core.dispatch("botocore.patched_bedrock_api_call.exception", [self._execution_ctx, sys.exc_info()]) raise def readlines(self): @@ -64,15 +61,12 @@ def readlines(self): for line in lines: self._body.append(json.loads(line)) formatted_response = _extract_text_and_response_reason(self._execution_ctx, self._body[0]) + model_provider = self._execution_ctx["model_provider"] + model_name = self._execution_ctx["model_name"] + should_set_choice_ids = model_provider == _COHERE and "embed" not in model_name core.dispatch( 
"botocore.bedrock.process_response", - [ - self._execution_ctx, - formatted_response, - None, - self._body[0], - self._execution_ctx["model_provider"] == _COHERE, - ], + [self._execution_ctx, formatted_response, None, self._body[0], should_set_choice_ids], ) return lines except Exception: @@ -87,15 +81,14 @@ def __iter__(self): yield line metadata = _extract_streamed_response_metadata(self._execution_ctx, self._body) formatted_response = _extract_streamed_response(self._execution_ctx, self._body) + model_provider = self._execution_ctx["model_provider"] + model_name = self._execution_ctx["model_name"] + should_set_choice_ids = ( + model_provider == _COHERE and "is_finished" not in self._body[0] and "embed" not in model_name + ) core.dispatch( "botocore.bedrock.process_response", - [ - self._execution_ctx, - formatted_response, - metadata, - self._body, - self._execution_ctx["model_provider"] == _COHERE and "is_finished" not in self._body[0], - ], + [self._execution_ctx, formatted_response, metadata, self._body, should_set_choice_ids], ) except Exception: core.dispatch("botocore.patched_bedrock_api_call.exception", [self._execution_ctx, sys.exc_info()]) @@ -107,6 +100,7 @@ def _extract_request_params(params: Dict[str, Any], provider: str) -> Dict[str, Extracts request parameters including prompt, temperature, top_p, max_tokens, and stop_sequences. 
""" request_body = json.loads(params.get("body")) + model_id = params.get("modelId") if provider == _AI21: return { "prompt": request_body.get("prompt"), @@ -115,6 +109,8 @@ def _extract_request_params(params: Dict[str, Any], provider: str) -> Dict[str, "max_tokens": request_body.get("maxTokens", ""), "stop_sequences": request_body.get("stopSequences", []), } + elif provider == _AMAZON and "embed" in model_id: + return {"prompt": request_body.get("inputText")} elif provider == _AMAZON: text_generation_config = request_body.get("textGenerationConfig", {}) return { @@ -135,6 +131,12 @@ def _extract_request_params(params: Dict[str, Any], provider: str) -> Dict[str, "max_tokens": request_body.get("max_tokens_to_sample", ""), "stop_sequences": request_body.get("stop_sequences", []), } + elif provider == _COHERE and "embed" in model_id: + return { + "prompt": request_body.get("texts"), + "input_type": request_body.get("input_type", ""), + "truncate": request_body.get("truncate", ""), + } elif provider == _COHERE: return { "prompt": request_body.get("prompt"), @@ -161,17 +163,22 @@ def _extract_request_params(params: Dict[str, Any], provider: str) -> Dict[str, def _extract_text_and_response_reason(ctx: core.ExecutionContext, body: Dict[str, Any]) -> Dict[str, List[str]]: text, finish_reason = "", "" + model_name = ctx["model_name"] provider = ctx["model_provider"] try: if provider == _AI21: text = body.get("completions")[0].get("data").get("text") finish_reason = body.get("completions")[0].get("finishReason") + elif provider == _AMAZON and "embed" in model_name: + text = [body.get("embedding", [])] elif provider == _AMAZON: text = body.get("results")[0].get("outputText") finish_reason = body.get("results")[0].get("completionReason") elif provider == _ANTHROPIC: text = body.get("completion", "") or body.get("content", "") finish_reason = body.get("stop_reason") + elif provider == _COHERE and "embed" in model_name: + text = body.get("embeddings", [[]]) elif provider == 
_COHERE: text = [generation["text"] for generation in body.get("generations")] finish_reason = [generation["finish_reason"] for generation in body.get("generations")] @@ -194,10 +201,13 @@ def _extract_text_and_response_reason(ctx: core.ExecutionContext, body: Dict[str def _extract_streamed_response(ctx: core.ExecutionContext, streamed_body: List[Dict[str, Any]]) -> Dict[str, List[str]]: text, finish_reason = "", "" + model_name = ctx["model_name"] provider = ctx["model_provider"] try: if provider == _AI21: - pass # note: ai21 does not support streamed responses + pass # DEV: ai21 does not support streamed responses + elif provider == _AMAZON and "embed" in model_name: + pass # DEV: amazon embed models do not support streamed responses elif provider == _AMAZON: text = "".join([chunk["outputText"] for chunk in streamed_body]) finish_reason = streamed_body[-1]["completionReason"] @@ -211,7 +221,9 @@ def _extract_streamed_response(ctx: core.ExecutionContext, streamed_body: List[D text += chunk["delta"].get("text", "") if "stop_reason" in chunk["delta"]: finish_reason = str(chunk["delta"]["stop_reason"]) - elif provider == _COHERE and streamed_body: + elif provider == _COHERE and "embed" in model_name: + pass # DEV: cohere embed models do not support streamed responses + elif provider == _COHERE: if "is_finished" in streamed_body[0]: # streamed response if "index" in streamed_body[0]: # n >= 2 num_generations = int(ctx.get_item("num_generations") or 0) @@ -230,8 +242,7 @@ def _extract_streamed_response(ctx: core.ExecutionContext, streamed_body: List[D text = "".join([chunk["generation"] for chunk in streamed_body]) finish_reason = streamed_body[-1]["stop_reason"] elif provider == _STABILITY: - # We do not yet support image modality models - pass + pass # DEV: we do not yet support image modality models except (IndexError, AttributeError): log.warning("Unable to extract text/finish_reason from response body. 
Defaulting to empty text/finish_reason.") @@ -306,7 +317,7 @@ def patched_bedrock_api_call(original_func, instance, args, kwargs, function_var span_name=function_vars.get("trace_operation"), service=schematize_service_name("{}.{}".format(pin.service, function_vars.get("endpoint_name"))), resource=function_vars.get("operation"), - span_type=SpanTypes.LLM, + span_type=SpanTypes.LLM if "embed" not in model_name else None, call_key="instrumented_bedrock_call", call_trace=True, bedrock_integration=function_vars.get("integration"), diff --git a/ddtrace/internal/flare.py b/ddtrace/internal/flare.py index 9a11223b221..7cf850e7656 100644 --- a/ddtrace/internal/flare.py +++ b/ddtrace/internal/flare.py @@ -1,4 +1,5 @@ import binascii +import dataclasses import io import json import logging @@ -7,9 +8,7 @@ import pathlib import shutil import tarfile -from typing import Any from typing import Dict -from typing import List from typing import Optional from typing import Tuple @@ -19,7 +18,7 @@ from ddtrace.internal.utils.http import get_connection -TRACER_FLARE_DIRECTORY = pathlib.Path("tracer_flare") +TRACER_FLARE_DIRECTORY = "tracer_flare" TRACER_FLARE_TAR = pathlib.Path("tracer_flare.tar") TRACER_FLARE_ENDPOINT = "/tracer_flare/v1" TRACER_FLARE_FILE_HANDLER_NAME = "tracer_flare_file_handler" @@ -29,111 +28,99 @@ log = get_logger(__name__) +@dataclasses.dataclass +class FlareSendRequest: + case_id: str + hostname: str + email: str + source: str = "tracer_python" + + class Flare: - def __init__(self, timeout_sec: int = DEFAULT_TIMEOUT_SECONDS): - self.original_log_level = 0 # NOTSET - self.timeout = timeout_sec + def __init__(self, timeout_sec: int = DEFAULT_TIMEOUT_SECONDS, flare_dir: str = TRACER_FLARE_DIRECTORY): + self.original_log_level: int = logging.NOTSET + self.timeout: int = timeout_sec + self.flare_dir: pathlib.Path = pathlib.Path(flare_dir) self.file_handler: Optional[RotatingFileHandler] = None - def prepare(self, configs: List[dict]): + def prepare(self, log_level: 
str): """ Update configurations to start sending tracer logs to a file to be sent in a flare later. """ - if not os.path.exists(TRACER_FLARE_DIRECTORY): - try: - os.makedirs(TRACER_FLARE_DIRECTORY) - log.info("Tracer logs will now be sent to the %s directory", TRACER_FLARE_DIRECTORY) - except Exception as e: - log.error("Failed to create %s directory: %s", TRACER_FLARE_DIRECTORY, e) - return - for agent_config in configs: - # AGENT_CONFIG is currently being used for multiple purposes - # We only want to prepare for a tracer flare if the config name - # starts with 'flare-log-level' - if not agent_config.get("name", "").startswith("flare-log-level"): - return + try: + self.flare_dir.mkdir(exist_ok=True) + except Exception as e: + log.error("Failed to create %s directory: %s", self.flare_dir, e) + return + + flare_log_level_int = logging.getLevelName(log_level) + if type(flare_log_level_int) != int: + raise TypeError("Invalid log level provided: %s", log_level) - # Validate the flare log level - flare_log_level = agent_config.get("config", {}).get("log_level").upper() - flare_log_level_int = logging.getLevelName(flare_log_level) - if type(flare_log_level_int) != int: - raise TypeError("Invalid log level provided: %s", flare_log_level_int) - - ddlogger = get_logger("ddtrace") - pid = os.getpid() - flare_file_path = TRACER_FLARE_DIRECTORY / pathlib.Path(f"tracer_python_{pid}.log") - self.original_log_level = ddlogger.level - - # Set the logger level to the more verbose between original and flare - # We do this valid_original_level check because if the log level is NOTSET, the value is 0 - # which is the minimum value. 
In this case, we just want to use the flare level, but still - # retain the original state as NOTSET/0 - valid_original_level = 100 if self.original_log_level == 0 else self.original_log_level - logger_level = min(valid_original_level, flare_log_level_int) - ddlogger.setLevel(logger_level) - self.file_handler = _add_file_handler( - ddlogger, flare_file_path.__str__(), flare_log_level, TRACER_FLARE_FILE_HANDLER_NAME - ) - - # Create and add config file - self._generate_config_file(pid) - - def send(self, configs: List[Any]): + ddlogger = get_logger("ddtrace") + pid = os.getpid() + flare_file_path = self.flare_dir / f"tracer_python_{pid}.log" + self.original_log_level = ddlogger.level + + # Set the logger level to the more verbose between original and flare + # We do this valid_original_level check because if the log level is NOTSET, the value is 0 + # which is the minimum value. In this case, we just want to use the flare level, but still + # retain the original state as NOTSET/0 + valid_original_level = ( + logging.CRITICAL if self.original_log_level == logging.NOTSET else self.original_log_level + ) + logger_level = min(valid_original_level, flare_log_level_int) + ddlogger.setLevel(logger_level) + self.file_handler = _add_file_handler( + ddlogger, flare_file_path.__str__(), flare_log_level_int, TRACER_FLARE_FILE_HANDLER_NAME + ) + + # Create and add config file + self._generate_config_file(pid) + + def send(self, flare_send_req: FlareSendRequest): """ Revert tracer flare configurations back to original state before sending the flare. 
""" - for agent_task in configs: - # AGENT_TASK is currently being used for multiple purposes - # We only want to generate the tracer flare if the task_type is - # 'tracer_flare' - if type(agent_task) != dict or agent_task.get("task_type") != "tracer_flare": - continue - args = agent_task.get("args", {}) - - self.revert_configs() - - # We only want the flare to be sent once, even if there are - # multiple tracer instances - lock_path = TRACER_FLARE_DIRECTORY / TRACER_FLARE_LOCK - if not os.path.exists(lock_path): - try: - open(lock_path, "w").close() - except Exception as e: - log.error("Failed to create %s file", lock_path) - raise e - data = { - "case_id": args.get("case_id"), - "source": "tracer_python", - "hostname": args.get("hostname"), - "email": args.get("user_handle"), - } - try: - client = get_connection(config._trace_agent_url, timeout=self.timeout) - headers, body = self._generate_payload(data) - client.request("POST", TRACER_FLARE_ENDPOINT, body, headers) - response = client.getresponse() - if response.status == 200: - log.info("Successfully sent the flare") - else: - log.error( - "Upload failed with %s status code:(%s) %s", - response.status, - response.reason, - response.read().decode(), - ) - except Exception as e: - log.error("Failed to send tracer flare") - raise e - finally: - client.close() - # Clean up files regardless of success/failure - self.clean_up_files() - return + self.revert_configs() + + # We only want the flare to be sent once, even if there are + # multiple tracer instances + lock_path = self.flare_dir / TRACER_FLARE_LOCK + if not os.path.exists(lock_path): + try: + open(lock_path, "w").close() + except Exception as e: + log.error("Failed to create %s file", lock_path) + raise e + try: + client = get_connection(config._trace_agent_url, timeout=self.timeout) + headers, body = self._generate_payload(flare_send_req.__dict__) + client.request("POST", TRACER_FLARE_ENDPOINT, body, headers) + response = client.getresponse() + if 
response.status == 200: + log.info("Successfully sent the flare to Zendesk ticket %s", flare_send_req.case_id) + else: + log.error( + "Tracer flare upload to Zendesk ticket %s failed with %s status code:(%s) %s", + flare_send_req.case_id, + response.status, + response.reason, + response.read().decode(), + ) + except Exception as e: + log.error("Failed to send tracer flare to Zendesk ticket %s", flare_send_req.case_id) + raise e + finally: + client.close() + # Clean up files regardless of success/failure + self.clean_up_files() + return def _generate_config_file(self, pid: int): - config_file = TRACER_FLARE_DIRECTORY / pathlib.Path(f"tracer_config_{pid}.json") + config_file = self.flare_dir / f"tracer_config_{pid}.json" try: with open(config_file, "w") as f: tracer_configs = { @@ -162,8 +149,7 @@ def revert_configs(self): def _generate_payload(self, params: Dict[str, str]) -> Tuple[dict, bytes]: tar_stream = io.BytesIO() with tarfile.open(fileobj=tar_stream, mode="w") as tar: - for file_name in os.listdir(TRACER_FLARE_DIRECTORY): - flare_file_name = TRACER_FLARE_DIRECTORY / pathlib.Path(file_name) + for flare_file_name in self.flare_dir.iterdir(): tar.add(flare_file_name) tar_stream.seek(0) @@ -197,6 +183,6 @@ def _get_valid_logger_level(self, flare_log_level: int) -> int: def clean_up_files(self): try: - shutil.rmtree(TRACER_FLARE_DIRECTORY) + shutil.rmtree(self.flare_dir) except Exception as e: log.warning("Failed to clean up tracer flare files: %s", e) diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index ddbef347ef4..2d8f1c5fd1e 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -190,8 +190,8 @@ def _third_party_packages() -> set: return ( set(decompress(read_binary("ddtrace.internal", "third-party.tar.gz")).decode("utf-8").splitlines()) - | tp_config.excludes - ) - tp_config.includes + | tp_config.includes + ) - tp_config.excludes @cached() diff --git a/ddtrace/settings/third_party.py 
b/ddtrace/settings/third_party.py index 83a7c1ef567..3416be1d524 100644 --- a/ddtrace/settings/third_party.py +++ b/ddtrace/settings/third_party.py @@ -7,14 +7,14 @@ class ThirdPartyDetectionConfig(En): excludes = En.v( set, "excludes", - help="Additional packages to treat as third-party", + help="List of packages that should not be treated as third-party", help_type="List", default=set(), ) includes = En.v( set, "includes", - help="List of packages that should not be treated as third-party", + help="Additional packages to treat as third-party", help_type="List", default=set(), ) diff --git a/releasenotes/notes/feat-bedrock-embedding-d44ac603bdb83a7b.yaml b/releasenotes/notes/feat-bedrock-embedding-d44ac603bdb83a7b.yaml new file mode 100644 index 00000000000..db857b82469 --- /dev/null +++ b/releasenotes/notes/feat-bedrock-embedding-d44ac603bdb83a7b.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + botocore: This introduces tracing support for bedrock-runtime embedding operations. diff --git a/tests/appsec/contrib_appsec/utils.py b/tests/appsec/contrib_appsec/utils.py index dae02eb8f21..1a193b47a04 100644 --- a/tests/appsec/contrib_appsec/utils.py +++ b/tests/appsec/contrib_appsec/utils.py @@ -1186,8 +1186,11 @@ def test_stream_response( def test_exploit_prevention( self, interface, root_span, get_tag, asm_enabled, ep_enabled, endpoint, parameters, rule, top_functions ): + from unittest.mock import patch as mock_patch + from ddtrace.appsec._common_module_patches import patch_common_modules from ddtrace.appsec._common_module_patches import unpatch_common_modules + from ddtrace.appsec._metrics import DDWAF_VERSION from ddtrace.contrib.requests import patch as patch_requests from ddtrace.contrib.requests import unpatch as unpatch_requests from ddtrace.ext import http @@ -1196,7 +1199,7 @@ def test_exploit_prevention( patch_requests() with override_global_config(dict(_asm_enabled=asm_enabled, _ep_enabled=ep_enabled)), override_env( 
dict(DD_APPSEC_RULES=rules.RULES_EXPLOIT_PREVENTION) - ): + ), mock_patch("ddtrace.internal.telemetry.metrics_namespaces.MetricNamespace.add_metric") as mocked: patch_common_modules() self.update_tracer(interface) response = interface.client.get(f"/rasp/{endpoint}/?{parameters}") @@ -1212,6 +1215,20 @@ def test_exploit_prevention( assert any( function.endswith(top_function) for top_function in top_functions ), f"unknown top function {function}" + # assert mocked.call_args_list == [] + telemetry_calls = { + (c.__name__, f"{ns}.{nm}", t): v for (c, ns, nm, v, t), _ in mocked.call_args_list + } + assert ( + "CountMetric", + "appsec.rasp.rule.match", + (("rule_type", endpoint), ("waf_version", DDWAF_VERSION)), + ) in telemetry_calls + assert ( + "CountMetric", + "appsec.rasp.rule.eval", + (("rule_type", endpoint), ("waf_version", DDWAF_VERSION)), + ) in telemetry_calls else: assert get_triggers(root_span()) is None assert self.check_for_stack_trace(root_span) == [] diff --git a/tests/appsec/iast/taint_sinks/test_command_injection.py b/tests/appsec/iast/taint_sinks/test_command_injection.py index 394a1a5ef4d..0100756dd41 100644 --- a/tests/appsec/iast/taint_sinks/test_command_injection.py +++ b/tests/appsec/iast/taint_sinks/test_command_injection.py @@ -40,12 +40,11 @@ def setup(): def test_ossystem(tracer, iast_span_defaults): with override_global_config(dict(_iast_enabled=True)): patch() - _BAD_DIR = "forbidden_dir/" + _BAD_DIR = "mytest/folder/" _BAD_DIR = taint_pyobject( pyobject=_BAD_DIR, source_name="test_ossystem", source_value=_BAD_DIR, - source_origin=OriginType.PARAMETER, ) assert is_pyobject_tainted(_BAD_DIR) with tracer.trace("ossystem_test"): @@ -54,26 +53,26 @@ def test_ossystem(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert 
vulnerability.evidence.valueParts == [ + data = span_report.build_and_scrub_value_parts() + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_ossystem" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_ossystem" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_ossystem", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_communicate(tracer, iast_span_defaults): @@ -94,26 +93,27 @@ def test_communicate(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", 
"redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_communicate" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_communicate" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_communicate", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_run(tracer, iast_span_defaults): @@ -132,26 +132,27 @@ def test_run(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_run" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in 
vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_run" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_run", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_popen_wait(tracer, iast_span_defaults): @@ -171,26 +172,27 @@ def test_popen_wait(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_popen_wait" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_popen_wait" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = 
get_line_and_hash("test_popen_wait", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_popen_wait_shell_true(tracer, iast_span_defaults): @@ -210,26 +212,27 @@ def test_popen_wait_shell_true(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ {"value": "dir "}, {"redacted": True}, {"pattern": "abcdefghijklmn", "redacted": True, "source": 0}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_popen_wait_shell_true" - assert source.origin == OriginType.PARAMETER - assert source.value is None + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_popen_wait_shell_true" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_popen_wait_shell_true", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == 
FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -275,22 +278,23 @@ def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments, span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [{"value": "/bin/ls -l "}, {"source": 0, "value": _BAD_DIR}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_osspawn_variants" - assert source.origin == OriginType.PARAMETER - assert source.value == _BAD_DIR + data = span_report.build_and_scrub_value_parts() + + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [{"value": "/bin/ls -l "}, {"source": 0, "value": _BAD_DIR}] + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() + assert source["name"] == "test_osspawn_variants" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == _BAD_DIR line, hash_value = get_line_and_hash(tag, VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -315,8 +319,9 @@ def test_multiple_cmdi(tracer, iast_span_defaults): 
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - assert len(list(span_report.vulnerabilities)) == 2 + assert len(list(data["vulnerabilities"])) == 2 @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux") @@ -334,8 +339,9 @@ def test_string_cmdi(tracer, iast_span_defaults): span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - assert len(list(span_report.vulnerabilities)) == 1 + assert len(list(data["vulnerabilities"])) == 1 @pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) @@ -360,5 +366,5 @@ def test_cmdi_deduplication(num_vuln_expected, tracer, iast_span_deduplication_e assert span_report is None else: assert span_report - - assert len(span_report.vulnerabilities) == num_vuln_expected + data = span_report.build_and_scrub_value_parts() + assert len(data["vulnerabilities"]) == num_vuln_expected diff --git a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py index 27cd030b219..4cb6a962c7d 100644 --- a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py @@ -2,12 +2,14 @@ import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_CMDI from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from 
ddtrace.appsec._iast.taint_sinks.command_injection import CommandInjection from ddtrace.internal import core @@ -36,10 +38,14 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_ span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_CMDI + assert source == sources_expected @pytest.mark.parametrize( @@ -72,24 +78,52 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_ "/mytest/../folder/file.txt", ], ) -def test_cmdi_redact_rel_paths(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, - {"value": "ls "}, - {"value": file_path, "source": 0}, +def test_cmdi_redact_rel_paths_and_sudo(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("sudo ", add_aspect("ls ", file_path))) + loc = Location(path="foobar.py", line=35, spanId=123) + v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "sudo ls "}, + {"redacted": True, "pattern": ANY, "source": 0}, ] - ) + + +@pytest.mark.parametrize( + "file_path", + [ + "2 > /mytest/folder/", + "2 > mytest/folder/", + "-p mytest/folder", + "--path=../mytest/folder/", + "--path=../mytest/folder/", + 
"--options ../mytest/folder", + "-a /mytest/folder/", + "-b /mytest/folder/", + "-c /mytest/folder", + ], +) +def test_cmdi_redact_sudo_command_with_options(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("sudo ", add_aspect("ls ", file_path))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="file", name="SomeName", value=file_path) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "sudo "}, - {"value": "ls "}, + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "sudo ls "}, {"redacted": True, "pattern": ANY, "source": 0}, ] @@ -108,24 +142,69 @@ def test_cmdi_redact_rel_paths(file_path): "-c /mytest/folder", ], ) -def test_cmdi_redact_options(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, +def test_cmdi_redact_command_with_options(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("ls ", file_path)) + loc = Location(path="foobar.py", line=35, spanId=123) + v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "ls "}, - {"value": file_path, "source": 0}, + {"redacted": True, "pattern": ANY, "source": 0}, ] - ) + + 
+@pytest.mark.parametrize( + "file_path", + [ + "/mytest/folder/", + "mytest/folder/", + "mytest/folder", + "../mytest/folder/", + "../mytest/folder/", + "../mytest/folder", + "/mytest/folder/", + "/mytest/folder/", + "/mytest/folder", + "/mytest/../folder/", + "mytest/../folder/", + "mytest/../folder", + "../mytest/../folder/", + "../mytest/../folder/", + "../mytest/../folder", + "/mytest/../folder/", + "/mytest/../folder/", + "/mytest/../folder", + "/mytest/folder/file.txt", + "mytest/folder/file.txt", + "../mytest/folder/file.txt", + "/mytest/folder/file.txt", + "mytest/../folder/file.txt", + "../mytest/../folder/file.txt", + "/mytest/../folder/file.txt", + ], +) +def test_cmdi_redact_rel_paths(file_path): + file_path = taint_pyobject(pyobject=file_path, source_name="test_ossystem", source_value=file_path) + ev = Evidence(value=add_aspect("dir -l ", file_path)) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="file", name="SomeName", value=file_path) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "sudo "}, - {"value": "ls "}, + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "dir "}, + {"redacted": True}, {"redacted": True, "pattern": ANY, "source": 0}, ] @@ -145,23 +224,19 @@ def test_cmdi_redact_options(file_path): ], ) def test_cmdi_redact_source_command(file_path): - ev = Evidence( - valueParts=[ - {"value": "sudo "}, - {"value": "ls ", "source": 0}, - {"value": file_path}, - ] - ) + Ls_cmd = taint_pyobject(pyobject="ls ", source_name="test_ossystem", source_value="ls ") + + ev = 
Evidence(value=add_aspect("sudo ", add_aspect(Ls_cmd, file_path))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_CMDI, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = CommandInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "sudo "}, {"value": "ls ", "source": 0}, - {"value": " "}, {"redacted": True}, ] diff --git a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py index 6407406ef7b..db9272e1625 100644 --- a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py @@ -2,6 +2,7 @@ from ddtrace.appsec._constants import IAST from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin from ddtrace.appsec._iast.constants import VULN_HEADER_INJECTION from ddtrace.appsec._iast.reporter import Evidence @@ -13,7 +14,6 @@ from ddtrace.internal import core from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import _taint_pyobject_multiranges from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import get_parametrize -from tests.utils import override_global_config @pytest.mark.parametrize( @@ -34,7 +34,7 @@ def test_header_injection_redact_excluded(header_name, header_value): v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", 
value=header_value) report = IastSpanReporter([s], {v}) - + report.add_ranges_to_evidence_and_extract_sources(v) redacted_report = HeaderInjection._redact_report(report) for v in redacted_report.vulnerabilities: assert v.evidence.valueParts == [{"value": header_name + ": "}, {"source": 0, "value": header_value}] @@ -46,10 +46,7 @@ def test_header_injection_redact_excluded(header_name, header_value): ( "WWW-Authenticate", 'Basic realm="api"', - [ - {"value": "WWW-Authenticate: "}, - {"pattern": "abcdefghijklmnopq", "redacted": True, "source": 0}, - ], + [{"value": "WWW-Authenticate: "}, {"source": 0, "value": 'Basic realm="api"'}], ), ( "Authorization", @@ -65,7 +62,7 @@ def test_header_injection_redact_excluded(header_name, header_value): ), ], ) -def test_header_injection_redact(header_name, header_value, value_part): +def test_common_django_header_injection_redact(header_name, header_value, value_part): ev = Evidence( valueParts=[ {"value": header_name + ": "}, @@ -76,13 +73,12 @@ def test_header_injection_redact(header_name, header_value, value_part): v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value=header_value) report = IastSpanReporter([s], {v}) - + report.add_ranges_to_evidence_and_extract_sources(v) redacted_report = HeaderInjection._redact_report(report) for v in redacted_report.vulnerabilities: assert v.evidence.valueParts == value_part -@pytest.mark.skip(reason="TODO: this algorithm is not working as expected, it needs to be fixed.") @pytest.mark.parametrize( "evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_HEADER_INJECTION)), @@ -90,29 +86,32 @@ def test_header_injection_redact(header_name, header_value, value_part): def test_header_injection_redaction_suite( evidence_input, sources_expected, vulnerabilities_expected, iast_span_defaults ): - with override_global_config(dict(_deduplication_enabled=False)): - tainted_object = 
_taint_pyobject_multiranges( - evidence_input["value"], - [ - ( - input_ranges["iinfo"]["parameterName"], - input_ranges["iinfo"]["parameterValue"], - str_to_origin(input_ranges["iinfo"]["type"]), - input_ranges["start"], - input_ranges["end"] - input_ranges["start"], - ) - for input_ranges in evidence_input["ranges"] - ], - ) + tainted_object = _taint_pyobject_multiranges( + evidence_input["value"], + [ + ( + input_ranges["iinfo"]["parameterName"], + input_ranges["iinfo"]["parameterValue"], + str_to_origin(input_ranges["iinfo"]["type"]), + input_ranges["start"], + input_ranges["end"] - input_ranges["start"], + ) + for input_ranges in evidence_input["ranges"] + ], + ) - assert is_pyobject_tainted(tainted_object) + assert is_pyobject_tainted(tainted_object) - HeaderInjection.report(tainted_object) + HeaderInjection.report(tainted_object) - span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - assert span_report + span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_HEADER_INJECTION - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_HEADER_INJECTION + assert source == sources_expected diff --git a/tests/appsec/iast/taint_sinks/test_insecure_cookie.py b/tests/appsec/iast/taint_sinks/test_insecure_cookie.py index 2a45778a89c..9d2784b3c49 100644 --- a/tests/appsec/iast/taint_sinks/test_insecure_cookie.py +++ b/tests/appsec/iast/taint_sinks/test_insecure_cookie.py @@ -1,7 +1,9 @@ +import json + +import attr import pytest from ddtrace.appsec._constants import IAST -from ddtrace.appsec._iast._utils import _iast_report_to_str 
from ddtrace.appsec._iast.constants import VULN_INSECURE_COOKIE from ddtrace.appsec._iast.constants import VULN_NO_HTTPONLY_COOKIE from ddtrace.appsec._iast.constants import VULN_NO_SAMESITE_COOKIE @@ -9,6 +11,20 @@ from ddtrace.internal import core +def _iast_report_to_str(data): + from ddtrace.appsec._iast._taint_tracking import OriginType + from ddtrace.appsec._iast._taint_tracking import origin_to_str + + class OriginTypeEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, OriginType): + # if the obj is uuid, we simply return the value of uuid + return origin_to_str(obj) + return json.JSONEncoder.default(self, obj) + + return json.dumps(attr.asdict(data, filter=lambda attr, x: x is not None), cls=OriginTypeEncoder) + + def test_insecure_cookies(iast_span_defaults): cookies = {"foo": "bar"} asm_check_cookies(cookies) diff --git a/tests/appsec/iast/taint_sinks/test_path_traversal.py b/tests/appsec/iast/taint_sinks/test_path_traversal.py index 6a8083908ba..0dda76950e7 100644 --- a/tests/appsec/iast/taint_sinks/test_path_traversal.py +++ b/tests/appsec/iast/taint_sinks/test_path_traversal.py @@ -33,17 +33,20 @@ def test_path_traversal_open(iast_span_defaults): ) mod.pt_open(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert len(span_report.vulnerabilities) == 1 - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert source.name == "path" - assert source.origin == OriginType.PATH - assert source.value == file_path - assert vulnerability.evidence.valueParts == [{"source": 0, "value": file_path}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None + assert span_report + data = span_report.build_and_scrub_value_parts() + + assert len(data["vulnerabilities"]) == 1 + vulnerability = data["vulnerabilities"][0] + source = 
data["sources"][0] + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert source["name"] == "path" + assert source["origin"] == OriginType.PATH + assert source["value"] == file_path + assert vulnerability["evidence"]["valueParts"] == [{"source": 0, "value": file_path}] + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None @pytest.mark.parametrize( @@ -82,19 +85,22 @@ def test_path_traversal(module, function, iast_span_defaults): getattr(mod, "path_{}_{}".format(module, function))(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + assert span_report + data = span_report.build_and_scrub_value_parts() + line, hash_value = get_line_and_hash( "path_{}_{}".format(module, function), VULN_PATH_TRAVERSAL, filename=FIXTURES_PATH ) - vulnerability = list(span_report.vulnerabilities)[0] - assert len(span_report.vulnerabilities) == 1 - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value - assert vulnerability.evidence.valueParts == [{"source": 0, "value": file_path}] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None + vulnerability = data["vulnerabilities"][0] + assert len(data["vulnerabilities"]) == 1 + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value + assert vulnerability["evidence"]["valueParts"] == [{"source": 0, "value": file_path}] + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None 
@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection.py b/tests/appsec/iast/taint_sinks/test_sql_injection.py index 62252cc7808..54efea82ffe 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection.py @@ -53,8 +53,6 @@ def test_sql_injection(fixture_path, fixture_module, iast_span_defaults): {"value": "students", "source": 0}, ] assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None assert source.name == "test_ossystem" assert source.origin == OriginType.PARAMETER assert source.value == "students" diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py index 4d936854caf..4122b53d402 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py @@ -1,9 +1,6 @@ -import copy - import pytest from ddtrace.appsec._constants import IAST -from ddtrace.appsec._iast import oce from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted from ddtrace.appsec._iast._taint_tracking import str_to_origin from ddtrace.appsec._iast.constants import VULN_SQL_INJECTION @@ -12,13 +9,10 @@ from ddtrace.appsec._iast.reporter import Location from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability -from ddtrace.appsec._iast.taint_sinks._base import VulnerabilityBase from ddtrace.appsec._iast.taint_sinks.sql_injection import SqlInjection from ddtrace.internal import core -from ddtrace.internal.utils.cache import LFUCache from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import _taint_pyobject_multiranges from tests.appsec.iast.taint_sinks.test_taint_sinks_utils import get_parametrize -from tests.utils import override_env from tests.utils import 
override_global_config @@ -103,7 +97,6 @@ def test_redacted_report_no_match(): def test_redacted_report_source_name_match(): ev = Evidence(value="'SomeEvidenceValue'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="secret", value="SomeValue") @@ -111,14 +104,11 @@ def test_redacted_report_source_name_match(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value def test_redacted_report_source_value_match(): ev = Evidence(value="'SomeEvidenceValue'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value="somepassword") @@ -126,14 +116,11 @@ def test_redacted_report_source_value_match(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value def test_redacted_report_evidence_value_match_also_redacts_source_value(): ev = Evidence(value="'SomeSecretPassword'") - len_ev = len(ev.value) - 2 loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) s = Source(origin="SomeOrigin", name="SomeName", value="SomeSecretPassword") @@ -141,8 +128,6 @@ def test_redacted_report_evidence_value_match_also_redacts_source_value(): redacted_report = SqlInjection._redact_report(report) for v in redacted_report.vulnerabilities: - assert v.evidence.redacted - assert v.evidence.pattern == "'%s'" % ("*" * len_ev) assert not v.evidence.value for s in redacted_report.sources: assert s.redacted @@ -250,122 +235,3 @@ def 
test_regression_ci_failure(): {"redacted": True}, {"value": "'"}, ] - - -def test_scrub_cache(tracer): - valueParts1 = [ - {"value": "SELECT * FROM users WHERE password = '"}, - {"value": "1234", "source": 0}, - {"value": ":{SHA1}'"}, - ] - # valueParts will be modified to be scrubbed, thus these copies - valueParts1_copy1 = copy.deepcopy(valueParts1) - valueParts1_copy2 = copy.deepcopy(valueParts1) - valueParts1_copy3 = copy.deepcopy(valueParts1) - valueParts2 = [ - {"value": "SELECT * FROM users WHERE password = '"}, - {"value": "123456", "source": 0}, - {"value": ":{SHA1}'"}, - ] - - s1 = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - s2 = Source(origin="SomeOtherOrigin", name="SomeName", value="SomeValue") - - env = {"DD_IAST_REQUEST_SAMPLING": "100", "DD_IAST_ENABLED": "true"} - with override_env(env): - oce.reconfigure() - with tracer.trace("test1") as span: - oce.acquire_request(span) - VulnerabilityBase._redacted_report_cache = LFUCache() - SqlInjection.report(evidence_value=valueParts1, sources=[s1]) - span_report1 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert span_report1, "no report: check that get_info_frame is not skipping this frame" - assert list(span_report1.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert len(VulnerabilityBase._redacted_report_cache) == 1 - oce.release_request() - - # Should be the same report object - with tracer.trace("test2") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy1, sources=[s1]) - span_report2 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report2.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) == 
id(span_report2) - assert span_report1 is span_report2 - assert len(VulnerabilityBase._redacted_report_cache) == 1 - oce.release_request() - - # Different report, other valueParts - with tracer.trace("test3") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts2, sources=[s1]) - span_report3 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report3.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report3) - assert span_report1 is not span_report3 - assert len(VulnerabilityBase._redacted_report_cache) == 2 - oce.release_request() - - # Different report, other source - with tracer.trace("test4") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy2, sources=[s2]) - span_report4 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report4.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report4) - assert span_report1 is not span_report4 - assert len(VulnerabilityBase._redacted_report_cache) == 3 - oce.release_request() - - # Same as previous so cache should not increase - with tracer.trace("test4") as span: - oce.acquire_request(span) - SqlInjection.report(evidence_value=valueParts1_copy3, sources=[s2]) - span_report5 = core.get_item(IAST.CONTEXT_KEY, span=span) - assert list(span_report5.vulnerabilities)[0].evidence == Evidence( - value=None, - pattern=None, - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ], - ) - assert id(span_report1) != id(span_report5) - assert span_report1 is not span_report5 - assert id(span_report4) == 
id(span_report5) - assert span_report4 is span_report5 - assert len(VulnerabilityBase._redacted_report_cache) == 3 - oce.release_request() diff --git a/tests/appsec/iast/taint_sinks/test_ssrf.py b/tests/appsec/iast/taint_sinks/test_ssrf.py index 25e133830ec..49053f0b07b 100644 --- a/tests/appsec/iast/taint_sinks/test_ssrf.py +++ b/tests/appsec/iast/taint_sinks/test_ssrf.py @@ -39,25 +39,26 @@ def test_ssrf(tracer, iast_span_defaults): pass span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report + data = span_report.build_and_scrub_value_parts() - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_SSRF - assert vulnerability.evidence.valueParts == [ + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_SSRF + assert vulnerability["evidence"]["valueParts"] == [ {"value": "http://localhost/"}, {"source": 0, "value": tainted_path}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_ssrf" - assert source.origin == OriginType.PARAMETER - assert source.value == tainted_path + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_ssrf" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == tainted_path line, hash_value = get_line_and_hash("test_ssrf", VULN_SSRF, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value 
@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) diff --git a/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py b/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py index ca43fcb5112..aa329cb551e 100644 --- a/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_ssrf_redacted.py @@ -3,12 +3,14 @@ import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_SSRF from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from ddtrace.appsec._iast.taint_sinks.ssrf import SSRF from ddtrace.internal import core @@ -45,58 +47,72 @@ def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_ span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_SSRF - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_SSRF + assert source == sources_expected -def test_cmdi_redact_param(): +def test_ssrf_redact_param(): + password_taint_range = taint_pyobject(pyobject="test1234", source_name="password", source_value="test1234") + ev = Evidence( - valueParts=[ - {"value": 
"https://www.domain1.com/?id="}, - {"value": "test1234", "source": 0}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, - ] + value=add_aspect( + "https://www.domain1.com/?id=", + add_aspect(password_taint_range, "¶m2=value2¶m3=value3¶m3=value3"), + ) ) + loc = Location(path="foobar.py", line=35, spanId=123) - v = Vulnerability(type="VulnerabilityType", evidence=ev, location=loc) - s = Source(origin="http.request.parameter.name", name="password", value="test1234") - report = IastSpanReporter([s], {v}) - - redacted_report = SSRF._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "https://www.domain1.com/?id="}, + v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ + {"value": "https://www.domain1.com/"}, + {"redacted": True}, {"pattern": "abcdefgh", "redacted": True, "source": 0}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, + {"redacted": True}, + {"redacted": True}, + {"redacted": True}, ] def test_cmdi_redact_user_password(): + user_taint_range = taint_pyobject(pyobject="root", source_name="username", source_value="root") + password_taint_range = taint_pyobject( + pyobject="superpasswordsecure", source_name="password", source_value="superpasswordsecure" + ) + ev = Evidence( - valueParts=[ - {"value": "https://"}, - {"value": "root", "source": 0}, - {"value": ":"}, - {"value": "superpasswordsecure", "source": 1}, - {"value": "@domain1.com/?id="}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, - ] + value=add_aspect( + "https://", + add_aspect( + add_aspect(add_aspect(user_taint_range, ":"), password_taint_range), + "@domain1.com/?id=¶m2=value2¶m3=value3¶m3=value3", + ), + ) ) + loc = Location(path="foobar.py", line=35, 
spanId=123) - v = Vulnerability(type="VulnerabilityType", evidence=ev, location=loc) - s1 = Source(origin="http.request.parameter.name", name="username", value="root") - s2 = Source(origin="http.request.parameter.name", name="password", value="superpasswordsecure") - report = IastSpanReporter([s1, s2], {v}) - - redacted_report = SSRF._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ + v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + for v in result["vulnerabilities"]: + assert v["evidence"]["valueParts"] == [ {"value": "https://"}, {"pattern": "abcd", "redacted": True, "source": 0}, {"value": ":"}, - {"source": 1, "value": "superpasswordsecure"}, - {"value": "@domain1.com/?id="}, - {"value": "¶m2=value2¶m3=value3¶m3=value3"}, + {"pattern": "abcdefghijklmnopqrs", "redacted": True, "source": 1}, + {"value": "@domain1.com/?id=¶m2=value2¶m3=value3¶m3=value3"}, ] diff --git a/tests/appsec/iast/taint_sinks/test_weak_randomness.py b/tests/appsec/iast/taint_sinks/test_weak_randomness.py index 602834accb2..f8aa0ab1a71 100644 --- a/tests/appsec/iast/taint_sinks/test_weak_randomness.py +++ b/tests/appsec/iast/taint_sinks/test_weak_randomness.py @@ -39,8 +39,6 @@ def test_weak_randomness(random_func, iast_span_defaults): assert vulnerability.hash == hash_value assert vulnerability.evidence.value == "Random.{}".format(random_func) assert vulnerability.evidence.valueParts is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None @pytest.mark.skipif(WEEK_RANDOMNESS_PY_VERSION, reason="Some random methods exists on 3.9 or higher") @@ -73,8 +71,6 @@ def test_weak_randomness_module(random_func, iast_span_defaults): assert vulnerability.hash == hash_value assert 
vulnerability.evidence.value == "Random.{}".format(random_func) assert vulnerability.evidence.valueParts is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None @pytest.mark.skipif(WEEK_RANDOMNESS_PY_VERSION, reason="Some random methods exists on 3.9 or higher") diff --git a/tests/appsec/iast/test_iast_propagation_path.py b/tests/appsec/iast/test_iast_propagation_path.py index 5456daf540d..9637b692501 100644 --- a/tests/appsec/iast/test_iast_propagation_path.py +++ b/tests/appsec/iast/test_iast_propagation_path.py @@ -13,18 +13,18 @@ FIXTURES_PATH = "tests/appsec/iast/fixtures/propagation_path.py" -def _assert_vulnerability(span_report, value_parts, file_line_label): - vulnerability = list(span_report.vulnerabilities)[0] - assert vulnerability.type == VULN_PATH_TRAVERSAL - assert vulnerability.evidence.valueParts == value_parts - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None +def _assert_vulnerability(data, value_parts, file_line_label): + vulnerability = data["vulnerabilities"][0] + assert vulnerability["type"] == VULN_PATH_TRAVERSAL + assert vulnerability["evidence"]["valueParts"] == value_parts + assert "value" not in vulnerability["evidence"].keys() + assert "pattern" not in vulnerability["evidence"].keys() + assert "redacted" not in vulnerability["evidence"].keys() line, hash_value = get_line_and_hash(file_line_label, VULN_PATH_TRAVERSAL, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value def test_propagation_no_path(iast_span_defaults): @@ -55,19 +55,22 @@ def test_propagation_path_1_origin_1_propagation(origin1, iast_span_defaults): 
mod.propagation_path_1_source_1_prop(tainted_string) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - source = span_report.sources[0] + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] source_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert source.name == "path" - assert source.origin == OriginType.PATH - assert source.value == source_value_encoded + assert len(sources) == 1 + assert sources[0]["name"] == "path" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source_value_encoded value_parts = [ {"value": ANY}, {"source": 0, "value": source_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_1_source_1_prop") + _assert_vulnerability(data, value_parts, "propagation_path_1_source_1_prop") @pytest.mark.parametrize( @@ -87,12 +90,15 @@ def test_propagation_path_1_origins_2_propagations(origin1, iast_span_defaults): mod.propagation_path_1_source_2_prop(tainted_string_1) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - sources = span_report.sources + assert len(sources) == 1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == value_encoded value_parts = [ {"value": ANY}, @@ -100,14 +106,14 @@ def test_propagation_path_1_origins_2_propagations(origin1, iast_span_defaults): {"source": 0, "value": value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_1_source_2_prop") + _assert_vulnerability(data, value_parts, 
"propagation_path_1_source_2_prop") @pytest.mark.parametrize( "origin1, origin2", [ ("taintsource1", "taintsource2"), - ("taintsource", "taintsource"), + # ("taintsource", "taintsource"), TODO: invalid source pos ("1", "1"), (b"taintsource1", "taintsource2"), (b"taintsource1", b"taintsource2"), @@ -130,35 +136,37 @@ def test_propagation_path_2_origins_2_propagations(origin1, origin2, iast_span_d mod.propagation_path_2_source_2_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] - sources = span_report.sources assert len(sources) == 2 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded source2_value_encoded = str(origin2, encoding="utf-8") if type(origin2) is not str else origin2 - assert sources[1].name == "path2" - assert sources[1].origin == OriginType.PARAMETER - assert sources[1].value == source2_value_encoded - + assert sources[1]["name"] == "path2" + assert sources[1]["origin"] == OriginType.PARAMETER + assert sources[1]["value"] == source2_value_encoded value_parts = [ {"value": ANY}, {"source": 0, "value": source1_value_encoded}, {"source": 1, "value": source2_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_2_source_2_prop") + _assert_vulnerability(data, value_parts, "propagation_path_2_source_2_prop") @pytest.mark.parametrize( "origin1, origin2", [ ("taintsource1", "taintsource2"), - ("taintsource", "taintsource"), + # ("taintsource", "taintsource"), TODO: invalid source pos ("1", "1"), (b"taintsource1", "taintsource2"), + # 
(b"taintsource", "taintsource"), TODO: invalid source pos (b"taintsource1", b"taintsource2"), ("taintsource1", b"taintsource2"), (bytearray(b"taintsource1"), "taintsource2"), @@ -179,18 +187,20 @@ def test_propagation_path_2_origins_3_propagation(origin1, origin2, iast_span_de mod.propagation_path_3_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] - sources = span_report.sources assert len(sources) == 2 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded source2_value_encoded = str(origin2, encoding="utf-8") if type(origin2) is not str else origin2 - assert sources[1].name == "path2" - assert sources[1].origin == OriginType.PARAMETER - assert sources[1].value == source2_value_encoded + assert sources[1]["name"] == "path2" + assert sources[1]["origin"] == OriginType.PARAMETER + assert sources[1]["value"] == source2_value_encoded value_parts = [ {"value": ANY}, @@ -204,7 +214,7 @@ def test_propagation_path_2_origins_3_propagation(origin1, origin2, iast_span_de {"source": 1, "value": source2_value_encoded}, {"value": ".txt"}, ] - _assert_vulnerability(span_report, value_parts, "propagation_path_3_prop") + _assert_vulnerability(data, value_parts, "propagation_path_3_prop") @pytest.mark.parametrize( @@ -233,13 +243,14 @@ def test_propagation_path_2_origins_5_propagation(origin1, origin2, iast_span_de mod.propagation_path_5_prop(tainted_string_1, tainted_string_2) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) - - sources = span_report.sources + 
span_report.build_and_scrub_value_parts() + data = span_report._to_dict() + sources = data["sources"] assert len(sources) == 1 source1_value_encoded = str(origin1, encoding="utf-8") if type(origin1) is not str else origin1 - assert sources[0].name == "path1" - assert sources[0].origin == OriginType.PATH - assert sources[0].value == source1_value_encoded + assert sources[0]["name"] == "path1" + assert sources[0]["origin"] == OriginType.PATH + assert sources[0]["value"] == source1_value_encoded value_parts = [{"value": ANY}, {"source": 0, "value": "aint"}, {"value": ".txt"}] - _assert_vulnerability(span_report, value_parts, "propagation_path_5_prop") + _assert_vulnerability(data, value_parts, "propagation_path_5_prop") diff --git a/tests/appsec/integrations/test_langchain.py b/tests/appsec/integrations/test_langchain.py index d1e86e6ab68..325bfe670d5 100644 --- a/tests/appsec/integrations/test_langchain.py +++ b/tests/appsec/integrations/test_langchain.py @@ -33,21 +33,23 @@ def test_openai_llm_appsec_iast_cmdi(iast_span_defaults): # noqa: F811 span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_CMDI - assert vulnerability.evidence.valueParts == [ - {"value": "echo Hello World", "source": 0}, + data = span_report.build_and_scrub_value_parts() + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_CMDI + assert vulnerability["evidence"]["valueParts"] == [ + {"source": 0, "value": "echo "}, + {"pattern": "", "redacted": True, "source": 0}, + {"source": 0, "value": "Hello World"}, ] - assert vulnerability.evidence.value is None - assert vulnerability.evidence.pattern is None - assert vulnerability.evidence.redacted is None - assert source.name == "test_openai_llm_appsec_iast_cmdi" - assert source.origin == OriginType.PARAMETER - assert source.value 
== string_to_taint + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_openai_llm_appsec_iast_cmdi" + assert source["origin"] == OriginType.PARAMETER + assert "value" not in source.keys() line, hash_value = get_line_and_hash("test_openai_llm_appsec_iast_cmdi", VULN_CMDI, filename=FIXTURES_PATH) - assert vulnerability.location.path == FIXTURES_PATH - assert vulnerability.location.line == line - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value diff --git a/tests/contrib/botocore/bedrock_cassettes/amazon_embedding.yaml b/tests/contrib/botocore/bedrock_cassettes/amazon_embedding.yaml new file mode 100644 index 00000000000..b6cdf04f298 --- /dev/null +++ b/tests/contrib/botocore/bedrock_cassettes/amazon_embedding.yaml @@ -0,0 +1,44 @@ +interactions: +- request: + body: '{"inputText": "Hello World!"}' + headers: + Content-Length: + - '29' + User-Agent: + - !!binary | + Qm90bzMvMS4zNC40OSBtZC9Cb3RvY29yZSMxLjM0LjQ5IHVhLzIuMCBvcy9tYWNvcyMyMy40LjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEwLjUgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl + dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzQuNDk= + X-Amz-Date: + - !!binary | + MjAyNDA0MjNUMjA1NzAzWg== + amz-sdk-invocation-id: + - !!binary | + ZTUyMjJhZGQtNGI3My00YjM4LThhZmEtZTkxNmI1NmJkZTky + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke + response: + body: + string: 
'{"embedding":[0.45703125,0.30078125,0.41210938,0.41015625,0.74609375,0.3828125,-0.018798828,-0.0010681152,0.515625,-0.25195312,-0.20898438,-0.15332031,-0.22460938,0.13671875,-0.34375,0.037109375,-0.03491211,-0.29882812,-0.875,0.21289062,-0.39648438,0.26171875,-0.49609375,-0.15527344,-0.014465332,0.25,-0.1796875,-0.46875,0.82421875,0.1796875,-0.34960938,1.2265625,-0.079589844,-0.3046875,-0.93359375,0.3828125,-0.031982422,-0.74609375,-0.49023438,0.546875,0.080078125,0.328125,-0.1484375,-0.30078125,0.54296875,0.58203125,-0.1875,-0.17773438,0.59375,1.0859375,-0.20703125,0.99609375,-0.25195312,0.6875,-0.10839844,-0.11035156,0.66015625,-0.12695312,-0.0079956055,-0.37695312,-0.038085938,-0.34570312,-0.62890625,0.94921875,0.12597656,0.125,-0.12109375,0.3515625,-0.17285156,0.6328125,0.41210938,0.48828125,0.19140625,0.6640625,-0.67578125,0.23730469,0.80859375,-0.16796875,0.43164062,0.27148438,0.42578125,0.06298828,-0.3984375,-0.27148438,-0.64453125,-0.04638672,0.19726562,-0.095214844,1.3542175E-4,-0.98828125,-0.048828125,-0.25,1.0078125,-0.087402344,-0.49609375,0.359375,-0.9765625,-0.5078125,0.06933594,-0.24511719,0.9921875,0.1875,0.08984375,0.36132812,0.14355469,0.52734375,-0.08251953,0.78125,0.09863281,0.49414062,0.24316406,-0.6796875,0.875,0.051757812,0.42382812,0.11425781,0.69140625,-1.0546875,-0.19824219,-0.3984375,0.18359375,-0.14648438,0.6015625,0.51171875,0.2734375,0.2265625,0.19824219,-0.640625,0.14648438,0.072265625,0.48828125,0.23730469,1.3125,-0.1484375,-0.25195312,-0.15527344,-0.37695312,0.26757812,-0.55078125,-0.98828125,-0.03112793,-0.26367188,0.19238281,0.375,0.0146484375,0.037109375,-0.53515625,0.26953125,0.16796875,-0.44921875,0.4453125,0.21679688,0.26367188,-0.48046875,0.734375,0.3671875,0.3359375,0.3203125,-0.171875,-0.0703125,0.20800781,0.62109375,-1.1328125,0.025390625,-0.52734375,-0.41601562,-0.6796875,0.3671875,-0.21679688,-0.54296875,1.1875,0.21679688,-0.34179688,0.12060547,-0.24804688,0.057373047,0.27734375,0.20800781,0.016235352,0.51953125,0.515625
,-0.2421875,0.24023438,-0.890625,-0.27539062,-0.095703125,-0.58203125,-0.7578125,0.055908203,0.95703125,-0.11328125,0.2421875,0.20800781,-0.024414062,-0.328125,0.45507812,-0.13769531,0.34375,-0.16601562,-0.26757812,-0.15039062,0.546875,0.38671875,0.009399414,-0.11767578,-0.21191406,-0.071777344,-0.040283203,-0.42773438,0.06347656,-0.111328125,-0.14648438,-0.32226562,-1.171875,0.6484375,-0.5,0.10107422,-0.20507812,0.16796875,-0.022949219,0.30273438,-0.96484375,0.30664062,0.45898438,-0.31445312,0.31054688,0.53125,0.37304688,-0.08154297,0.6015625,-0.6171875,0.296875,-0.63671875,0.48242188,0.087402344,0.30859375,-0.6484375,-0.10498047,-0.42578125,0.62890625,0.33007812,-0.42382812,-0.24121094,-0.17480469,0.10498047,0.3984375,0.28710938,0.27929688,-0.064453125,-0.17480469,-0.15039062,0.546875,-0.39257812,-0.2734375,-0.32421875,-0.025268555,-0.51953125,-0.38867188,-0.3359375,-0.09033203,0.24511719,0.056152344,0.40039062,-0.51953125,0.203125,-0.65625,0.13671875,0.44140625,-0.064453125,-1.1171875,1.3256073E-4,0.11230469,0.41601562,-0.18066406,-0.19238281,0.02722168,-1.25,0.76171875,0.19238281,-0.4375,0.359375,-0.20507812,0.84765625,-0.41601562,-0.25390625,-1.609375,-0.28125,0.3984375,0.05053711,0.080078125,-0.609375,-0.107910156,-0.64453125,0.21484375,0.8046875,0.30273438,0.43554688,0.17578125,-0.37109375,0.12890625,-0.18066406,0.25976562,0.6015625,0.2734375,0.24609375,0.15136719,0.18945312,0.08935547,-0.35546875,0.3671875,-0.39257812,-0.095214844,0.47070312,-0.22753906,0.84375,-0.14160156,-0.103515625,0.18847656,-0.2734375,-0.1953125,-0.56640625,-0.734375,-0.79296875,0.05053711,0.103515625,-0.2890625,-0.21191406,-0.6484375,0.122558594,-0.42382812,-0.042236328,0.3125,-0.014343262,0.25195312,0.08251953,-0.80859375,-0.46875,0.92578125,0.53515625,-0.86328125,-0.39257812,-0.18261719,0.90234375,-0.484375,-0.02746582,-0.31835938,-0.083984375,0.24414062,-0.5625,-0.032958984,0.4140625,-0.26953125,-0.51171875,0.13085938,0.84375,-0.8984375,-0.13867188,-0.44726562,-0.29101562,-0.094238
28,0.49023438,-0.2890625,0.07470703,-0.65625,-0.453125,-0.09765625,-0.50390625,-0.74609375,0.049072266,0.6171875,-0.515625,1.7265625,0.15039062,-0.28710938,-0.0010757446,0.30273438,-0.20410156,0.63671875,-0.45703125,-0.6796875,0.9765625,-1.0078125,-0.24511719,-0.17382812,-0.49023438,0.76953125,-0.359375,-1.1015625,-1.3125,-0.3984375,0.15039062,0.2265625,0.14648438,-0.03491211,0.09814453,0.08886719,0.83984375,-0.33203125,-0.34960938,-0.20898438,-0.20410156,0.072753906,0.42382812,-1.453125,0.65234375,-0.625,-1.0,-0.14257812,0.2578125,-0.1953125,-0.030639648,0.4375,-0.115234375,0.26757812,0.36523438,-0.36523438,0.26367188,0.0067443848,-0.578125,-1.140625,-0.39257812,0.47070312,0.5859375,0.029785156,1.03125,-0.52734375,0.6640625,-0.09472656,-0.69921875,0.5078125,-0.2890625,-0.27734375,0.66796875,0.578125,0.453125,0.3046875,-0.2890625,0.34179688,-0.41992188,-0.22460938,0.7421875,0.024414062,-0.22265625,-0.265625,-0.35546875,-0.05859375,-0.7890625,-0.14257812,-0.39257812,0.3515625,0.07519531,-0.29296875,-0.43554688,-0.71875,0.15625,0.084472656,-0.07373047,0.38476562,0.4609375,-0.13964844,0.17773438,-0.3828125,-0.06738281,-0.50390625,-0.375,-0.56640625,1.0546875,0.54296875,-0.80078125,-0.11425781,-0.7578125,0.13671875,0.30273438,0.09765625,-0.34765625,-0.0061950684,-0.05883789,0.61328125,-0.3359375,0.21972656,0.53515625,0.4140625,0.37695312,-1.2265625,0.203125,-0.578125,-0.28125,-0.17871094,-0.546875,-0.703125,-0.07080078,1.7578125,-0.6171875,-0.81640625,-0.34765625,0.1328125,-2.6512146E-4,-0.19140625,-0.016723633,0.703125,0.22851562,-0.029541016,-0.7265625,0.038085938,-0.44921875,0.29101562,-1.1640625,0.40234375,-0.3671875,-0.09863281,1.125,0.16992188,-0.82421875,0.48828125,0.110839844,0.65625,0.51953125,0.20800781,-0.625,0.22949219,0.057861328,0.16210938,0.57421875,0.265625,-0.8203125,1.1796875,-0.6875,-0.32421875,-1.2421875,-0.14550781,-0.0013809204,-0.43945312,-0.35742188,0.07861328,0.8984375,-0.040283203,-0.07421875,0.43554688,-0.984375,-0.38085938,-0.14746094,-0.5078
125,-1.1015625,0.94921875,-0.5234375,0.063964844,-0.38867188,-0.26171875,-0.48242188,0.075683594,0.43554688,-0.36914062,0.21972656,0.018188477,0.053466797,0.92578125,-0.62109375,0.44335938,-0.5859375,-0.59375,0.25585938,-0.20898438,-0.31445312,-0.48632812,0.14355469,-0.19238281,-0.15234375,0.41015625,0.07324219,0.025756836,-0.375,-0.19140625,0.9921875,0.27734375,0.33398438,0.93359375,0.5078125,-0.515625,0.58203125,0.43164062,0.42382812,-0.29296875,0.5625,-0.51171875,-0.6328125,0.5625,0.1015625,0.18359375,0.26757812,-0.765625,-0.29101562,-0.77734375,0.546875,-0.79296875,0.19921875,0.65234375,0.23730469,-1.25,-0.73828125,-0.35351562,0.28125,0.15039062,0.49414062,-0.2578125,0.15429688,0.051513672,-0.55078125,0.119628906,-0.44726562,0.7421875,-0.7109375,0.6328125,0.671875,0.16894531,-1.09375,-0.087890625,0.20703125,0.44140625,-0.3046875,-0.734375,-0.09423828,-0.42578125,-0.79296875,-0.41601562,0.40039062,0.06933594,0.009765625,-1.0546875,0.115722656,-1.2421875,-0.734375,0.890625,0.09277344,0.8125,0.125,1.1875,-0.19824219,0.40234375,0.36328125,0.06933594,-0.515625,0.484375,0.45117188,0.16113281,0.99609375,-0.42578125,0.24804688,1.3671875,-0.4453125,0.16308594,-0.30859375,0.30078125,0.44726562,-0.15136719,-0.036376953,0.068847656,0.33398438,0.19824219,-0.83984375,-0.24511719,-0.35546875,0.54296875,-0.03125,0.35742188,0.9140625,0.4375,0.14355469,-0.48242188,0.52734375,0.20019531,0.31445312,-0.98046875,0.76171875,-0.0022735596,-0.22851562,-0.30859375,0.100097656,-0.30078125,-0.87109375,-0.8125,0.31640625,-0.33789062,-0.53515625,-0.16992188,-0.296875,1.3359375,0.54296875,-0.24902344,-0.095703125,0.71484375,-0.026977539,0.72265625,0.4375,0.66796875,0.21777344,0.18652344,0.009216309,0.11279297,-0.21777344,1.0859375,-0.65625,0.6328125,-0.008911133,-0.58203125,-0.44726562,0.8125,-0.703125,-0.15917969,0.051513672,0.49609375,0.35351562,0.24023438,0.20996094,-0.5859375,0.26171875,0.6484375,-0.57421875,0.024169922,0.41992188,0.55078125,0.037109375,-0.6171875,-0.01977539,0.58203125,0
.88671875,0.29296875,-0.1875,0.3984375,0.48046875,-0.045898438,0.515625,1.2890625,0.41992188,0.3203125,-0.13867188,-0.37109375,-0.47460938,-0.26171875,-0.5546875,-0.49023438,-0.6796875,-0.04736328,-0.25976562,-0.94921875,0.1484375,-0.02319336,-0.7578125,0.7578125,0.08105469,0.59765625,-0.29882812,0.84375,0.26953125,0.09472656,-0.66015625,-0.99609375,0.7265625,0.5703125,-0.09716797,0.55078125,-0.11230469,0.46875,0.546875,0.21582031,-0.19140625,-0.29492188,-0.20507812,-0.1328125,0.3671875,0.28320312,0.072265625,0.091796875,-0.07373047,0.4296875,-0.106933594,-0.033447266,0.22851562,0.84765625,0.3203125,-0.007598877,-0.5078125,0.06689453,0.34179688,0.484375,-1.515625,1.0625,0.6171875,0.08935547,-0.6484375,0.55078125,0.3828125,-0.22363281,0.765625,-0.28320312,0.09765625,0.76953125,-0.45703125,0.05493164,0.42578125,0.18066406,0.12695312,-0.038085938,0.44335938,-0.15722656,0.24414062,0.032958984,0.6484375,0.31640625,0.35351562,0.048339844,0.20410156,-0.38085938,-0.51953125,-0.21191406,-0.48046875,-0.7890625,-0.21972656,0.09375,0.42773438,0.51171875,-0.30859375,1.5546875,-0.17382812,-0.7109375,0.061767578,0.0036621094,0.15234375,-0.076660156,-0.16894531,-0.33007812,-0.05908203,0.6796875,0.13769531,-0.37304688,-0.21972656,-0.014343262,-0.47070312,-0.6484375,1.21875,-0.12451172,-0.046875,0.107910156,-0.37109375,0.057861328,0.51171875,0.640625,0.14648438,0.37695312,0.16601562,-0.24707031,1.4296875,-0.57421875,-0.39257812,-0.4921875,0.45507812,-0.12792969,-0.09033203,-0.31054688,0.10253906,-0.42773438,0.14160156,-0.11376953,-0.73828125,0.5546875,-0.16796875,0.36132812,0.24609375,-0.8359375,-0.6484375,0.2578125,0.1328125,-0.21191406,-0.23046875,0.33203125,0.23632812,0.59375,0.26367188,-0.08984375,1.0078125,-0.060791016,0.58203125,-0.6015625,0.44921875,0.2109375,-0.08300781,1.2578125,0.4765625,0.072753906,-0.03930664,0.24121094,0.41992188,0.6875,0.46679688,0.41210938,0.08984375,0.59375,0.03173828,-0.6875,-0.08642578,0.69140625,-0.59765625,-0.10888672,0.19238281,0.053222656,0.1186
52344,-0.13085938,-0.15917969,-0.055419922,-0.23828125,0.25195312,0.057861328,-0.19238281,0.23925781,0.75390625,-0.05810547,0.828125,0.87890625,-0.65234375,-0.55859375,-1.0859375,-0.1328125,-0.00793457,0.013916016,0.19042969,-0.10107422,0.34765625,-0.12695312,-0.14941406,0.375,-0.5078125,-0.22167969,0.4609375,-0.18066406,-0.18359375,-0.51171875,0.40234375,0.6015625,0.29296875,0.453125,0.115722656,-1.265625,4.386902E-5,0.59375,-0.44335938,0.26367188,-0.34960938,-0.8359375,-0.33203125,-0.039794922,0.58203125,0.3203125,0.39648438,-0.43554688,0.0013046265,-0.07373047,-0.7578125,0.31640625,-0.22070312,-0.004272461,-0.60546875,-0.7890625,-0.07861328,-0.69140625,-0.32421875,-0.2734375,0.38476562,-1.25,0.010620117,-1.1953125,0.15234375,-0.66015625,0.265625,-0.08496094,0.33007812,-0.23828125,0.060546875,-0.039794922,-0.17773438,0.8359375,0.34765625,-0.73046875,0.37890625,-0.23632812,-0.45703125,-0.015136719,-0.73828125,-0.076660156,0.11230469,0.45117188,-0.8125,-0.27148438,0.22851562,-0.10498047,-0.03930664,0.59375,-0.62109375,-0.6796875,0.74609375,0.50390625,-0.76171875,-0.70703125,-0.29882812,0.049072266,-0.060546875,-0.43554688,-0.578125,0.20410156,0.1640625,0.040039062,-0.62109375,-0.10839844,-0.19824219,-0.30859375,0.30859375,0.0042419434,0.24511719,0.19238281,0.5859375,0.6328125,0.3359375,-0.88671875,-0.5546875,0.40234375,-0.022460938,0.16113281,-0.04272461,0.81640625,0.98828125,-0.27734375,-0.19921875,-0.55078125,-0.05053711,0.01171875,-0.69921875,-0.15917969,0.43164062,0.22167969,-0.43945312,-0.44921875,0.21484375,-0.5703125,-0.24707031,0.17578125,0.008483887,0.14453125,-0.36328125,-0.118652344,0.28710938,0.0010604858,0.4453125,0.24609375,-0.83203125,-0.33007812,-0.12402344,-0.30273438,-0.51953125,-0.18066406,-0.17871094,-0.5,-0.34375,0.072753906,0.25390625,0.37304688,-0.12109375,0.35546875,-0.4140625,-0.16308594,-0.23828125,0.52734375,-0.3984375,-0.17578125,-0.17871094,-0.20117188,0.33984375,-0.71875,-0.35546875,-0.14648438,0.020996094,0.11621094,0.90234375,0.213867
19,-0.31054688,-0.49023438,-0.61328125,-0.12402344,-0.07421875,0.12207031,0.04321289,-0.3515625,0.06201172,-0.07763672,0.21875,-0.7578125,0.55859375,-0.12597656,0.73046875,0.44335938,1.328125,-0.3515625,-0.0035858154,-0.34765625,0.42382812,-0.17773438,-0.90625,0.83203125,0.16992188,0.1328125,0.64453125,-0.09277344,-0.06640625,-0.21875,0.48046875,0.3359375,0.36328125,1.0703125,0.97265625,-0.14550781,-1.0234375,-0.38671875,0.16308594,0.3828125,-0.47070312,-0.84375,-0.28125,-0.50390625,0.23828125,1.0390625,0.14746094,-0.34570312,0.29882812,-0.37109375,-0.01977539,-0.65234375,0.4453125,0.21875,0.24121094,0.4609375,0.44726562,-0.40039062,-1.28125,-0.81640625,0.546875,1.1640625,-0.22753906,-0.296875,1.1484375,-0.640625,0.1640625,0.5,-0.29101562,0.03930664,-0.32421875,0.12695312,-0.49414062,0.052734375,-0.3671875,0.2890625,-0.08105469,0.640625,-0.546875,0.11816406,1.109375,0.23730469,-0.12890625,-1.4375,-0.3515625,0.80078125,-0.25390625,-0.079589844,-1.0,0.62890625,-0.39453125,-0.72265625,0.34765625,-0.875,-0.46875,0.48242188,0.32617188,-0.060302734,-0.41210938,-0.18457031,0.09472656,-0.8515625,0.83984375,0.3671875,-0.072265625,0.875,0.55859375,-0.33203125,-0.25,0.35742188,0.31445312,0.04248047,0.8125,0.33203125,-0.072753906,-0.18554688,-0.59765625,0.07128906,-0.27148438,-0.25976562,0.08105469,1.0625,-1.421875,-0.09423828,-0.46484375,-0.29492188,-0.65234375,1.3046875,0.3359375,-0.091796875,-0.17578125,0.26171875,0.546875,-0.061279297,-0.15234375,0.65234375,0.2578125,-1.375,0.609375,-0.38867188,-0.265625,-0.859375,0.19628906,-0.3984375,0.41015625,0.1484375,0.0115356445,-0.44726562,0.5078125,0.54296875,-0.30078125,0.2734375,0.12695312,0.2109375,0.984375,0.060546875,-0.36132812,-1.53125,0.625,-0.71875,-0.41210938,0.99609375,0.061523438,-0.19042969,-0.14648438,-0.3515625,0.051757812,0.2578125,-0.70703125,-0.022705078,-0.035888672,0.3359375,-0.08984375,-0.4609375,-0.038330078,-1.5078125,0.3125,0.3828125,0.42578125,0.22070312,1.0859375,0.703125,0.38085938,0.84765625,-0.40820312,
-0.26757812,0.50390625,-0.53125,-0.12158203,-0.32226562,0.234375,-0.68359375,-0.29882812,0.11230469,0.3203125,-0.29882812,0.5,-0.609375,-0.3671875,-0.052001953,0.30859375,-0.24316406,0.5234375,-0.41601562,-0.17578125,0.734375,0.26367188,0.30078125,0.084472656,0.9140625,-0.98828125,-0.70703125,-0.044189453,-0.25976562,-0.7265625,0.9140625,0.017211914,0.6171875,-0.6171875,-0.73828125,0.12451172,-0.13769531,0.30273438,-0.62890625,-0.921875,0.16308594,0.07470703,-0.5,0.59375,0.16015625,0.31445312,-0.11279297,1.875,-0.4140625,0.7421875,0.17773438,0.2421875,-0.23828125,0.421875,0.2265625,-0.84765625,0.2421875,0.005706787,-0.18847656,0.21679688,0.39453125,0.39257812,-0.703125,-0.55078125,-0.74609375,-0.13769531,0.055419922,0.20214844,-0.026367188,-0.59765625,1.4140625,-0.32421875,-0.14550781,0.026977539,0.31054688,-0.0703125,1.4140625,0.46679688,0.65625,-0.17089844,-0.07373047,0.41210938,0.028076172,0.3671875,0.041992188,-0.3515625,-0.1640625,-0.8203125,-0.029785156,-0.03515625,-0.140625,-0.12207031,-0.43945312,0.44140625,-0.0013046265,-0.24804688,-0.041748047,0.13378906,0.81640625,-0.14550781,-0.12792969,-0.15527344,0.6015625,-0.17578125,0.66015625,-0.3984375,0.5234375,-0.21679688,0.14648438,0.2890625,-1.0546875,0.09814453,-0.016967773,-0.013000488,-0.20800781,0.82421875,0.3359375,-0.6953125,0.30273438,0.10205078,-0.828125,-0.29882812,0.42773438,-0.55859375,-1.5625,-0.46289062,-0.25585938,0.68359375,0.66796875,0.27539062,-0.7421875,-0.140625,0.055419922,0.012023926,-0.11328125,0.3671875,-0.37890625,0.75390625,-0.60546875,0.734375,0.041503906,0.83984375,-0.640625,-0.671875,-0.27929688,-0.24316406,0.4921875,-0.6640625,-0.16210938,-0.29296875,0.4140625,-0.29101562,0.40429688,0.296875,0.875,0.43359375,-0.13964844,0.28515625,-0.359375,0.20996094,-0.23144531,-0.54296875,-0.083984375,-0.28125,0.01574707,-0.18945312,-0.65625,0.05810547,-0.10205078,-0.4921875,0.94921875,-0.78515625,0.122558594,-0.14550781,-0.39453125,0.046875,0.16796875,0.71875,0.66796875,-0.53515625,0.0033416748,
0.45117188,0.004211426,0.09667969,0.12792969,-0.7578125,0.28125,-0.94921875,0.36914062,-0.049316406,-0.07080078,-0.40820312,0.38671875,0.5859375,0.609375,-0.84765625,0.90625,-0.051513672,0.734375,-0.5859375,0.71484375,1.015625,-0.061523438,0.005432129,-0.28710938,0.3671875,-0.46484375,0.59375,0.87109375,0.59375,0.67578125,-0.6171875,-0.11621094,0.061035156,-0.26367188,0.625,-0.22363281,-0.36523438,0.41601562,-0.030151367,-0.59375,-0.27929688,-0.92578125,-0.28125,0.20703125,-0.2890625,0.4296875,1.359375,0.7890625,-0.49804688,0.20703125,-0.13085938,-0.0043945312,-0.73046875,0.37890625,0.17773438,-0.11816406,-0.37890625,0.017944336,0.76171875,0.8125,0.35742188,-0.19628906,-0.044433594,0.5078125,-0.65234375,0.04663086,-0.99609375,-0.42382812,-0.65625,-0.12988281,1.03125,0.30664062,-0.30078125,-1.71875,0.35546875,-0.01940918,0.09814453,0.59765625,-0.21386719,-0.87109375,1.6171875,-0.49023438,-0.10107422,-0.12597656,0.18554688,-0.42382812,0.69140625,-0.64453125,0.114746094],"inputTextTokenCount":3}' + headers: + Connection: + - keep-alive + Content-Length: + - '17006' + Content-Type: + - application/json + Date: + - Tue, 23 Apr 2024 20:57:03 GMT + X-Amzn-Bedrock-Input-Token-Count: + - '3' + X-Amzn-Bedrock-Invocation-Latency: + - '311' + x-amzn-RequestId: + - 1fd884e0-c9e8-44fa-b736-d31e2f607d54 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/botocore/bedrock_cassettes/cohere_embedding.yaml b/tests/contrib/botocore/bedrock_cassettes/cohere_embedding.yaml new file mode 100644 index 00000000000..3c7ca4b192b --- /dev/null +++ b/tests/contrib/botocore/bedrock_cassettes/cohere_embedding.yaml @@ -0,0 +1,45 @@ +interactions: +- request: + body: '{"texts": ["Hello World!", "Goodbye cruel world!"], "input_type": "search_document"}' + headers: + Content-Length: + - '84' + User-Agent: + - !!binary | + Qm90bzMvMS4zNC40OSBtZC9Cb3RvY29yZSMxLjM0LjQ5IHVhLzIuMCBvcy9tYWNvcyMyMy40LjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEwLjUgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl + 
dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzQuNDk= + X-Amz-Date: + - !!binary | + MjAyNDA0MjNUMjEwMDExWg== + amz-sdk-invocation-id: + - !!binary | + MGIwYzc5OTUtOWE5MC00MmM2LWIxODYtNGViYTJkNmQ2MWFm + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.embed-english-v3/invoke + response: + body: + string: '{"embeddings":[[-0.041503906,-0.026153564,-0.070373535,-0.057525635,-0.026245117,-0.019714355,-0.029449463,0.060333252,-0.012016296,0.031219482,-0.03036499,-0.0023498535,0.026153564,-0.036193848,0.050231934,-0.0152282715,0.050445557,-0.002483368,0.014816284,-0.0055618286,-0.016464233,0.03363037,-0.0049209595,0.01776123,0.04031372,-0.018188477,-0.0035190582,-0.017364502,-0.035064697,-0.050842285,0.008964539,0.05026245,0.021530151,0.026947021,-0.0010185242,0.06341553,-0.015213013,0.022537231,0.020935059,-0.015823364,0.023162842,-0.01763916,0.018157959,-0.01637268,-0.038330078,0.010925293,-0.00762558,-0.004917145,0.0049591064,-0.048583984,0.017349243,-0.022094727,0.023269653,0.009536743,-0.006526947,0.012512207,-0.04284668,-0.022705078,0.032348633,0.015808105,-0.006450653,-0.0058784485,0.015792847,8.506775E-4,0.027633667,-0.030853271,-0.007522583,0.007587433,0.024002075,0.029251099,9.994507E-4,0.0010881424,-0.018234253,-3.2234192E-4,0.02130127,0.0051574707,-0.008659363,0.029281616,0.016479492,0.026229858,-0.01486969,0.009094238,0.0032100677,-0.06359863,0.031402588,-0.016723633,-0.0017356873,0.009475708,-0.0035057068,-0.08514404,-0.013404846,0.057525635,-0.035003662,0.0022697449,-0.047698975,-0.031585693,0.0098724365,0.04864502,-0.010551453,0.0234375,-0.060943604,-0.002948761,-0.014450073,-0.03857422,0.008773804,-0.050231934,0.007457733,-0.0051956177,0.04171753,0.045318604,-0.057403564,0.07757568,0.01473999,0.021224976,-0.005619049,-0.009849548,-9.765625E-4,0.0029563904,-0.024383545,0.02015686,0.013069153,0.02268982,-0.0025596619,0.017486572,-0.004875183,0.016708374,-0.028945923,-0.016921997,0.0693
9697,-0.054870605,-0.002248764,0.048034668,-0.09857178,-0.06604004,-0.038635254,0.037719727,0.027420044,-0.049316406,-0.0031814575,-0.010917664,0.037384033,-0.0021858215,8.3065033E-4,0.012664795,-0.034484863,0.038360596,0.018753052,-0.004699707,-0.042236328,-0.045013428,-0.024154663,-0.046142578,-0.021118164,0.03189087,0.043823242,0.021652222,0.004501343,0.008804321,0.059448242,0.05593872,-0.009033203,0.004043579,0.0496521,-0.058929443,-0.028503418,-0.040405273,-0.037597656,0.012016296,-0.00579834,0.027130127,-0.029067993,0.007537842,-0.02279663,-0.011871338,-2.1362305E-4,0.0073127747,0.010383606,-0.011375427,-0.018081665,0.07141113,0.07513428,-0.0018005371,-0.08093262,0.08026123,-0.032592773,-0.01386261,0.0050735474,-0.011375427,0.037841797,0.0065727234,0.019454956,0.0056152344,-0.061706543,0.007293701,-0.015930176,-0.0032348633,0.019363403,0.0068130493,-5.0354004E-4,0.015563965,-0.0129776,0.022033691,0.016967773,-0.0075645447,0.055541992,-0.0033626556,0.015853882,0.012435913,0.042510986,0.009346008,-0.022140503,-0.025527954,0.06121826,-0.13354492,-0.06323242,-0.013664246,-0.044189453,0.01272583,-0.010894775,0.021469116,0.032318115,-0.018997192,0.01184082,0.008255005,0.013465881,0.032196045,-0.037139893,0.009536743,-0.044799805,0.02670288,-0.031951904,0.01675415,-0.05947876,-0.033966064,-0.06222534,-0.040740967,-0.07141113,0.0025959015,0.022979736,0.046569824,0.016799927,-0.019744873,-0.010566711,0.0047683716,0.0335083,0.028427124,0.025131226,0.015792847,-0.00390625,0.036224365,0.0061302185,0.009002686,0.022232056,-0.0063438416,-0.012245178,0.068481445,-0.012573242,0.0043678284,-0.008300781,0.009353638,-0.00541687,0.031433105,0.014595032,0.015434265,-0.036621094,-0.012878418,-0.034210205,-0.003583908,-0.018997192,0.016815186,0.027069092,-0.0491333,-0.016494751,-0.021621704,0.05960083,-0.015655518,-0.067993164,0.019622803,0.009819031,-0.0119018555,-0.0050697327,-0.019897461,0.018951416,0.009895325,0.018875122,0.02218628,0.04348755,0.026657104,0.0044441223,0.03387451
,0.06536865,0.02784729,0.016342163,0.037902832,0.0010757446,0.01020813,0.042755127,0.05908203,-0.019073486,0.010620117,-0.056121826,-0.011314392,0.008270264,-0.024139404,-0.0034065247,-0.024673462,0.025421143,0.0115356445,-6.2942505E-4,0.021362305,-0.028305054,0.0440979,0.0099487305,0.035247803,-0.048706055,-0.05999756,-0.024368286,-0.019622803,-0.11859131,-0.065979004,-0.0309906,-0.0073547363,-0.004371643,-0.018005371,0.0030021667,-0.0044517517,-0.039794922,0.014312744,-0.01689148,-0.018035889,0.0051193237,-0.013595581,-0.027404785,0.00705719,0.018661499,-0.008934021,0.02520752,0.010749817,-0.0038280487,-0.006034851,0.040649414,-0.027175903,0.003419876,0.021362305,0.021713257,-0.040283203,0.006652832,-0.013458252,0.03189087,0.011428833,0.001083374,-0.024414062,0.017150879,-0.021194458,0.03591919,-0.06011963,-0.043670654,-0.038085938,-0.021865845,-0.0635376,0.05203247,0.021896362,0.017303467,-0.040405273,-0.060699463,-0.05432129,0.030578613,-0.0039138794,-0.059814453,-0.010299683,-0.029174805,-0.04147339,0.040893555,-0.026000977,-0.062438965,0.024993896,-0.003967285,-0.039031982,-0.021026611,-0.010467529,0.024658203,0.035461426,-0.01676941,0.018493652,0.0077171326,0.026565552,0.001209259,0.012168884,0.03778076,0.011230469,0.002632141,0.030273438,-0.04119873,0.0015602112,0.04272461,0.004989624,-0.053588867,0.03366089,-0.11859131,0.023147583,-0.024993896,-0.01600647,-0.02017212,0.0025901794,-0.07891846,-0.009033203,-0.056243896,-0.024612427,0.02368164,0.022979736,0.031433105,-0.002412796,0.039001465,-0.010360718,0.012123108,-0.021972656,-0.087890625,0.016479492,-0.012496948,0.017303467,-0.024017334,0.02381897,-0.054107666,-0.004699707,-0.021087646,0.045166016,0.0033435822,0.025024414,0.0132369995,-0.030975342,0.011276245,-0.039611816,-0.018295288,-0.03652954,-0.017196655,0.01676941,-0.023040771,0.028411865,-0.007724762,-0.016998291,0.016647339,0.014465332,0.020843506,0.019088745,-0.010063171,-0.004512787,0.022521973,0.03378296,-0.053863525,0.007675171,-0.02532959,0.02
6260376,-0.02947998,0.025756836,-0.016616821,-0.009803772,0.01727295,0.04827881,0.003780365,-0.010955811,0.0055274963,-0.026367188,-0.042541504,0.0072784424,-0.028869629,-0.0054779053,0.07788086,-0.020355225,-0.03677368,0.01449585,-0.03250122,0.018081665,0.022628784,0.018157959,-0.015434265,0.002368927,-0.025024414,0.008857727,-0.030059814,-1.9836426E-4,-0.022064209,-0.012069702,-0.033416748,-0.04763794,-0.016052246,-0.0027198792,-0.05557251,-0.00970459,0.006515503,0.028823853,0.021026611,-0.026611328,0.031555176,-0.0061798096,-0.031585693,-0.0053863525,-0.03161621,0.032287598,0.0149002075,-0.060150146,-0.015594482,-0.016799927,0.005432129,-0.013442993,0.024856567,-0.018920898,-0.010696411,0.023834229,0.020568848,0.055023193,0.004219055,0.002937317,0.013938904,0.026153564,-0.019546509,0.03955078,-0.0061187744,-0.007659912,0.0039749146,0.0011529922,0.01864624,0.032440186,-0.006843567,0.037841797,0.03111267,-0.006515503,-0.041625977,0.01751709,-0.0115356445,-0.08465576,0.033294678,0.006000519,-0.006477356,0.024169922,-0.016906738,0.085998535,-0.012420654,-0.0035476685,0.009208679,-0.008361816,0.012489319,0.014389038,-0.031280518,-0.02760315,0.022705078,0.014923096,0.029663086,0.090026855,-0.032684326,0.08679199,-2.670288E-4,-0.012367249,0.031311035,0.022583008,-0.017028809,-0.05883789,-0.042266846,0.006515503,-0.018676758,0.020126343,-0.009780884,0.038513184,-0.055236816,0.023651123,-0.031036377,0.016189575,0.006752014,-0.016311646,-0.009292603,-0.017150879,-0.008041382,-0.03225708,-0.045532227,0.0012378693,-0.026062012,0.043945312,-0.038391113,0.0027770996,0.018676758,0.087768555,-0.0026512146,0.024551392,-0.03677368,0.048034668,0.009979248,0.045684814,0.0017814636,0.024734497,-8.9645386E-4,0.02748108,-0.021606445,-0.020217896,-0.008232117,0.0037136078,0.027496338,0.008773804,0.022094727,-0.048614502,0.019058228,0.0023002625,-0.04171753,-0.018066406,0.011566162,-0.034057617,0.058013916,-0.012496948,-0.025360107,-0.022750854,0.023345947,0.007041931,0.028762817,-0.0119
93408,-0.015838623,0.05618286,-0.009490967,-0.054473877,-0.004760742,0.01953125,0.010566711,-0.013214111,0.02558899,-0.06555176,0.028442383,-6.1416626E-4,0.0019302368,-0.032043457,-3.0517578E-4,0.028930664,-0.0037651062,-0.019561768,0.028549194,-0.019195557,-0.016418457,0.0062332153,0.025909424,0.026733398,-0.02798462,0.012001038,-0.05291748,-0.023284912,0.00422287,0.013687134,0.041870117,-0.025726318,-0.020370483,-0.044006348,-3.9196014E-4,0.00944519,-0.023834229,-0.015098572,-0.023223877,0.008781433,0.0076789856,0.020141602,-0.014175415,0.016662598,-0.005973816,0.033813477,-0.013748169,-0.033111572,-0.016845703,0.0051345825,-0.010635376,-0.02268982,0.019210815,0.0124053955,0.015052795,0.034118652,-0.01600647,0.040374756,-0.007949829,-0.0030612946,0.021774292,-0.007896423,-0.03164673,-0.01576233,0.043884277,-0.017059326,0.0039596558,0.007537842,-0.019470215,0.023986816,0.011787415,0.020629883,-0.045074463,0.022628784,0.01914978,0.011131287,-0.016403198,-0.039276123,-0.07208252,0.023010254,-0.03894043,0.010787964,0.04019165,0.0017576218,-0.043823242,-0.034454346,0.030059814,0.02558899,1.5258789E-5,-0.03302002,0.03741455,-0.040374756,0.014251709,0.0046806335,-0.011749268,0.0289917,-0.025726318,0.006515503,-2.9087067E-4,0.043670654,-0.05911255,-0.03189087,0.038726807,-0.027862549,-0.06311035,-0.007610321,-0.02104187,-0.02180481,-0.029296875,-0.068725586,-0.016113281,-0.012924194,0.017684937,-0.020828247,-0.026885986,-0.0058670044,0.008880615,0.0056419373,0.016693115,0.0473938,-0.011367798,-0.0010662079,-0.0013999939,0.02822876,0.014808655,0.010635376,0.006538391,0.0030784607,-0.05682373,0.0035820007,0.019012451,0.02571106,0.021362305,0.04168701,0.02029419,0.040039062,-0.017074585,0.0127334595,0.019332886,0.006351471,0.05267334,0.0029335022,0.014518738,-0.040405273,-0.038635254,0.034179688,0.07299805,-0.027801514,-0.050476074,-0.030014038,-0.00617218,0.06488037,0.0038414001,0.064208984,0.034210205,0.02494812,-0.012954712,0.026641846,0.0597229,0.01146698,0.0014743805,-0
.027877808,-0.04699707,0.037597656,0.014572144,-0.012710571,0.018417358,0.02508545,6.599426E-4,0.003255844,-0.043884277,-0.021469116,-0.0284729,-0.037109375,0.044311523,0.043640137,0.018676758,0.1005249,-0.022979736,0.02911377,-0.0015258789,0.05899048,0.042175293,0.016601562,0.012954712,-0.0038909912,0.017425537,-0.03274536,-0.019714355,0.011199951,-0.014831543,0.0069389343,-0.006549835,-0.07409668,0.027420044,0.0491333,-0.0038833618,0.023590088,-9.317398E-4,-0.027160645,8.049011E-4,0.015716553,0.008773804,-0.003025055,-0.00642395,-0.0012283325,0.010566711,-0.05407715,-0.011138916,0.03326416,0.03125,-0.051696777,-0.016860962,0.028656006,0.017044067,-0.021911621,-0.012763977,-0.01890564,0.039794922,-0.013145447,-4.6682358E-4,0.020568848,-0.011108398,0.0021705627,0.03765869,-0.039855957,0.049591064,0.0110321045,-0.005542755,-0.0113220215,0.0050315857,-0.003232956,-0.079589844,0.018722534,0.034423828,-2.7942657E-4,-0.013671875,0.05960083,0.05230713,0.057281494,0.029251099,0.019073486,-0.007331848,0.018981934,0.0074005127,-0.030715942,-0.04446411,-0.013702393,-0.02027893,-2.0217896E-4,0.017913818,0.02960205,0.006713867,0.0044059753,-0.041015625,-0.011566162,-0.0054397583,-0.034362793,0.0073280334,0.02130127,0.012771606,-0.06100464,-0.03945923,-0.014793396,-0.009017944,-0.017608643,-0.037139893,0.058624268,0.0135650635,0.015274048,-0.013259888,-0.041229248,-0.02255249,0.030029297,-0.028579712,0.036224365,0.011756897,0.0043754578,0.029129028,-0.040374756,-0.021484375,0.014190674,-0.077819824,0.002494812,-0.017791748,0.019805908,-0.02432251,0.046691895,-0.041290283,-0.028915405,-0.0020446777,0.009262085,-0.032440186,-0.00554657,0.014709473,0.012992859,-0.024871826,-0.048858643,0.026321411,0.005897522,0.024353027,0.064697266,-0.048950195,0.017547607,-0.009010315,0.014549255,0.040802002,-0.025970459,-0.023788452,0.004211426,-0.02810669,-0.030014038,-0.011566162,0.025314331,-0.05480957,-0.02720642,-0.006198883,-0.01209259,-0.019378662,-0.047668457,-0.09552002,-0.014328003,-0.
014564514,-0.046417236,-0.005859375,-0.006511688,-0.014915466,-0.008666992,-0.016555786,-0.016479492,0.0070610046,0.04147339,-0.04446411,0.030334473,0.01423645,-0.01802063,-0.019104004,0.0045928955,0.0038833618,-0.013938904,-0.0061950684,-0.0023040771,0.012863159,0.042114258,-0.052612305,-0.03289795,-0.019195557,0.029571533,-2.5558472E-4,0.028396606,0.057556152,0.03375244,-0.06903076,-0.008117676,-0.04675293,-0.00806427,0.026321411,-0.004749298,-0.030944824,-0.006416321,-6.5612793E-4,-0.02166748,0.009925842,-0.0069274902,0.034576416,-0.010894775,-0.011184692,0.03353882,-0.0657959,0.03781128,-0.012008667,0.020965576,-0.024719238,0.067871094,-0.033721924,0.012908936,0.005168915,0.018966675,0.0158844,0.0044174194,-0.030136108,-0.022460938,0.064331055,0.028320312,0.01259613,0.004337311,0.047424316,0.0025100708,-0.053009033,0.024597168,0.05508423,0.028564453,-0.042633057,-0.0047836304,0.049438477,0.0046958923,0.006164551,-0.060394287,-0.039398193,0.055236816,-0.050323486,-0.028961182,-0.02078247,-0.044555664,-0.008033752,0.0053710938,-0.020370483,-0.061553955,0.016067505,0.054779053,-0.012863159,0.021575928],[0.007972717,0.0024280548,-0.023376465,-0.036071777,1.9264221E-4,0.014801025,0.0071029663,-0.004360199,-0.018234253,0.023132324,-0.042877197,-0.013389587,0.045318604,-0.03543091,0.042907715,0.0048332214,0.025680542,0.026672363,0.0035133362,-0.0020771027,-0.021209717,0.008041382,-0.030914307,-0.04434204,0.004173279,0.021499634,-0.04208374,0.006576538,-0.073913574,-0.08654785,-0.013748169,0.092285156,0.01713562,0.020599365,0.010017395,0.011482239,-0.036590576,0.029083252,0.016189575,-0.020095825,0.021408081,-0.0087890625,-0.06109619,-0.024230957,-0.08343506,-0.012451172,0.022827148,0.026016235,0.0073013306,-0.028549194,0.040283203,0.034576416,0.051727295,0.016906738,-0.020889282,0.0022678375,-0.012893677,0.005531311,0.033996582,0.0022392273,-0.018875122,-0.013549805,0.024108887,-0.032440186,0.0031280518,0.013534546,-0.007888794,0.013366699,0.058166504,0.01725769,-0.040
83252,-0.0011711121,0.013961792,-0.013442993,0.009841919,0.064086914,-0.026931763,0.051086426,0.040740967,-0.006477356,-0.013435364,6.008148E-4,-0.008781433,-0.009712219,-0.011795044,-0.010375977,-0.006969452,0.0029678345,-0.012237549,-0.089416504,-0.015083313,0.06713867,-0.037017822,-0.019180298,-0.056549072,-0.03930664,0.024093628,0.043273926,-0.061157227,-0.031799316,-0.04284668,0.009384155,-0.047912598,-0.01083374,-0.023757935,0.002407074,-0.010772705,-0.017974854,0.011367798,0.05911255,-0.0184021,-0.023208618,-0.010986328,0.03668213,0.023208618,-0.018875122,-0.05630493,0.03845215,-0.047332764,0.04147339,0.00995636,0.019439697,-0.05505371,0.055358887,0.026550293,0.05307007,-0.0039138794,0.026153564,0.08666992,0.012229919,-0.008392334,0.018981934,-0.11810303,-0.089538574,-0.052490234,0.016082764,0.045654297,-0.039855957,-0.010025024,-0.0012817383,0.037322998,-0.002067566,-0.029464722,-0.014595032,-0.0017738342,-0.009475708,0.043182373,0.014801025,-0.014251709,0.009094238,-0.04940796,0.027893066,0.023086548,0.034423828,0.01461792,0.027130127,0.01033783,-0.013534546,0.013656616,0.039520264,-0.01096344,0.011108398,0.06921387,-0.026016235,-0.0030212402,-0.011543274,-0.020599365,0.005302429,0.0023651123,0.04220581,-0.06793213,-0.02532959,-0.010414124,0.012817383,-0.007850647,0.04498291,0.015396118,-0.017288208,0.006729126,0.020706177,0.030914307,-0.012954712,-0.017532349,0.047180176,-0.021606445,-0.021575928,-0.0060043335,-0.03277588,0.045318604,-0.014854431,-0.024551392,0.03704834,0.0087890625,0.035461426,-0.024475098,-0.05505371,-0.009529114,0.0014896393,-0.021194458,-0.026733398,0.011680603,0.001285553,0.016189575,-0.03942871,-0.0076446533,-0.012550354,-4.4250488E-4,0.036956787,0.04034424,0.047607422,-0.0044670105,-0.02168274,-0.00894165,0.017456055,0.0041160583,-0.01399231,-0.017654419,-0.014175415,0.009124756,-0.0069351196,0.06341553,-0.021240234,0.026138306,-0.01828003,0.041656494,-0.019226074,-0.009681702,-0.044403076,0.036834717,-0.011131287,0.01234436,0.03427
124,-0.042663574,-0.035949707,-0.051086426,-0.00504303,-0.020950317,0.04232788,0.007270813,0.011054993,0.0015964508,-0.011894226,-0.054473877,-0.0569458,-0.008010864,-0.022842407,0.010177612,0.0026245117,0.0390625,0.018478394,0.008834839,-0.025054932,0.03857422,0.020507812,0.029785156,0.061828613,-0.0026779175,-0.0012540817,0.0345459,-0.024261475,0.005680084,0.034820557,-0.0026245117,0.014022827,-0.026641846,-0.028533936,-0.028656006,0.016448975,-0.0034885406,-0.008125305,0.028930664,-0.032958984,-0.02003479,0.009506226,0.036102295,-0.0121536255,-0.049987793,0.0025253296,0.0019054413,-0.0066566467,0.014137268,-0.0054473877,0.04724121,0.020126343,0.018295288,0.03466797,0.048614502,0.040527344,-0.004722595,-0.012260437,0.028564453,0.074523926,0.024230957,0.02923584,0.07922363,0.03677368,0.023513794,0.045166016,-0.008644104,0.014854431,-0.035186768,0.009254456,0.008491516,-0.027999878,-0.0016345978,0.078308105,0.05126953,-0.013305664,0.02217102,0.054870605,-0.038726807,-0.0019245148,0.023071289,0.007724762,0.0057525635,-0.04473877,0.020996094,0.027786255,0.013893127,0.034942627,0.037353516,8.029938E-4,-0.049926758,-0.006713867,-0.003396988,-0.034362793,-0.008590698,-0.023620605,0.0023536682,0.0060691833,0.05783081,0.05517578,0.012481689,0.011428833,0.028656006,-0.011505127,0.018600464,0.015838623,0.022521973,-0.007949829,0.033996582,-0.03086853,-0.023452759,0.018722534,0.034057617,-0.061828613,0.010726929,-0.0018997192,0.07434082,0.033843994,-0.009117126,-0.02470398,-0.01927185,0.020629883,0.03692627,0.009597778,0.0023822784,-0.02330017,-0.0017147064,-0.0680542,-0.035583496,0.016708374,-0.03112793,0.046813965,0.011497498,-0.05529785,0.021759033,0.0368042,-0.024169922,0.050811768,-0.013748169,0.014923096,-0.030960083,-0.019454956,-0.09020996,-0.005874634,-0.007095337,-0.066833496,0.008651733,-0.011703491,0.012435913,0.026931763,0.007865906,0.048858643,0.03152466,-0.03173828,-0.06021118,0.028167725,0.011543274,0.04925537,-0.01600647,-0.013244629,0.03173828,-0.0284729,-0.
02508545,-0.026519775,0.039398193,0.037017822,0.0059890747,-0.0020275116,-0.022521973,0.037109375,-0.03074646,0.047698975,-0.07800293,0.0068626404,-0.04623413,-0.04949951,0.016906738,0.022033691,0.003753662,0.057556152,0.019714355,0.008262634,-0.020553589,-0.023864746,-0.029388428,0.026611328,-0.041259766,-0.016082764,-0.0013427734,0.010002136,-0.023834229,-0.022735596,-0.012268066,0.016159058,-0.0038642883,0.021331787,-0.011642456,0.02168274,-0.0029525757,-0.015327454,-0.020736694,-0.039886475,-0.026275635,-0.024337769,-0.01689148,0.04156494,0.03012085,-0.018127441,-0.05038452,0.006095886,-0.018676758,0.020996094,-0.03744507,-0.008956909,0.0032100677,-0.03253174,-0.036071777,-9.975433E-4,-0.06378174,0.030380249,-0.037200928,0.021743774,-0.011383057,-0.052886963,9.2697144E-4,0.030670166,0.0012054443,0.05090332,0.06149292,0.002105713,-0.029953003,0.011146545,-0.033477783,-0.053955078,0.03414917,-0.034851074,-0.014160156,-0.021148682,-0.081604004,0.09564209,0.021209717,0.023651123,-0.017868042,-0.00970459,-5.41687E-4,0.04840088,-0.031921387,-0.0087890625,-0.026672363,0.010871887,0.042236328,-0.022125244,-0.02558899,0.016677856,-0.016403198,0.03643799,0.010864258,-0.06719971,-0.034332275,-0.047790527,0.026855469,-0.06137085,-0.022064209,-0.014144897,8.239746E-4,-0.028900146,-0.004585266,-0.030685425,-0.07611084,9.288788E-4,0.06689453,-0.052490234,0.028945923,-0.01979065,-0.0413208,-0.013008118,0.019638062,0.02859497,-0.022583008,-0.00623703,0.07684326,0.026626587,-0.012588501,-0.04864502,0.04788208,0.01739502,-0.026901245,0.017318726,0.037963867,-0.018356323,0.05996704,0.024429321,-0.04168701,0.023391724,0.007698059,-0.001660347,0.015686035,-0.063964844,0.03765869,-1.9073486E-4,-0.0033950806,0.05557251,-0.021255493,0.056854248,-0.024719238,0.01939392,0.023376465,-0.057617188,-0.005973816,-0.0037002563,-0.03100586,-0.009712219,0.0039711,-0.07922363,-0.024841309,0.01411438,0.006313324,0.026657104,0.01586914,-0.012710571,2.784729E-4,0.0019798279,0.014564514,-0.02418518,-0
.022872925,0.0040130615,-0.02281189,-0.014892578,-0.008270264,0.004299164,-0.036315918,0.014770508,0.01424408,-0.0031585693,0.038757324,-0.018463135,0.008255005,-0.027832031,-6.465912E-4,-0.026473999,0.039489746,0.008277893,-0.017913818,-0.01398468,-0.03111267,-0.010543823,0.0076828003,0.01876831,-0.009513855,0.03012085,0.011604309,-0.022521973,0.028060913,-0.016113281,0.0046844482,0.011230469,0.0063476562,-0.0057754517,-0.013763428,-0.030090332,0.017089844,-0.01651001,-0.0063667297,-0.03933716,0.0637207,-0.016921997,-0.0158844,0.002439499,-0.02407837,-0.0015716553,0.024505615,-0.038726807,0.005142212,0.015449524,-0.013900757,-0.03967285,0.018096924,-0.03515625,0.05734253,-0.04901123,-0.03491211,0.09124756,-0.05026245,-0.03451538,0.0635376,0.009437561,-0.04852295,-0.05722046,0.010345459,-0.05090332,0.022003174,0.009017944,0.011566162,-0.030517578,0.0602417,-0.007347107,-0.022735596,0.016921997,-0.011161804,0.014839172,-0.03250122,-0.06149292,-0.002532959,0.041015625,0.010803223,0.0020713806,0.0178833,0.0132751465,-0.007801056,0.019348145,-0.015289307,-0.052490234,0.014862061,-0.028259277,-0.054260254,0.0017242432,-0.028213501,0.0031280518,-0.01234436,0.008598328,-0.058898926,0.04055786,0.042816162,0.0061836243,-0.026123047,0.0552063,0.008476257,0.011627197,0.011108398,0.048065186,-0.01725769,-0.006969452,0.030639648,0.004463196,0.056274414,0.024169922,0.010620117,0.03552246,-0.0013484955,9.95636E-4,0.0050811768,0.006210327,0.025482178,-0.05279541,-0.0034561157,-0.0037002563,-0.036102295,-0.01914978,-0.0011482239,0.023284912,0.040374756,-0.034240723,-0.016860962,-0.00995636,0.055999756,0.036621094,-0.016296387,0.0046653748,-0.029693604,0.019180298,-0.049743652,0.012390137,0.028015137,-0.039276123,-0.0256958,0.0385437,0.060760498,0.00390625,-0.0019607544,0.003868103,0.020980835,0.010070801,0.022232056,-0.0042304993,0.028671265,-0.040130615,0.01525116,0.018936157,-0.0074005127,0.010147095,-0.036621094,-0.0059661865,0.027008057,-0.016708374,-0.014480591,0.03237915,0.004
550934,-0.04425049,-0.036743164,-0.050689697,0.018554688,-0.025360107,0.03213501,-0.014831543,-0.012130737,-0.005302429,0.016433716,0.015174866,0.055908203,0.05117798,-0.019699097,0.038085938,0.021850586,-0.006465912,0.014511108,-0.015052795,-0.021865845,0.010940552,-0.019500732,0.0027427673,0.0066719055,0.010673523,-0.014343262,0.017669678,-0.011878967,0.009338379,0.012748718,0.002670288,0.0335083,0.0057907104,0.009147644,-0.01424408,0.0033569336,-0.011749268,0.04144287,0.004371643,0.0015029907,-0.040130615,-0.06530762,-0.0262146,-0.02822876,0.028549194,0.02305603,0.006778717,-0.06677246,-0.013961792,0.03753662,0.0101623535,0.012748718,0.017822266,-8.8500977E-4,-0.028793335,0.005077362,0.02053833,0.025619507,-0.047729492,-0.006877899,0.01524353,0.0012569427,0.03338623,-0.06512451,-0.009796143,-0.008056641,0.02192688,0.045410156,-0.024307251,0.032073975,0.03591919,-0.0016384125,-0.018585205,-0.012321472,0.036315918,0.013786316,0.02784729,0.028213501,-8.010864E-5,-0.039031982,-0.0121154785,-9.3460083E-4,-0.028839111,-0.030258179,-0.0034751892,-0.0256958,-0.048858643,-0.038726807,0.038482666,-0.0013504028,-0.0065307617,0.0030670166,-0.025177002,0.0070266724,0.05355835,0.019454956,0.03036499,-0.008895874,-0.019561768,-0.022079468,-0.041168213,-0.01802063,0.04168701,0.024475098,-0.022094727,-0.031951904,0.0024299622,0.02243042,0.00504303,-0.018615723,0.0022087097,0.040374756,-0.0096206665,-0.017303467,-0.013702393,-0.009414673,0.05609131,0.032348633,-0.04107666,0.048736572,0.015945435,-0.010169983,0.018463135,-0.0069084167,0.03225708,-0.11608887,0.049713135,0.06750488,-0.01751709,0.014785767,0.04345703,0.05065918,0.060516357,-0.007091522,0.034362793,0.042022705,0.0871582,-0.003118515,-0.042053223,-0.046813965,-0.0115356445,-0.004142761,0.01838684,0.012565613,-0.027648926,0.04522705,0.0065727234,0.0031471252,-0.012519836,-0.022888184,-0.015838623,0.019607544,-0.0026855469,0.016677856,0.010879517,-0.03302002,0.0011777878,0.021942139,-0.021713257,0.022003174,-0.012924194,-
0.006134033,-0.049713135,0.024017334,0.020263672,0.0038452148,-0.07635498,0.028244019,-0.018447876,0.020706177,-0.006877899,-0.014923096,-0.014389038,-0.022216797,-0.008102417,-0.047088623,-0.0115356445,-0.03161621,0.02166748,-0.016647339,-0.010101318,0.002243042,-0.03048706,-0.008117676,-0.013755798,0.02331543,-0.05029297,-0.023971558,0.007507324,-0.055664062,-0.070129395,0.09082031,-0.026443481,-0.043548584,-0.0020980835,-0.008628845,0.02394104,-0.09869385,-0.02519226,-0.062683105,-0.021820068,-0.0134887695,-0.032958984,0.019744873,-6.275177E-4,-0.023071289,-0.006454468,0.012886047,-0.034973145,0.04812622,-0.042541504,-0.027511597,-0.025024414,-5.4979324E-4,-0.0028800964,0.02633667,-0.019241333,0.005504608,0.0045318604,0.011688232,-0.043670654,0.011413574,-0.04638672,-0.02810669,0.006465912,-0.023132324,0.040771484,-0.006450653,0.012374878,0.030761719,0.048034668,0.03817749,-0.006034851,-0.039855957,0.014457703,0.008560181,0.045715332,-0.037841797,-0.0209198,-0.0128479,0.056396484,-0.00856781,0.047943115,0.038330078,-0.0057907104,-0.0284729,-0.037017822,-0.020324707,0.0121536255,-0.037384033,-0.02835083,0.018722534,-8.659363E-4,0.015899658,-0.07098389,0.0069503784,0.026901245,0.018997192,0.00724411,-0.04309082,0.011207581,-0.0048713684,0.043914795,0.0012407303,0.007255554,0.009460449,0.034118652,-0.029403687,0.020263672,-0.015472412,-7.543564E-4,0.027282715,-0.00995636,0.0066108704,-0.0104904175,0.0077667236,0.031951904,0.009170532,0.01802063,-0.0039978027,-0.004688263,-0.037139893,-0.018554688,0.031402588,-0.04385376,-0.036743164,0.032836914,0.02658081,-0.005706787,-0.057678223,0.026901245,-0.023086548,0.018951416,-0.050720215,0.06384277,-0.0031585693,-0.0041236877,5.760193E-4,0.028533936,0.036346436,-0.0057868958,0.0049591064,-0.0024299622,0.0065078735,-0.0051994324]],"id":"0e9cb5ab-1fef-46eb-8e2c-773f0f60f39d","response_type":"embeddings_floats","texts":["hello + world!","goodbye cruel world!"]}' + headers: + Connection: + - keep-alive + Content-Length: + - 
'25552' + Content-Type: + - application/json + Date: + - Tue, 23 Apr 2024 21:00:11 GMT + X-Amzn-Bedrock-Input-Token-Count: + - '7' + X-Amzn-Bedrock-Invocation-Latency: + - '271' + x-amzn-RequestId: + - 0e9cb5ab-1fef-46eb-8e2c-773f0f60f39d + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/botocore/test_bedrock.py b/tests/contrib/botocore/test_bedrock.py index 6faad4af643..a94699813ed 100644 --- a/tests/contrib/botocore/test_bedrock.py +++ b/tests/contrib/botocore/test_bedrock.py @@ -433,6 +433,24 @@ def test_readlines_error(bedrock_client, request_vcr): response.get("body").readlines() +@pytest.mark.snapshot +def test_amazon_embedding(bedrock_client, request_vcr): + body = json.dumps({"inputText": "Hello World!"}) + model = "amazon.titan-embed-text-v1" + with request_vcr.use_cassette("amazon_embedding.yaml"): + response = bedrock_client.invoke_model(body=body, modelId=model) + json.loads(response.get("body").read()) + + +@pytest.mark.snapshot +def test_cohere_embedding(bedrock_client, request_vcr): + body = json.dumps({"texts": ["Hello World!", "Goodbye cruel world!"], "input_type": "search_document"}) + model = "cohere.embed-english-v3" + with request_vcr.use_cassette("cohere_embedding.yaml"): + response = bedrock_client.invoke_model(body=body, modelId=model) + json.loads(response.get("body").read()) + + @pytest.mark.parametrize( "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] ) diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py index 9189830a12f..763504159a2 100644 --- a/tests/internal/test_packages.py +++ b/tests/internal/test_packages.py @@ -86,8 +86,8 @@ def test_third_party_packages(): @pytest.mark.subprocess( env={ - "DD_THIRD_PARTY_DETECTION_EXCLUDES": "myfancypackage,myotherfancypackage", - "DD_THIRD_PARTY_DETECTION_INCLUDES": "requests", + "DD_THIRD_PARTY_DETECTION_INCLUDES": "myfancypackage,myotherfancypackage", + "DD_THIRD_PARTY_DETECTION_EXCLUDES": 
"requests", } ) def test_third_party_packages_excludes_includes(): diff --git a/tests/internal/test_tracer_flare.py b/tests/internal/test_tracer_flare.py index 35f38674e67..7051190e17d 100644 --- a/tests/internal/test_tracer_flare.py +++ b/tests/internal/test_tracer_flare.py @@ -2,13 +2,16 @@ from logging import Logger import multiprocessing import os +import pathlib from typing import Optional import unittest from unittest import mock +import uuid from ddtrace.internal.flare import TRACER_FLARE_DIRECTORY from ddtrace.internal.flare import TRACER_FLARE_FILE_HANDLER_NAME from ddtrace.internal.flare import Flare +from ddtrace.internal.flare import FlareSendRequest from ddtrace.internal.logger import get_logger @@ -16,25 +19,17 @@ class TracerFlareTests(unittest.TestCase): - mock_agent_config = [{"name": "flare-log-level", "config": {"log_level": "DEBUG"}}] - mock_agent_task = [ - False, - { - "args": { - "case_id": "1111111", - "hostname": "myhostname", - "user_handle": "user.name@datadoghq.com", - }, - "task_type": "tracer_flare", - "uuid": "d53fc8a4-8820-47a2-aa7d-d565582feb81", - }, - ] + mock_flare_send_request = FlareSendRequest( + case_id="1111111", hostname="myhostname", email="user.name@datadoghq.com" + ) def setUp(self): - self.flare = Flare() + self.flare_uuid = uuid.uuid4() + self.flare_dir = f"{TRACER_FLARE_DIRECTORY}-{self.flare_uuid}" + self.flare = Flare(flare_dir=pathlib.Path(self.flare_dir)) self.pid = os.getpid() - self.flare_file_path = f"{TRACER_FLARE_DIRECTORY}/tracer_python_{self.pid}.log" - self.config_file_path = f"{TRACER_FLARE_DIRECTORY}/tracer_config_{self.pid}.json" + self.flare_file_path = f"{self.flare_dir}/tracer_python_{self.pid}.log" + self.config_file_path = f"{self.flare_dir}/tracer_config_{self.pid}.json" def tearDown(self): self.confirm_cleanup() @@ -53,7 +48,7 @@ def test_single_process_success(self): """ ddlogger = get_logger("ddtrace") - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") file_handler = 
self._get_handler() valid_logger_level = self.flare._get_valid_logger_level(DEBUG_LEVEL_INT) @@ -66,7 +61,7 @@ def test_single_process_success(self): # Sends request to testagent # This just validates the request params - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) def test_single_process_partial_failure(self): """ @@ -79,7 +74,7 @@ def test_single_process_partial_failure(self): # Mock the partial failure with mock.patch("json.dump") as mock_json: mock_json.side_effect = Exception("file issue happened") - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") file_handler = self._get_handler() assert file_handler is not None @@ -89,7 +84,7 @@ def test_single_process_partial_failure(self): assert os.path.exists(self.flare_file_path) assert not os.path.exists(self.config_file_path) - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) def test_multiple_process_success(self): """ @@ -99,10 +94,10 @@ def test_multiple_process_success(self): num_processes = 3 def handle_agent_config(): - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") def handle_agent_task(): - self.flare.send(self.mock_agent_task) + self.flare.send(self.mock_flare_send_request) # Create multiple processes for _ in range(num_processes): @@ -114,7 +109,7 @@ def handle_agent_task(): # Assert that each process wrote its file successfully # We double the process number because each will generate a log file and a config file - assert len(processes) * 2 == len(os.listdir(TRACER_FLARE_DIRECTORY)) + assert len(processes) * 2 == len(os.listdir(self.flare_dir)) for _ in range(num_processes): p = multiprocessing.Process(target=handle_agent_task) @@ -130,19 +125,19 @@ def test_multiple_process_partial_failure(self): """ processes = [] - def do_tracer_flare(agent_config, agent_task): - self.flare.prepare(agent_config) + def do_tracer_flare(prep_request, send_request): + 
self.flare.prepare(prep_request) # Assert that only one process wrote its file successfully # We check for 2 files because it will generate a log file and a config file - assert 2 == len(os.listdir(TRACER_FLARE_DIRECTORY)) - self.flare.send(agent_task) + assert 2 == len(os.listdir(self.flare_dir)) + self.flare.send(send_request) # Create successful process - p = multiprocessing.Process(target=do_tracer_flare, args=(self.mock_agent_config, self.mock_agent_task)) + p = multiprocessing.Process(target=do_tracer_flare, args=("DEBUG", self.mock_flare_send_request)) processes.append(p) p.start() # Create failing process - p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_agent_task)) + p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_flare_send_request)) processes.append(p) p.start() for p in processes: @@ -154,7 +149,7 @@ def test_no_app_logs(self): file, just the tracer logs """ app_logger = Logger(name="my-app", level=DEBUG_LEVEL_INT) - self.flare.prepare(self.mock_agent_config) + self.flare.prepare("DEBUG") app_log_line = "this is an app log" app_logger.debug(app_log_line) @@ -169,5 +164,5 @@ def test_no_app_logs(self): self.flare.revert_configs() def confirm_cleanup(self): - assert not os.path.exists(TRACER_FLARE_DIRECTORY), f"The directory {TRACER_FLARE_DIRECTORY} still exists" + assert not self.flare.flare_dir.exists(), f"The directory {self.flare.flare_dir} still exists" assert self._get_handler() is None, "File handler was not removed" diff --git a/tests/snapshots/tests.contrib.botocore.test_bedrock.test_amazon_embedding.json b/tests/snapshots/tests.contrib.botocore.test_bedrock.test_amazon_embedding.json new file mode 100644 index 00000000000..f4c09d2734b --- /dev/null +++ b/tests/snapshots/tests.contrib.botocore.test_bedrock.test_amazon_embedding.json @@ -0,0 +1,34 @@ +[[ + { + "name": "bedrock-runtime.command", + "service": "aws.bedrock-runtime", + "resource": "InvokeModel", + "trace_id": 0, + "span_id": 1, + 
"parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.base_service": "", + "_dd.p.dm": "-0", + "_dd.p.tid": "662820e400000000", + "bedrock.request.model": "titan-embed-text-v1", + "bedrock.request.model_provider": "amazon", + "bedrock.request.prompt": "Hello World!", + "bedrock.response.duration": "311", + "bedrock.response.id": "1fd884e0-c9e8-44fa-b736-d31e2f607d54", + "bedrock.usage.completion_tokens": "", + "bedrock.usage.prompt_tokens": "3", + "language": "python", + "runtime-id": "a7bb6456241740dea419398d37aa13d2" + }, + "metrics": { + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "bedrock.response.embedding_length": 1536, + "process_id": 60939 + }, + "duration": 6739000, + "start": 1713905892539987000 + }]] diff --git a/tests/snapshots/tests.contrib.botocore.test_bedrock.test_cohere_embedding.json b/tests/snapshots/tests.contrib.botocore.test_bedrock.test_cohere_embedding.json new file mode 100644 index 00000000000..d1522b46ff5 --- /dev/null +++ b/tests/snapshots/tests.contrib.botocore.test_bedrock.test_cohere_embedding.json @@ -0,0 +1,36 @@ +[[ + { + "name": "bedrock-runtime.command", + "service": "aws.bedrock-runtime", + "resource": "InvokeModel", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.base_service": "", + "_dd.p.dm": "-0", + "_dd.p.tid": "6628215a00000000", + "bedrock.request.input_type": "search_document", + "bedrock.request.model": "embed-english-v3", + "bedrock.request.model_provider": "cohere", + "bedrock.request.prompt": "['Hello World!', 'Goodbye cruel world!']", + "bedrock.request.truncate": "", + "bedrock.response.duration": "271", + "bedrock.response.id": "0e9cb5ab-1fef-46eb-8e2c-773f0f60f39d", + "bedrock.usage.completion_tokens": "", + "bedrock.usage.prompt_tokens": "7", + "language": "python", + "runtime-id": "c02c555fdac14227bee7b37a0c304534" + }, + "metrics": { + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + 
"bedrock.response.embedding_length": 1024, + "process_id": 61336 + }, + "duration": 630192000, + "start": 1713906010873383000 + }]]