Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into christophe-papazian/APPSEC-52972-exploit-prevention-blocking-support
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe-papazian committed May 2, 2024
2 parents 2c417bd + 357cb3b commit 6899030
Show file tree
Hide file tree
Showing 69 changed files with 2,473 additions and 1,287 deletions.
12 changes: 0 additions & 12 deletions .github/workflows/build_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,6 @@ on:
# before merging/releasing
- build_deploy*
pull_request:
paths:
- ".github/workflows/build_deploy.yml"
- ".github/workflows/build_python_3.yml"
- "setup.py"
- "setup.cfg"
- "pyproject.toml"
- "**.c"
- "**.h"
- "**.cpp"
- "**.hpp"
- "**.pyx"
- "ddtrace/vendor/**"
release:
types:
- published
Expand Down
2 changes: 2 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ stages:
- deploy
- benchmarks
- benchmarks-pr-comment
- macrobenchmarks

include:
- remote: https://gitlab-templates.ddbuild.io/apm/packaging.yml
- local: ".gitlab/benchmarks.yml"
- local: ".gitlab/macrobenchmarks.yml"

variables:
DOWNSTREAM_BRANCH:
Expand Down
86 changes: 86 additions & 0 deletions .gitlab/macrobenchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
variables:
  BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py-macrobenchmarks

# Hidden base job: shared configuration for all macrobenchmark jobs.
.macrobenchmarks:
  stage: macrobenchmarks
  needs: []
  tags: ["runner:apm-k8s-same-cpu"]
  timeout: 1h
  rules:
    - if: $CI_PIPELINE_SOURCE == "schedule"
      when: always
    - when: manual
  ## Next step, enable:
  # - if: $CI_COMMIT_REF_NAME == "main"
  #   when: always
  # If you have a problem with Gitlab cache, see Troubleshooting section in Benchmarking Platform docs
  # NOTE(review): `variables` above defines BASE_CI_IMAGE but the job uses
  # $BENCHMARKS_CI_IMAGE — confirm the latter is defined in an included file.
  image: $BENCHMARKS_CI_IMAGE
  script: |
    git clone --branch python/macrobenchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform platform && cd platform
    if [ "$BP_PYTHON_SCENARIO_DIR" == "flask-realworld" ]; then
      bp-runner bp-runner.flask-realworld.yml --debug
    else
      bp-runner bp-runner.simple.yml --debug
    fi
  artifacts:
    name: "artifacts"
    when: always
    paths:
      - platform/artifacts/
    expire_in: 3 months
  variables:
    # Benchmark's env variables. Modify to tweak benchmark parameters.
    DD_TRACE_DEBUG: "false"
    DD_RUNTIME_METRICS_ENABLED: "true"
    DD_REMOTE_CONFIGURATION_ENABLED: "false"
    DD_INSTRUMENTATION_TELEMETRY_ENABLED: "false"

    K6_OPTIONS_NORMAL_OPERATION_RATE: 40
    K6_OPTIONS_NORMAL_OPERATION_DURATION: 5m
    K6_OPTIONS_NORMAL_OPERATION_GRACEFUL_STOP: 1m
    K6_OPTIONS_NORMAL_OPERATION_PRE_ALLOCATED_VUS: 4
    K6_OPTIONS_NORMAL_OPERATION_MAX_VUS: 4

    K6_OPTIONS_HIGH_LOAD_RATE: 500
    K6_OPTIONS_HIGH_LOAD_DURATION: 1m
    K6_OPTIONS_HIGH_LOAD_GRACEFUL_STOP: 30s
    K6_OPTIONS_HIGH_LOAD_PRE_ALLOCATED_VUS: 4
    K6_OPTIONS_HIGH_LOAD_MAX_VUS: 4

    # Gitlab and BP specific env vars. Do not modify.
    FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"

  # Workaround: Currently we're not running the benchmarks on every PR, but GitHub still shows them as pending.
  # By marking the benchmarks as allow_failure, this should go away. (This workaround should be removed once the
  # benchmarks get changed to run on every PR)
  allow_failure: true

macrobenchmarks:
  extends: .macrobenchmarks
  parallel:
    matrix:
      - DD_BENCHMARKS_CONFIGURATION: baseline
        BP_PYTHON_SCENARIO_DIR: flask-realworld
        DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"

      - DD_BENCHMARKS_CONFIGURATION: only-tracing
        BP_PYTHON_SCENARIO_DIR: flask-realworld
        DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"

      - DD_BENCHMARKS_CONFIGURATION: only-tracing
        BP_PYTHON_SCENARIO_DIR: flask-realworld
        DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
        DD_REMOTE_CONFIGURATION_ENABLED: "false"
        DD_INSTRUMENTATION_TELEMETRY_ENABLED: "true"

      - DD_BENCHMARKS_CONFIGURATION: only-tracing
        BP_PYTHON_SCENARIO_DIR: flask-realworld
        DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
        DD_REMOTE_CONFIGURATION_ENABLED: "false"
        DD_INSTRUMENTATION_TELEMETRY_ENABLED: "false"

      - DD_BENCHMARKS_CONFIGURATION: only-tracing
        BP_PYTHON_SCENARIO_DIR: flask-realworld
        DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
        DD_REMOTE_CONFIGURATION_ENABLED: "true"
        DD_INSTRUMENTATION_TELEMETRY_ENABLED: "true"
5 changes: 2 additions & 3 deletions benchmarks/appsec_iast_propagation/scenario.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import Any # noqa:F401

import bm

from tests.utils import override_env
from bm.utils import override_env


with override_env({"DD_IAST_ENABLED": "True"}):
Expand Down Expand Up @@ -42,7 +41,7 @@ def aspect_function(internal_loop, tainted):
value = ""
res = value
for _ in range(internal_loop):
res = add_aspect(res, join_aspect(str.join, 1, "_", (tainted, "_", tainted)))
res = add_aspect(res, join_aspect("_".join, 1, "_", (tainted, "_", tainted)))
value = res
res = add_aspect(res, tainted)
value = res
Expand Down
45 changes: 39 additions & 6 deletions ddtrace/_trace/trace_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from typing import List
from typing import Optional

from ddtrace import config
from ddtrace._trace.span import Span
from ddtrace._trace.utils import extract_DD_context_from_messages
from ddtrace._trace.utils import set_botocore_patched_api_call_span_tags as set_patched_api_call_span_tags
from ddtrace._trace.utils import set_botocore_response_metadata_tags
from ddtrace.constants import ANALYTICS_SAMPLE_RATE_KEY
from ddtrace.constants import SPAN_KIND
from ddtrace.constants import SPAN_MEASURED_KEY
Expand Down Expand Up @@ -107,6 +108,9 @@ def _start_span(ctx: core.ExecutionContext, call_trace: bool = True, **kwargs) -
trace_utils.activate_distributed_headers(
tracer, int_config=distributed_headers_config, request_headers=ctx["distributed_headers"]
)
distributed_context = ctx.get_item("distributed_context", traverse=True)
if distributed_context and not call_trace:
span_kwargs["child_of"] = distributed_context
span_kwargs.update(kwargs)
span = (tracer.trace if call_trace else tracer.start_span)(ctx["span_name"], **span_kwargs)
for tk, tv in ctx.get_item("tags", dict()).items():
Expand Down Expand Up @@ -569,20 +573,20 @@ def _on_botocore_patched_api_call_started(ctx):
span.start_ns = start_ns


def _on_botocore_patched_api_call_exception(ctx, response, exception_type, is_error_code_fn):
    """Handle an exception raised by a patched botocore API call.

    Tags the call span with whatever response metadata is available, then
    suppresses the exception on the span when the recorded HTTP status code
    is not considered an error by ``is_error_code_fn``.
    """
    span = ctx.get_item(ctx.get_item("call_key"))
    # `ClientError.response` contains the result, so we can still grab response metadata
    set_botocore_response_metadata_tags(span, response, is_error_code_fn=is_error_code_fn)

    # If we have a status code, and the status code is not an error,
    # then ignore the exception being raised
    status_code = span.get_tag(http.STATUS_CODE)
    if status_code and not is_error_code_fn(int(status_code)):
        span._ignore_exception(exception_type)


def _on_botocore_patched_api_call_success(ctx, response):
    """Tag the call span with response metadata after a successful botocore API call."""
    set_botocore_response_metadata_tags(ctx.get_item(ctx.get_item("call_key")), response)


def _on_botocore_trace_context_injection_prepared(
Expand Down Expand Up @@ -682,6 +686,31 @@ def _on_botocore_bedrock_process_response(
span.finish()


def _on_botocore_sqs_recvmessage_post(
    ctx: core.ExecutionContext, _, result: Dict, propagate: bool, message_parser: Callable
) -> None:
    """Post-hook for SQS ReceiveMessage: flag message receipt on the execution
    context and, when propagation is enabled, stash the distributed trace
    context extracted from the received messages."""
    if result is None or "Messages" not in result:
        return
    messages = result["Messages"]
    if len(messages) < 1:
        return
    ctx.set_item("message_received", True)
    if propagate:
        ctx.set_safe("distributed_context", extract_DD_context_from_messages(messages, message_parser))


def _on_botocore_kinesis_getrecords_post(
    ctx: core.ExecutionContext,
    _,
    __,
    ___,
    ____,
    result,
    propagate: bool,
    message_parser: Callable,
):
    """Post-hook for Kinesis GetRecords: flag record receipt on the execution
    context and, when propagation is enabled, store the distributed trace
    context extracted from the returned records."""
    if result is None or "Records" not in result:
        return
    records = result["Records"]
    if len(records) < 1:
        return
    ctx.set_item("message_received", True)
    if propagate:
        ctx.set_item("distributed_context", extract_DD_context_from_messages(records, message_parser))


def _on_redis_async_command_post(span, rowcount):
if rowcount is not None:
span.set_metric(db.ROWCOUNT, rowcount)
Expand Down Expand Up @@ -727,10 +756,14 @@ def listen():
core.on("botocore.patched_stepfunctions_api_call.started", _on_botocore_patched_api_call_started)
core.on("botocore.patched_stepfunctions_api_call.exception", _on_botocore_patched_api_call_exception)
core.on("botocore.stepfunctions.update_messages", _on_botocore_update_messages)
core.on("botocore.eventbridge.update_messages", _on_botocore_update_messages)
core.on("botocore.client_context.update_messages", _on_botocore_update_messages)
core.on("botocore.patched_bedrock_api_call.started", _on_botocore_patched_bedrock_api_call_started)
core.on("botocore.patched_bedrock_api_call.exception", _on_botocore_patched_bedrock_api_call_exception)
core.on("botocore.patched_bedrock_api_call.success", _on_botocore_patched_bedrock_api_call_success)
core.on("botocore.bedrock.process_response", _on_botocore_bedrock_process_response)
core.on("botocore.sqs.ReceiveMessage.post", _on_botocore_sqs_recvmessage_post)
core.on("botocore.kinesis.GetRecords.post", _on_botocore_kinesis_getrecords_post)
core.on("redis.async_command.post", _on_redis_async_command_post)

for context_name in (
Expand Down
59 changes: 45 additions & 14 deletions ddtrace/_trace/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ def __init__(

self.enabled = config._tracing_enabled
self.context_provider = context_provider or DefaultContextProvider()
self._user_sampler: Optional[BaseSampler] = None
# _user_sampler is the backup in case we need to revert from remote config to local
self._user_sampler: Optional[BaseSampler] = DatadogSampler()
self._sampler: BaseSampler = DatadogSampler()
self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url
self._compute_stats = config._trace_compute_stats
Expand Down Expand Up @@ -286,7 +287,7 @@ def __init__(
self._shutdown_lock = RLock()

self._new_process = False
config._subscribe(["_trace_sample_rate"], self._on_global_config_update)
config._subscribe(["_trace_sample_rate", "_trace_sampling_rules"], self._on_global_config_update)
config._subscribe(["logs_injection"], self._on_global_config_update)
config._subscribe(["tags"], self._on_global_config_update)
config._subscribe(["_tracing_enabled"], self._on_global_config_update)
Expand Down Expand Up @@ -1125,19 +1126,10 @@ def _is_span_internal(span):

def _on_global_config_update(self, cfg, items):
# type: (Config, List) -> None
if "_trace_sample_rate" in items:
# Reset the user sampler if one exists
if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler:
self._sampler = self._user_sampler
return

if cfg._get_source("_trace_sample_rate") != "default":
sample_rate = cfg._trace_sample_rate
else:
sample_rate = None

sampler = DatadogSampler(default_sample_rate=sample_rate)
self._sampler = sampler
# sampling configs always come as a pair
if "_trace_sample_rate" in items and "_trace_sampling_rules" in items:
self._handle_sampler_update(cfg)

if "tags" in items:
self._tags = cfg.tags.copy()
Expand All @@ -1160,3 +1152,42 @@ def _on_global_config_update(self, cfg, items):
from ddtrace.contrib.logging import unpatch

unpatch()

def _handle_sampler_update(self, cfg):
# type: (Config) -> None
if (
cfg._get_source("_trace_sample_rate") != "remote_config"
and cfg._get_source("_trace_sampling_rules") != "remote_config"
and self._user_sampler
):
# if we get empty configs from rc for both sample rate and rules, we should revert to the user sampler
self.sampler = self._user_sampler
return

if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler:
try:
sample_rate = self._user_sampler.default_sample_rate # type: ignore[attr-defined]
except AttributeError:
log.debug("Custom non-DatadogSampler is being used, cannot pull default sample rate")
sample_rate = None
elif cfg._get_source("_trace_sample_rate") != "default":
sample_rate = cfg._trace_sample_rate
else:
sample_rate = None

if cfg._get_source("_trace_sampling_rules") != "remote_config" and self._user_sampler:
try:
sampling_rules = self._user_sampler.rules # type: ignore[attr-defined]
# we need to chop off the default_sample_rate rule so the new sample_rate can be applied
sampling_rules = sampling_rules[:-1]
except AttributeError:
log.debug("Custom non-DatadogSampler is being used, cannot pull sampling rules")
sampling_rules = None
elif cfg._get_source("_trace_sampling_rules") != "default":
sampling_rules = DatadogSampler._parse_rules_from_str(cfg._trace_sampling_rules)
else:
sampling_rules = None

sampler = DatadogSampler(rules=sampling_rules, default_sample_rate=sample_rate)

self._sampler = sampler
41 changes: 41 additions & 0 deletions ddtrace/_trace/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
from typing import Any
from typing import Callable
from typing import Dict
from typing import Optional

from ddtrace import Span
from ddtrace import config
from ddtrace.constants import ANALYTICS_SAMPLE_RATE_KEY
from ddtrace.constants import SPAN_KIND
from ddtrace.constants import SPAN_MEASURED_KEY
from ddtrace.ext import SpanKind
from ddtrace.ext import aws
from ddtrace.ext import http
from ddtrace.internal.constants import COMPONENT
from ddtrace.internal.utils.formats import deep_getattr
from ddtrace.propagation.http import HTTPPropagator


def set_botocore_patched_api_call_span_tags(span: Span, instance, args, params, endpoint_name, operation):
Expand Down Expand Up @@ -39,3 +46,37 @@ def set_botocore_patched_api_call_span_tags(span: Span, instance, args, params,

# set analytics sample rate
span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.botocore.get_analytics_sample_rate())


def set_botocore_response_metadata_tags(
    span: Span, result: Dict[str, Any], is_error_code_fn: Optional[Callable] = None
) -> None:
    """Copy botocore ``ResponseMetadata`` fields (HTTP status code, retry
    attempts, request id) from *result* onto *span* as tags."""
    response_meta = result.get("ResponseMetadata") if result else None
    if not response_meta:
        return

    if "HTTPStatusCode" in response_meta:
        status_code = response_meta["HTTPStatusCode"]
        span.set_tag(http.STATUS_CODE, status_code)
        # Flag the span as an error when the caller-supplied predicate says so
        if is_error_code_fn is not None and is_error_code_fn(int(status_code)):
            span.error = 1

    if "RetryAttempts" in response_meta:
        span.set_tag("retry_attempts", response_meta["RetryAttempts"])

    if "RequestId" in response_meta:
        span.set_tag_str("aws.requestid", response_meta["RequestId"])


def extract_DD_context_from_messages(messages, extract_from_message: Callable):
    """Return the distributed trace context carried by the first message in
    *messages*, or ``None`` when there is no message, no embedded context, or
    the extracted context has no trace id."""
    if len(messages) < 1:
        return None
    context_json = extract_from_message(messages[0])
    if context_json is None:
        return None
    extracted = HTTPPropagator.extract(context_json)
    if extracted.trace_id is None:
        return None
    return extracted
4 changes: 4 additions & 0 deletions ddtrace/appsec/_iast/_evidence_redaction/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ddtrace.appsec._iast._evidence_redaction._sensitive_handler import sensitive_handler


# Re-export the handler explicitly; the previous bare `sensitive_handler`
# expression was a no-op statement that only silenced unused-import linting.
__all__ = ["sensitive_handler"]
Loading

0 comments on commit 6899030

Please sign in to comment.