Commit
Merge branch 'main' into emmett.butler/redis-spans-refactor
emmettbutler authored May 1, 2024
2 parents e9f3713 + c8b907b commit d3b3ba0
Showing 27 changed files with 1,053 additions and 156 deletions.
2 changes: 2 additions & 0 deletions .gitlab-ci.yml
@@ -3,10 +3,12 @@ stages:
- deploy
- benchmarks
- benchmarks-pr-comment
- macrobenchmarks

include:
- remote: https://gitlab-templates.ddbuild.io/apm/packaging.yml
- local: ".gitlab/benchmarks.yml"
- local: ".gitlab/macrobenchmarks.yml"

variables:
DOWNSTREAM_BRANCH:
86 changes: 86 additions & 0 deletions .gitlab/macrobenchmarks.yml
@@ -0,0 +1,86 @@
variables:
BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py-macrobenchmarks

.macrobenchmarks:
stage: macrobenchmarks
needs: []
tags: ["runner:apm-k8s-same-cpu"]
timeout: 1h
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
when: always
- when: manual
## Next step, enable:
# - if: $CI_COMMIT_REF_NAME == "main"
# when: always
# If you have a problem with Gitlab cache, see Troubleshooting section in Benchmarking Platform docs
image: $BENCHMARKS_CI_IMAGE
script: |
git clone --branch python/macrobenchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform platform && cd platform
if [ "$BP_PYTHON_SCENARIO_DIR" == "flask-realworld" ]; then
bp-runner bp-runner.flask-realworld.yml --debug
else
bp-runner bp-runner.simple.yml --debug
fi
artifacts:
name: "artifacts"
when: always
paths:
- platform/artifacts/
expire_in: 3 months
variables:
# Benchmark's env variables. Modify to tweak benchmark parameters.
DD_TRACE_DEBUG: "false"
DD_RUNTIME_METRICS_ENABLED: "true"
DD_REMOTE_CONFIGURATION_ENABLED: "false"
DD_INSTRUMENTATION_TELEMETRY_ENABLED: "false"

K6_OPTIONS_NORMAL_OPERATION_RATE: 40
K6_OPTIONS_NORMAL_OPERATION_DURATION: 5m
K6_OPTIONS_NORMAL_OPERATION_GRACEFUL_STOP: 1m
K6_OPTIONS_NORMAL_OPERATION_PRE_ALLOCATED_VUS: 4
K6_OPTIONS_NORMAL_OPERATION_MAX_VUS: 4

K6_OPTIONS_HIGH_LOAD_RATE: 500
K6_OPTIONS_HIGH_LOAD_DURATION: 1m
K6_OPTIONS_HIGH_LOAD_GRACEFUL_STOP: 30s
K6_OPTIONS_HIGH_LOAD_PRE_ALLOCATED_VUS: 4
K6_OPTIONS_HIGH_LOAD_MAX_VUS: 4

# Gitlab and BP specific env vars. Do not modify.
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"

# Workaround: Currently we're not running the benchmarks on every PR, but GitHub still shows them as pending.
# By marking the benchmarks as allow_failure, this should go away. (This workaround should be removed once the
# benchmarks get changed to run on every PR)
allow_failure: true

macrobenchmarks:
extends: .macrobenchmarks
parallel:
matrix:
- DD_BENCHMARKS_CONFIGURATION: baseline
BP_PYTHON_SCENARIO_DIR: flask-realworld
DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"

- DD_BENCHMARKS_CONFIGURATION: only-tracing
BP_PYTHON_SCENARIO_DIR: flask-realworld
DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"

- DD_BENCHMARKS_CONFIGURATION: only-tracing
BP_PYTHON_SCENARIO_DIR: flask-realworld
DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
DD_REMOTE_CONFIGURATION_ENABLED: "false"
DD_INSTRUMENTATION_TELEMETRY_ENABLED: "true"

- DD_BENCHMARKS_CONFIGURATION: only-tracing
BP_PYTHON_SCENARIO_DIR: flask-realworld
DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
DD_REMOTE_CONFIGURATION_ENABLED: "false"
DD_INSTRUMENTATION_TELEMETRY_ENABLED: "false"

- DD_BENCHMARKS_CONFIGURATION: only-tracing
BP_PYTHON_SCENARIO_DIR: flask-realworld
DDTRACE_INSTALL_VERSION: "git+https://github.com/Datadog/dd-trace-py@${CI_COMMIT_SHA}"
DD_REMOTE_CONFIGURATION_ENABLED: "true"
DD_INSTRUMENTATION_TELEMETRY_ENABLED: "true"
59 changes: 45 additions & 14 deletions ddtrace/_trace/tracer.py
@@ -230,7 +230,8 @@ def __init__(

self.enabled = config._tracing_enabled
self.context_provider = context_provider or DefaultContextProvider()
self._user_sampler: Optional[BaseSampler] = None
# _user_sampler is the backup in case we need to revert from remote config to local
self._user_sampler: Optional[BaseSampler] = DatadogSampler()
self._sampler: BaseSampler = DatadogSampler()
self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url
self._compute_stats = config._trace_compute_stats
@@ -286,7 +287,7 @@ def __init__(
self._shutdown_lock = RLock()

self._new_process = False
config._subscribe(["_trace_sample_rate"], self._on_global_config_update)
config._subscribe(["_trace_sample_rate", "_trace_sampling_rules"], self._on_global_config_update)
config._subscribe(["logs_injection"], self._on_global_config_update)
config._subscribe(["tags"], self._on_global_config_update)
config._subscribe(["_tracing_enabled"], self._on_global_config_update)
@@ -1125,19 +1126,10 @@ def _is_span_internal(span):

def _on_global_config_update(self, cfg, items):
# type: (Config, List) -> None
if "_trace_sample_rate" in items:
# Reset the user sampler if one exists
if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler:
self._sampler = self._user_sampler
return

if cfg._get_source("_trace_sample_rate") != "default":
sample_rate = cfg._trace_sample_rate
else:
sample_rate = None

sampler = DatadogSampler(default_sample_rate=sample_rate)
self._sampler = sampler
# sampling configs always come as a pair
if "_trace_sample_rate" in items and "_trace_sampling_rules" in items:
self._handle_sampler_update(cfg)

if "tags" in items:
self._tags = cfg.tags.copy()
@@ -1160,3 +1152,42 @@ def _on_global_config_update(self, cfg, items):
from ddtrace.contrib.logging import unpatch

unpatch()

def _handle_sampler_update(self, cfg):
# type: (Config) -> None
if (
cfg._get_source("_trace_sample_rate") != "remote_config"
and cfg._get_source("_trace_sampling_rules") != "remote_config"
and self._user_sampler
):
# if we get empty configs from rc for both sample rate and rules, we should revert to the user sampler
self._sampler = self._user_sampler
return

if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler:
try:
sample_rate = self._user_sampler.default_sample_rate # type: ignore[attr-defined]
except AttributeError:
log.debug("Custom non-DatadogSampler is being used, cannot pull default sample rate")
sample_rate = None
elif cfg._get_source("_trace_sample_rate") != "default":
sample_rate = cfg._trace_sample_rate
else:
sample_rate = None

if cfg._get_source("_trace_sampling_rules") != "remote_config" and self._user_sampler:
try:
sampling_rules = self._user_sampler.rules # type: ignore[attr-defined]
# we need to chop off the default_sample_rate rule so the new sample_rate can be applied
sampling_rules = sampling_rules[:-1]
except AttributeError:
log.debug("Custom non-DatadogSampler is being used, cannot pull sampling rules")
sampling_rules = None
elif cfg._get_source("_trace_sampling_rules") != "default":
sampling_rules = DatadogSampler._parse_rules_from_str(cfg._trace_sampling_rules)
else:
sampling_rules = None

sampler = DatadogSampler(rules=sampling_rules, default_sample_rate=sample_rate)

self._sampler = sampler
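
For context on the tracer.py hunk above: the locally configured sampler is remembered as _user_sampler so the tracer can fall back to it when remote configuration stops supplying a sample rate and rules. A minimal usage sketch, assuming the public DatadogSampler/SamplingRule API and tracer.configure(sampler=...) behave as described here; the import paths and service name are illustrative:

    from ddtrace import tracer
    from ddtrace.sampler import DatadogSampler, SamplingRule

    # Local (user) sampling configuration: keep 50% of "web" traces, 10% of the rest.
    local_sampler = DatadogSampler(
        rules=[SamplingRule(sample_rate=0.5, service="web")],
        default_sample_rate=0.1,
    )
    tracer.configure(sampler=local_sampler)  # remembered as tracer._user_sampler

    # When remote config pushes a rate/rules pair, _handle_sampler_update builds a new
    # DatadogSampler from the remote values; when both values stop coming from
    # "remote_config", the tracer reverts to local_sampler.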
2 changes: 2 additions & 0 deletions ddtrace/contrib/botocore/services/sqs.py
@@ -135,6 +135,8 @@ def _patched_sqs_api_call(parent_ctx, original_func, instance, args, kwargs, fun
else:
call_name = trace_operation

child_of = parent_ctx.get_item("distributed_context")

if should_instrument:
with core.context_with_data(
"botocore.patched_sqs_api_call",
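The single new line above (child_of = parent_ctx.get_item("distributed_context")) makes the consumer-side span a child of the upstream producer's context. A rough sketch of the same parenting pattern using the public propagation API; the carrier values and span name are invented for the example:

    from ddtrace import tracer
    from ddtrace.propagation.http import HTTPPropagator

    # Trace headers as they might arrive alongside a message (illustrative values).
    carrier = {"x-datadog-trace-id": "1234", "x-datadog-parent-id": "5678"}

    upstream_ctx = HTTPPropagator.extract(carrier)  # the "distributed context"
    with tracer.start_span("sqs.process", child_of=upstream_ctx) as span:
        span.set_tag("queue_name", "orders")  # consumer-side work happens here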
8 changes: 6 additions & 2 deletions ddtrace/internal/constants.py
@@ -90,7 +90,9 @@

class _PRIORITY_CATEGORY:
USER = "user"
RULE = "rule"
RULE_DEF = "rule_default"
RULE_CUSTOMER = "rule_customer"
RULE_DYNAMIC = "rule_dynamic"
AUTO = "auto"
DEFAULT = "default"

@@ -99,7 +101,9 @@ class _PRIORITY_CATEGORY:
# used to simplify code that selects sampling priority based on many factors
_CATEGORY_TO_PRIORITIES = {
_PRIORITY_CATEGORY.USER: (USER_KEEP, USER_REJECT),
_PRIORITY_CATEGORY.RULE: (USER_KEEP, USER_REJECT),
_PRIORITY_CATEGORY.RULE_DEF: (USER_KEEP, USER_REJECT),
_PRIORITY_CATEGORY.RULE_CUSTOMER: (USER_KEEP, USER_REJECT),
_PRIORITY_CATEGORY.RULE_DYNAMIC: (USER_KEEP, USER_REJECT),
_PRIORITY_CATEGORY.AUTO: (AUTO_KEEP, AUTO_REJECT),
_PRIORITY_CATEGORY.DEFAULT: (AUTO_KEEP, AUTO_REJECT),
}
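
The constants hunk above splits the old RULE priority category into RULE_DEF, RULE_CUSTOMER, and RULE_DYNAMIC so the tracer can report which kind of rule made the decision, while all three still resolve to the user keep/reject priorities. A standalone sketch of how a priority is picked from this mapping; the priority values are the standard Datadog ones, and this is a simplified stand-in rather than the tracer's selection code:

    USER_KEEP, USER_REJECT, AUTO_KEEP, AUTO_REJECT = 2, -1, 1, 0

    CATEGORY_TO_PRIORITIES = {
        "rule_default": (USER_KEEP, USER_REJECT),
        "rule_customer": (USER_KEEP, USER_REJECT),
        "rule_dynamic": (USER_KEEP, USER_REJECT),
        "auto": (AUTO_KEEP, AUTO_REJECT),
        "default": (AUTO_KEEP, AUTO_REJECT),
    }

    def pick_priority(category: str, sampled: bool) -> int:
        keep, reject = CATEGORY_TO_PRIORITIES[category]  # (keep, reject) pair per category
        return keep if sampled else reject

    assert pick_priority("rule_customer", True) == USER_KEEP    # 2
    assert pick_priority("auto", False) == AUTO_REJECT          # 0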
5 changes: 5 additions & 0 deletions ddtrace/internal/packages.py
@@ -238,6 +238,11 @@ def is_third_party(path: Path) -> bool:
return package.name in _third_party_packages()


@cached()
def is_user_code(path: Path) -> bool:
return not (is_stdlib(path) or is_third_party(path))


@cached()
def is_distribution_available(name: str) -> bool:
"""Determine if a distribution is available in the current environment."""
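The new packages.is_user_code helper classifies a path as user code exactly when it is neither stdlib nor an installed third-party distribution. A brief usage sketch; the example paths are illustrative:

    from pathlib import Path
    from ddtrace.internal import packages

    packages.is_user_code(Path("/usr/lib/python3.11/json/__init__.py"))                  # False: stdlib
    packages.is_user_code(Path("/app/.venv/lib/python3.11/site-packages/flask/app.py"))  # False: third party
    packages.is_user_code(Path("/app/myservice/handlers.py"))                            # True: application code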
2 changes: 2 additions & 0 deletions ddtrace/internal/remoteconfig/client.py
@@ -75,6 +75,7 @@ class Capabilities(enum.IntFlag):
APM_TRACING_HTTP_HEADER_TAGS = 1 << 14
APM_TRACING_CUSTOM_TAGS = 1 << 15
APM_TRACING_ENABLED = 1 << 19
APM_TRACING_SAMPLE_RULES = 1 << 29


class RemoteConfigError(Exception):
@@ -382,6 +383,7 @@ def _build_payload(self, state):
| Capabilities.APM_TRACING_HTTP_HEADER_TAGS
| Capabilities.APM_TRACING_CUSTOM_TAGS
| Capabilities.APM_TRACING_ENABLED
| Capabilities.APM_TRACING_SAMPLE_RULES
)
return dict(
client=dict(
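Because Capabilities is an enum.IntFlag, advertising APM_TRACING_SAMPLE_RULES is just one more term in the bitwise OR that _build_payload already performs. A standalone sketch of how the combined flag behaves, with the flag values copied from the hunks above:

    import enum

    class Capabilities(enum.IntFlag):
        APM_TRACING_CUSTOM_TAGS = 1 << 15
        APM_TRACING_ENABLED = 1 << 19
        APM_TRACING_SAMPLE_RULES = 1 << 29

    advertised = (
        Capabilities.APM_TRACING_CUSTOM_TAGS
        | Capabilities.APM_TRACING_ENABLED
        | Capabilities.APM_TRACING_SAMPLE_RULES
    )

    assert Capabilities.APM_TRACING_SAMPLE_RULES in advertised
    print(int(advertised))  # 537427968: the combined value the client reports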
22 changes: 19 additions & 3 deletions ddtrace/internal/sampling.py
@@ -62,6 +62,16 @@ class SamplingMechanism(object):
REMOTE_RATE_USER = 6
REMOTE_RATE_DATADOG = 7
SPAN_SAMPLING_RULE = 8
REMOTE_USER_RULE = 11
REMOTE_DYNAMIC_RULE = 12


class PriorityCategory(object):
DEFAULT = "default"
AUTO = "auto"
RULE_DEFAULT = "rule_default"
RULE_CUSTOMER = "rule_customer"
RULE_DYNAMIC = "rule_dynamic"


# Use regex to validate trace tag value
@@ -278,11 +288,17 @@ def is_single_span_sampled(span):
def _set_sampling_tags(span, sampled, sample_rate, priority_category):
# type: (Span, bool, float, str) -> None
mechanism = SamplingMechanism.TRACE_SAMPLING_RULE
if priority_category == "rule":
if priority_category == PriorityCategory.RULE_DEFAULT:
span.set_metric(SAMPLING_RULE_DECISION, sample_rate)
if priority_category == PriorityCategory.RULE_CUSTOMER:
span.set_metric(SAMPLING_RULE_DECISION, sample_rate)
mechanism = SamplingMechanism.REMOTE_USER_RULE
if priority_category == PriorityCategory.RULE_DYNAMIC:
span.set_metric(SAMPLING_RULE_DECISION, sample_rate)
elif priority_category == "default":
mechanism = SamplingMechanism.REMOTE_DYNAMIC_RULE
elif priority_category == PriorityCategory.DEFAULT:
mechanism = SamplingMechanism.DEFAULT
elif priority_category == "auto":
elif priority_category == PriorityCategory.AUTO:
mechanism = SamplingMechanism.AGENT_RATE
span.set_metric(SAMPLING_AGENT_DECISION, sample_rate)
priorities = _CATEGORY_TO_PRIORITIES[priority_category]
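As the new branches in _set_sampling_tags show, each priority category now maps to its own sampling mechanism: local rules keep TRACE_SAMPLING_RULE, remote user rules report REMOTE_USER_RULE (11), and remote dynamic rules report REMOTE_DYNAMIC_RULE (12). A standalone sketch of that mapping; the names come from the diff, but the table itself is my reconstruction rather than the file's literal code:

    CATEGORY_TO_MECHANISM = {
        "rule_default": "TRACE_SAMPLING_RULE",   # locally configured rule (unchanged behavior)
        "rule_customer": "REMOTE_USER_RULE",     # 11: user-defined rule delivered via remote config
        "rule_dynamic": "REMOTE_DYNAMIC_RULE",   # 12: Datadog-computed rule delivered via remote config
        "auto": "AGENT_RATE",                    # agent service-rate sampling
        "default": "DEFAULT",
    }

    def mechanism_for(category: str) -> str:
        # All three rule flavors also record SAMPLING_RULE_DECISION on the span.
        return CATEGORY_TO_MECHANISM[category]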
34 changes: 20 additions & 14 deletions ddtrace/internal/symbol_db/symbols.py
@@ -3,7 +3,7 @@
from dataclasses import field
import dis
from enum import Enum
import http
from http.client import HTTPResponse
from inspect import CO_VARARGS
from inspect import CO_VARKEYWORDS
from inspect import isasyncgenfunction
@@ -31,7 +31,6 @@
from ddtrace.internal.logger import get_logger
from ddtrace.internal.module import BaseModuleWatchdog
from ddtrace.internal.module import origin
from ddtrace.internal.packages import is_stdlib
from ddtrace.internal.runtime import get_runtime_id
from ddtrace.internal.safety import _isinstance
from ddtrace.internal.utils.cache import cached
@@ -50,10 +49,10 @@


@cached()
def is_from_stdlib(obj: t.Any) -> t.Optional[bool]:
def is_from_user_code(obj: t.Any) -> t.Optional[bool]:
try:
path = origin(sys.modules[object.__getattribute__(obj, "__module__")])
return is_stdlib(path) if path is not None else None
return packages.is_user_code(path) if path is not None else None
except (AttributeError, KeyError):
return None

@@ -182,9 +181,6 @@ def _(cls, module: ModuleType, data: ScopeData):
symbols = []
scopes = []

if is_stdlib(module_origin):
return None

for alias, child in object.__getattribute__(module, "__dict__").items():
if _isinstance(child, ModuleType):
# We don't want to traverse other modules.
@@ -224,7 +220,7 @@ def _(cls, obj: type, data: ScopeData):
return None
data.seen.add(obj)

if is_from_stdlib(obj):
if not is_from_user_code(obj):
return None

symbols = []
@@ -347,7 +343,7 @@ def _(cls, f: FunctionType, data: ScopeData):
return None
data.seen.add(f)

if is_from_stdlib(f):
if not is_from_user_code(f):
return None

code = f.__dd_wrapped__.__code__ if hasattr(f, "__dd_wrapped__") else f.__code__
@@ -416,7 +412,7 @@ def _(cls, pr: property, data: ScopeData):
data.seen.add(pr.fget)

# TODO: These names don't match what is reported by the discovery.
if pr.fget is None or is_from_stdlib(pr.fget):
if pr.fget is None or not is_from_user_code(pr.fget):
return None

path = func_origin(t.cast(FunctionType, pr.fget))
@@ -477,7 +473,7 @@ def to_json(self) -> dict:
"scopes": [_.to_json() for _ in self._scopes],
}

def upload(self) -> http.client.HTTPResponse:
def upload(self) -> HTTPResponse:
body, headers = multipart(
parts=[
FormData(
@@ -509,14 +505,24 @@ def __len__(self) -> int:


def is_module_included(module: ModuleType) -> bool:
# Check if module name matches the include patterns
if symdb_config._includes_re.match(module.__name__):
return True

package = packages.module_to_package(module)
if package is None:
# Check if it is user code
module_origin = origin(module)
if module_origin is None:
return False

return symdb_config._includes_re.match(package.name) is not None
if packages.is_user_code(module_origin):
return True

# Check if the package name matches the include patterns
package = packages.filename_to_package(module_origin)
if package is not None and symdb_config._includes_re.match(package.name):
return True

return False


class SymbolDatabaseUploader(BaseModuleWatchdog):
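
With the symbols.py changes above, a module is included if its name matches the include patterns, if its origin resolves to user code, or if the distribution it belongs to matches the include patterns. A hedged, standalone sketch of that decision order; the regex, paths, and parameters are invented for illustration, and the real function uses symdb_config and the packages helpers instead:

    import re
    from pathlib import Path

    includes_re = re.compile(r"^(myservice|companylib)(\.|$)")

    def is_module_included(name, origin_path, package_name, is_user_code):
        if includes_re.match(name):               # 1. module name matches the include patterns
            return True
        if origin_path is None:                   # no file origin: nothing else to check
            return False
        if is_user_code:                          # 2. module is user code (not stdlib/third-party)
            return True
        if package_name and includes_re.match(package_name):
            return True                           # 3. owning package matches the include patterns
        return False

    assert is_module_included("myservice.api", Path("/app/myservice/api.py"), None, True)
    assert not is_module_included("json", Path("/usr/lib/python3.11/json/__init__.py"), None, False)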
