From dc000ae8397a7d8e1c0fb569622ee9d2e42082ac Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Fri, 19 Jul 2024 15:08:39 -0400 Subject: [PATCH] chore(internal): start crashtracker when enabled (#9865) WIP: initial implementation, needs some manual and automated testing changes. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez Co-authored-by: erikayasuda <153395705+erikayasuda@users.noreply.github.com> Co-authored-by: David Sanchez <838104+sanchda@users.noreply.github.com> Co-authored-by: Taegyun Kim Co-authored-by: Federico Mon Co-authored-by: Christophe Papazian Co-authored-by: Juanjo Alvarez Martinez --- ddtrace/bootstrap/preload.py | 12 ++ ddtrace/internal/core/crashtracking.py | 50 ++++++++ .../profiling/crashtracker/__init__.py | 13 +++ ddtrace/internal/runtime/__init__.py | 16 ++- ddtrace/internal/telemetry/constants.py | 11 ++ ddtrace/internal/telemetry/writer.py | 19 +++ ddtrace/settings/crashtracker.py | 25 ++-- .../crashtracker/test_crashtracker.py | 108 ++++++++++++++++++ tests/telemetry/test_telemetry.py | 16 +-- tests/telemetry/test_writer.py | 17 +++ 10 files changed, 265 insertions(+), 22 deletions(-) create mode 100644 ddtrace/internal/core/crashtracking.py diff --git a/ddtrace/bootstrap/preload.py b/ddtrace/bootstrap/preload.py index 64d5fc72edc..d400ccb581e 100644 --- a/ddtrace/bootstrap/preload.py +++ b/ddtrace/bootstrap/preload.py @@ -2,6 +2,7 @@ Bootstrapping code that is run when using the `ddtrace-run` Python entrypoint Add all monkey-patching that needs to run by default here """ + import os # noqa:I001 from ddtrace import config # noqa:F401 @@ -15,6 +16,7 @@ from ddtrace.internal.utils.formats import asbool # noqa:F401 from ddtrace.internal.utils.formats import parse_tags_str # noqa:F401 from ddtrace.settings.asm import config as asm_config # noqa:F401 +from ddtrace.settings.crashtracker import config as crashtracker_config from ddtrace.settings.symbol_db import config as symdb_config # noqa:F401 from ddtrace import tracer @@ -41,6 +43,16 @@ def register_post_preload(func: t.Callable) -> None: log = get_logger(__name__) +# DEV: We want to start the crashtracker as early as possible +if crashtracker_config.enabled: + log.debug("crashtracking enabled via environment variable") + try: + from ddtrace.internal.core import crashtracking + + crashtracking.start() + except Exception: + log.error("failed to enable crashtracking", exc_info=True) + if profiling_config.enabled: log.debug("profiler enabled via environment variable") diff --git a/ddtrace/internal/core/crashtracking.py b/ddtrace/internal/core/crashtracking.py new file mode 100644 index 00000000000..a2d81deb32a --- /dev/null +++ b/ddtrace/internal/core/crashtracking.py @@ -0,0 +1,50 @@ +from typing import Callable + +from ddtrace import config +from ddtrace import version +from ddtrace.internal import agent +from ddtrace.internal.datadog.profiling import crashtracker +from ddtrace.internal.runtime import get_runtime_id +from ddtrace.internal.runtime import on_runtime_id_change +from ddtrace.settings.crashtracker import config as crashtracker_config + + +is_available: bool = crashtracker.is_available +failure_msg: str = crashtracker.failure_msg +is_started: Callable[[], bool] = crashtracker.is_started + + +@on_runtime_id_change +def _update_runtime_id(runtime_id: str) -> None: + crashtracker.set_runtime_id(runtime_id) + + +def start() -> bool: + if not is_available: + return False + + crashtracker.set_url(crashtracker_config.debug_url or agent.get_trace_url()) + crashtracker.set_service(config.service) + crashtracker.set_version(config.version) + crashtracker.set_env(config.env) + crashtracker.set_runtime_id(get_runtime_id()) + crashtracker.set_library_version(version.get_version()) + crashtracker.set_alt_stack(bool(crashtracker_config.alt_stack)) + if crashtracker_config.stacktrace_resolver == "fast": + crashtracker.set_resolve_frames_fast() + elif crashtracker_config.stacktrace_resolver == "full": + crashtracker.set_resolve_frames_full() + elif crashtracker_config.stacktrace_resolver == "safe": + crashtracker.set_resolve_frames_safe() + else: + crashtracker.set_resolve_frames_disable() + + if crashtracker_config.stdout_filename: + crashtracker.set_stdout_filename(crashtracker_config.stdout_filename) + if crashtracker_config.stderr_filename: + crashtracker.set_stderr_filename(crashtracker_config.stderr_filename) + + # Only start if it is enabled + if crashtracker_config.enabled: + return crashtracker.start() + return False diff --git a/ddtrace/internal/datadog/profiling/crashtracker/__init__.py b/ddtrace/internal/datadog/profiling/crashtracker/__init__.py index a4a3542e18b..54d4f68f51c 100644 --- a/ddtrace/internal/datadog/profiling/crashtracker/__init__.py +++ b/ddtrace/internal/datadog/profiling/crashtracker/__init__.py @@ -4,6 +4,10 @@ failure_msg = "" +def _default_return_false(*args, **kwargs): + return False + + try: from ._crashtracker import * # noqa: F403, F401 @@ -11,3 +15,12 @@ except Exception as e: failure_msg = str(e) + + # Crashtracker is used early during startup, and so it must be robust across installations. + # Here we just stub everything. + def __getattr__(name): + if name == "failure_msg": + return failure_msg + if name == "is_available": + return False + return _default_return_false diff --git a/ddtrace/internal/runtime/__init__.py b/ddtrace/internal/runtime/__init__.py index 3ccfcfa362d..e34182aec72 100644 --- a/ddtrace/internal/runtime/__init__.py +++ b/ddtrace/internal/runtime/__init__.py @@ -9,12 +9,22 @@ ] -def _generate_runtime_id(): +def _generate_runtime_id() -> str: return uuid.uuid4().hex -_RUNTIME_ID = _generate_runtime_id() +_RUNTIME_ID: str = _generate_runtime_id() _ANCESTOR_RUNTIME_ID: t.Optional[str] = None +_ON_RUNTIME_ID_CHANGE: t.Set[t.Callable[[str], None]] = set() + + +def on_runtime_id_change(cb: t.Callable[[str], None]) -> None: + """Register a callback to be called when the runtime ID changes. + + This can happen after a fork. + """ + global _ON_RUNTIME_ID_CHANGE + _ON_RUNTIME_ID_CHANGE.add(cb) @forksafe.register @@ -26,6 +36,8 @@ def _set_runtime_id(): _ANCESTOR_RUNTIME_ID = _RUNTIME_ID _RUNTIME_ID = _generate_runtime_id() + for cb in _ON_RUNTIME_ID_CHANGE: + cb(_RUNTIME_ID) def get_runtime_id(): diff --git a/ddtrace/internal/telemetry/constants.py b/ddtrace/internal/telemetry/constants.py index 704a99725dd..e649acd32c5 100644 --- a/ddtrace/internal/telemetry/constants.py +++ b/ddtrace/internal/telemetry/constants.py @@ -70,3 +70,14 @@ TELEMETRY_INJECT_WAS_ATTEMPTED = "DD_LIB_INJECTION_ATTEMPTED" TELEMETRY_LIB_WAS_INJECTED = "DD_LIB_INJECTED" TELEMETRY_LIB_INJECTION_FORCED = "DD_INJECT_FORCE" + + +# Crashtracker +TELEMETRY_CRASHTRACKING_ENABLED = "crashtracking_enabled" # Env var enabled +TELEMETRY_CRASHTRACKING_AVAILABLE = "crashtracking_available" # Feature is available +TELEMETRY_CRASHTRACKING_STARTED = "crashtracking_started" # Crashtracking is running +TELEMETRY_CRASHTRACKING_STDOUT_FILENAME = "crashtracking_stdout_filename" +TELEMETRY_CRASHTRACKING_STDERR_FILENAME = "crashtracking_stderr_filename" +TELEMETRY_CRASHTRACKING_ALT_STACK = "crashtracking_alt_stack" +TELEMETRY_CRASHTRACKING_STACKTRACE_RESOLVER = "crashtracking_stacktrace_resolver" +TELEMETRY_CRASHTRACKING_DEBUG_URL = "crashtracking_debug_url" diff --git a/ddtrace/internal/telemetry/writer.py b/ddtrace/internal/telemetry/writer.py index e608eaede15..9ed3b383d58 100644 --- a/ddtrace/internal/telemetry/writer.py +++ b/ddtrace/internal/telemetry/writer.py @@ -16,12 +16,14 @@ from ...internal import atexit from ...internal import forksafe from ...internal.compat import parse +from ...internal.core import crashtracking from ...internal.module import BaseModuleWatchdog from ...internal.module import origin from ...internal.schema import SCHEMA_VERSION from ...internal.schema import _remove_client_service_names from ...settings import _config as config from ...settings.config import _ConfigSource +from ...settings.crashtracker import config as crashtracker_config from ...settings.dynamic_instrumentation import config as di_config from ...settings.exception_debugging import config as ed_config from ...settings.peer_service import _ps_config @@ -47,6 +49,14 @@ from .constants import TELEMETRY_AGENT_URL from .constants import TELEMETRY_ANALYTICS_ENABLED from .constants import TELEMETRY_CLIENT_IP_ENABLED +from .constants import TELEMETRY_CRASHTRACKING_ALT_STACK +from .constants import TELEMETRY_CRASHTRACKING_AVAILABLE +from .constants import TELEMETRY_CRASHTRACKING_DEBUG_URL +from .constants import TELEMETRY_CRASHTRACKING_ENABLED +from .constants import TELEMETRY_CRASHTRACKING_STACKTRACE_RESOLVER +from .constants import TELEMETRY_CRASHTRACKING_STARTED +from .constants import TELEMETRY_CRASHTRACKING_STDERR_FILENAME +from .constants import TELEMETRY_CRASHTRACKING_STDOUT_FILENAME from .constants import TELEMETRY_DOGSTATSD_PORT from .constants import TELEMETRY_DOGSTATSD_URL from .constants import TELEMETRY_DYNAMIC_INSTRUMENTATION_ENABLED @@ -510,6 +520,15 @@ def _app_started_event(self, register_app_shutdown=True): (TELEMETRY_INJECT_WAS_ATTEMPTED, config._inject_was_attempted, "unknown"), (TELEMETRY_LIB_WAS_INJECTED, config._lib_was_injected, "unknown"), (TELEMETRY_LIB_INJECTION_FORCED, config._inject_force, "unknown"), + # Crashtracker + (TELEMETRY_CRASHTRACKING_ENABLED, crashtracker_config.enabled, "unknown"), + (TELEMETRY_CRASHTRACKING_STARTED, crashtracking.is_started(), "unknown"), + (TELEMETRY_CRASHTRACKING_AVAILABLE, crashtracking.is_available, "unknown"), + (TELEMETRY_CRASHTRACKING_STACKTRACE_RESOLVER, str(crashtracker_config.stacktrace_resolver), "unknown"), + (TELEMETRY_CRASHTRACKING_STDOUT_FILENAME, str(crashtracker_config.stdout_filename), "unknown"), + (TELEMETRY_CRASHTRACKING_STDERR_FILENAME, str(crashtracker_config.stderr_filename), "unknown"), + (TELEMETRY_CRASHTRACKING_DEBUG_URL, str(crashtracker_config.debug_url), "unknown"), + (TELEMETRY_CRASHTRACKING_ALT_STACK, crashtracker_config.alt_stack, "unknown"), ] + get_python_config_vars() ) diff --git a/ddtrace/settings/crashtracker.py b/ddtrace/settings/crashtracker.py index 1d7b1dae61e..af574448f73 100644 --- a/ddtrace/settings/crashtracker.py +++ b/ddtrace/settings/crashtracker.py @@ -2,27 +2,25 @@ from envier import En -from ddtrace.internal.datadog.profiling import crashtracker - -def _derive_stacktrace_resolver(config): - # type: (CrashtrackerConfig) -> t.Optional[str] - resolver = config._stacktrace_resolver or "" +def _derive_stacktrace_resolver(config: "CrashtrackerConfig") -> t.Optional[str]: + resolver = str(config._stacktrace_resolver or "") resolver = resolver.lower() - if resolver in ("fast", "full"): + if resolver in ("fast", "full", "safe"): return resolver return None -def _check_for_crashtracker_available(): +def _check_for_crashtracker_available() -> bool: + from ddtrace.internal.datadog.profiling import crashtracker + return crashtracker.is_available -def _derive_crashtracker_enabled(config): - # type: (CrashtrackerConfig) -> bool +def _derive_crashtracker_enabled(config: "CrashtrackerConfig") -> bool: if not _check_for_crashtracker_available(): return False - return config._enabled + return bool(config._enabled) class CrashtrackerConfig(En): @@ -31,7 +29,7 @@ class CrashtrackerConfig(En): _enabled = En.v( bool, "enabled", - default=False, + default=True, help_type="Boolean", help="Enables the crashtracker", ) @@ -77,6 +75,9 @@ class CrashtrackerConfig(En): default=None, help_type="String", help="How to collect native stack traces during a crash, if at all. Accepted values are 'none', 'fast'," - " and 'full'. The default value is 'none' (no stack traces).", + " 'safe', and 'full'. The default value is 'none' (no stack traces).", ) stacktrace_resolver = En.d(t.Optional[str], _derive_stacktrace_resolver) + + +config = CrashtrackerConfig() diff --git a/tests/internal/crashtracker/test_crashtracker.py b/tests/internal/crashtracker/test_crashtracker.py index cae856f3b00..4359c57e22a 100644 --- a/tests/internal/crashtracker/test_crashtracker.py +++ b/tests/internal/crashtracker/test_crashtracker.py @@ -1,7 +1,10 @@ +import os import sys import pytest +import tests.internal.crashtracker.utils as utils + @pytest.mark.skipif(not sys.platform.startswith("linux"), reason="Linux only") @pytest.mark.subprocess() @@ -274,3 +277,108 @@ def test_crashtracker_raise_sigbus(): data = utils.conn_to_bytes(conn) conn.close() assert b"os_kill" in data + + +preload_code = """ +import ctypes +ctypes.string_at(0) +exit(-1) +""" + + +@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="Linux only") +def test_crashtracker_preload_default(ddtrace_run_python_code_in_subprocess): + # Setup the listening socket before we open ddtrace + port, sock = utils.crashtracker_receiver_bind() + assert sock + + # Call the program + env = os.environ.copy() + env["DD_TRACE_AGENT_URL"] = "http://localhost:%d" % port + stdout, stderr, exitcode, _ = ddtrace_run_python_code_in_subprocess(preload_code, env=env) + + # Check for expected exit condition + assert not stdout + assert not stderr + assert exitcode == -11 # exit code for SIGSEGV + + # Wait for the connection + conn = utils.listen_get_conn(sock) + assert conn + data = utils.conn_to_bytes(conn) + assert data + + +@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="Linux only") +def test_crashtracker_preload_disabled(ddtrace_run_python_code_in_subprocess): + # Setup the listening socket before we open ddtrace + port, sock = utils.crashtracker_receiver_bind() + assert sock + + # Call the program + env = os.environ.copy() + env["DD_TRACE_AGENT_URL"] = "http://localhost:%d" % port + env["DD_CRASHTRACKER_ENABLED"] = "false" + stdout, stderr, exitcode, _ = ddtrace_run_python_code_in_subprocess(preload_code, env=env) + + # Check for expected exit condition + assert not stdout + assert not stderr + assert exitcode == -11 + + # Wait for the connection, which should fail + conn = utils.listen_get_conn(sock) + assert not conn + + +auto_code = """ +import ctypes +import ddtrace.auto +ctypes.string_at(0) +exit(-1) +""" + + +@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="Linux only") +def test_crashtracker_auto_default(run_python_code_in_subprocess): + # Setup the listening socket before we open ddtrace + port, sock = utils.crashtracker_receiver_bind() + assert sock + + # Call the program + env = os.environ.copy() + env["DD_TRACE_AGENT_URL"] = "http://localhost:%d" % port + stdout, stderr, exitcode, _ = run_python_code_in_subprocess(auto_code, env=env) + + # Check for expected exit condition + assert not stdout + assert not stderr + assert exitcode == -11 + + # Wait for the connection + conn = utils.listen_get_conn(sock) + assert conn + data = utils.conn_to_bytes(conn) + assert data + + +@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="Linux only") +def test_crashtracker_auto_disabled(run_python_code_in_subprocess): + # Setup the listening socket before we open ddtrace + port, sock = utils.crashtracker_receiver_bind() + assert sock + + # Call the program + env = os.environ.copy() + env["DD_TRACE_AGENT_URL"] = "http://localhost:%d" % port + env["DD_CRASHTRACKER_ENABLED"] = "false" + stdout, stderr, exitcode, _ = run_python_code_in_subprocess(auto_code, env=env) + + # Check for expected exit condition + assert not stdout + assert not stderr + assert exitcode == -11 + + # Wait for the connection, which should fail + conn = utils.listen_get_conn(sock) + assert not conn diff --git a/tests/telemetry/test_telemetry.py b/tests/telemetry/test_telemetry.py index 89b08203357..beca2e7befb 100644 --- a/tests/telemetry/test_telemetry.py +++ b/tests/telemetry/test_telemetry.py @@ -322,8 +322,6 @@ def test_handled_integration_error(test_agent_session, run_python_code_in_subpro def test_unhandled_integration_error(test_agent_session, ddtrace_run_python_code_in_subprocess): - env = os.environ.copy() - env["DD_PATCH_MODULES"] = "jinja2:False,subprocess:False" code = """ import logging logging.basicConfig() @@ -335,7 +333,7 @@ def test_unhandled_integration_error(test_agent_session, ddtrace_run_python_code f.wsgi_app() """ - _, stderr, status, _ = ddtrace_run_python_code_in_subprocess(code, env=env) + _, stderr, status, _ = ddtrace_run_python_code_in_subprocess(code) assert status == 1, stderr @@ -356,11 +354,13 @@ def test_unhandled_integration_error(test_agent_session, ddtrace_run_python_code integration_events = [event for event in events if event["request_type"] == "app-integrations-change"] integrations = integration_events[0]["payload"]["integrations"] - assert len(integrations) == 1 - assert integrations[0]["enabled"] is True - assert integrations[0]["compatible"] is False - assert "ddtrace/contrib/flask/patch.py:" in integrations[0]["error"] - assert "not enough values to unpack (expected 2, got 0)" in integrations[0]["error"] + + (flask_integration,) = [integration for integration in integrations if integration["name"] == "flask"] + + assert flask_integration["enabled"] is True + assert flask_integration["compatible"] is False + assert "ddtrace/contrib/flask/patch.py:" in flask_integration["error"] + assert "not enough values to unpack (expected 2, got 0)" in flask_integration["error"] metric_events = [event for event in events if event["request_type"] == "generate-metrics"] diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py index b4ff3b2f8f2..8e20eea845e 100644 --- a/tests/telemetry/test_writer.py +++ b/tests/telemetry/test_writer.py @@ -1,4 +1,5 @@ import os +import sys import sysconfig import time from typing import Any # noqa:F401 @@ -128,6 +129,14 @@ def test_app_started_event(telemetry_writer, test_agent_session, mock_time): {"name": "profiling_enabled", "origin": "default", "value": "false"}, {"name": "data_streams_enabled", "origin": "default", "value": "false"}, {"name": "appsec_enabled", "origin": "default", "value": "false"}, + {"name": "crashtracking_alt_stack", "origin": "unknown", "value": False}, + {"name": "crashtracking_available", "origin": "unknown", "value": sys.platform == "linux"}, + {"name": "crashtracking_debug_url", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_enabled", "origin": "unknown", "value": sys.platform == "linux"}, + {"name": "crashtracking_stacktrace_resolver", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_started", "origin": "unknown", "value": False}, + {"name": "crashtracking_stderr_filename", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_stdout_filename", "origin": "unknown", "value": "None"}, { "name": "python_build_gnu_type", "origin": "unknown", @@ -310,6 +319,14 @@ def test_app_started_event_configuration_override( {"name": "profiling_enabled", "origin": "env_var", "value": "true"}, {"name": "data_streams_enabled", "origin": "env_var", "value": "true"}, {"name": "appsec_enabled", "origin": "env_var", "value": "true"}, + {"name": "crashtracking_alt_stack", "origin": "unknown", "value": False}, + {"name": "crashtracking_available", "origin": "unknown", "value": sys.platform == "linux"}, + {"name": "crashtracking_debug_url", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_enabled", "origin": "unknown", "value": sys.platform == "linux"}, + {"name": "crashtracking_stacktrace_resolver", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_started", "origin": "unknown", "value": sys.platform == "linux"}, + {"name": "crashtracking_stderr_filename", "origin": "unknown", "value": "None"}, + {"name": "crashtracking_stdout_filename", "origin": "unknown", "value": "None"}, {"name": "python_build_gnu_type", "origin": "unknown", "value": sysconfig.get_config_var("BUILD_GNU_TYPE")}, {"name": "python_host_gnu_type", "origin": "unknown", "value": sysconfig.get_config_var("HOST_GNU_TYPE")}, {"name": "python_soabi", "origin": "unknown", "value": sysconfig.get_config_var("SOABI")},