From 64c9b3562171c6cfe95f99027e391756ec125b88 Mon Sep 17 00:00:00 2001 From: Romain Komorn <136473744+romainkomorndatadog@users.noreply.github.com> Date: Sat, 20 Jul 2024 13:21:59 +0100 Subject: [PATCH] chore(telemetry): use enum for telemetry log levels (#9520) Makes type-checking more accurate the telemetry writer's `add_log()` method by specifying an enum. For example, improperly specifying `TELEMETRY_LOG_LEVEL.POTATO` instead of `.ERROR` would result in: ``` ddtrace/appsec/_iast/_metrics.py:77: error: "Type[TELEMETRY_LOG_LEVEL]" has no attribute "POTATO" [attr-defined] ``` This was prompted by #9349 having failures in `system-tests` due to the use of `WARNING` instead of `WARN`. The telemetry API docs currently specify: Log level|Remote collection policy ---|--- ERROR|Enabled by default, configured by environment variable `DD_TELEMETRY_LOG_COLLECTION_ENABLED` WARN|Enabled by default, configured by same environment variable as ERROR DEBUG|Can be enabled locally by the end user when asked by the support team. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/_trace/processor/__init__.py | 5 ++-- ddtrace/appsec/_iast/_metrics.py | 3 ++- ddtrace/appsec/_metrics.py | 3 ++- ddtrace/internal/telemetry/constants.py | 10 ++++++++ ddtrace/internal/telemetry/writer.py | 5 ++-- tests/telemetry/test_telemetry_metrics.py | 31 ++++++++++++----------- 6 files changed, 36 insertions(+), 21 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index 01d066f58c4..609a965aa16 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -24,6 +24,7 @@ from ddtrace.internal.sampling import SpanSamplingRule from ddtrace.internal.sampling import is_single_span_sampled from ddtrace.internal.service import ServiceStatusError +from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_TRACER from ddtrace.internal.writer import TraceWriter from ddtrace.sampler import BaseSampler @@ -292,7 +293,7 @@ def on_span_finish(self, span: Span) -> None: # e.g. `tracer.configure()` is called after starting a span if span.trace_id not in self._traces: log_msg = "finished span not connected to a trace" - telemetry.telemetry_writer.add_log("ERROR", log_msg) + telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, log_msg) log.debug("%s: %s", log_msg, span) return @@ -316,7 +317,7 @@ def on_span_finish(self, span: Span) -> None: trace.num_finished -= num_finished if trace.num_finished != 0: log_msg = "unexpected finished span count" - telemetry.telemetry_writer.add_log("ERROR", log_msg) + telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, log_msg) log.debug("%s (%s) for span %s", log_msg, num_finished, span) trace.num_finished = 0 diff --git a/ddtrace/appsec/_iast/_metrics.py b/ddtrace/appsec/_iast/_metrics.py index 521b5e5d79d..03a24466ec2 100644 --- a/ddtrace/appsec/_iast/_metrics.py +++ b/ddtrace/appsec/_iast/_metrics.py @@ -9,6 +9,7 @@ from ddtrace.appsec._deduplications import deduplication from ddtrace.internal import telemetry from ddtrace.internal.logger import get_logger +from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_IAST @@ -73,7 +74,7 @@ def _set_iast_error_metric(msg: Text) -> None: tags = { "lib_language": "python", } - telemetry.telemetry_writer.add_log("ERROR", msg, stack_trace=stack_trace, tags=tags) + telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, msg, stack_trace=stack_trace, tags=tags) except Exception: log.warning("Error reporting ASM WAF logs metrics", exc_info=True) diff --git a/ddtrace/appsec/_metrics.py b/ddtrace/appsec/_metrics.py index a501f3c3259..776c25f1735 100644 --- a/ddtrace/appsec/_metrics.py +++ b/ddtrace/appsec/_metrics.py @@ -4,6 +4,7 @@ from ddtrace.appsec._deduplications import deduplication from ddtrace.internal import telemetry from ddtrace.internal.logger import get_logger +from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_APPSEC @@ -21,7 +22,7 @@ def _set_waf_error_metric(msg: str, stack_trace: str, info: DDWaf_info) -> None: } if info and info.version: tags["event_rules_version"] = info.version - telemetry.telemetry_writer.add_log("ERROR", msg, stack_trace=stack_trace, tags=tags) + telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, msg, stack_trace=stack_trace, tags=tags) except Exception: log.warning("Error reporting ASM WAF logs metrics", exc_info=True) diff --git a/ddtrace/internal/telemetry/constants.py b/ddtrace/internal/telemetry/constants.py index e649acd32c5..bf79727b270 100644 --- a/ddtrace/internal/telemetry/constants.py +++ b/ddtrace/internal/telemetry/constants.py @@ -1,3 +1,6 @@ +from enum import Enum + + TELEMETRY_NAMESPACE_TAG_TRACER = "tracers" TELEMETRY_NAMESPACE_TAG_APPSEC = "appsec" TELEMETRY_NAMESPACE_TAG_IAST = "iast" @@ -6,6 +9,13 @@ TELEMETRY_TYPE_DISTRIBUTION = "distributions" TELEMETRY_TYPE_LOGS = "logs" + +class TELEMETRY_LOG_LEVEL(Enum): + DEBUG = "DEBUG" + WARNING = "WARN" + ERROR = "ERROR" + + # Configuration names must map to values supported by backend services: # https://github.com/DataDog/dd-go/blob/f88e85d64b173e7733ac03e23576d1c9be37f32e/trace/apps/tracer-telemetry-intake/telemetry-payload/static/config_norm_rules.json TELEMETRY_DYNAMIC_INSTRUMENTATION_ENABLED = "DD_DYNAMIC_INSTRUMENTATION_ENABLED" diff --git a/ddtrace/internal/telemetry/writer.py b/ddtrace/internal/telemetry/writer.py index 9ed3b383d58..1d9f3112d47 100644 --- a/ddtrace/internal/telemetry/writer.py +++ b/ddtrace/internal/telemetry/writer.py @@ -65,6 +65,7 @@ from .constants import TELEMETRY_INJECT_WAS_ATTEMPTED from .constants import TELEMETRY_LIB_INJECTION_FORCED from .constants import TELEMETRY_LIB_WAS_INJECTED +from .constants import TELEMETRY_LOG_LEVEL # noqa:F401 from .constants import TELEMETRY_OBFUSCATION_QUERY_STRING_PATTERN from .constants import TELEMETRY_OTEL_ENABLED from .constants import TELEMETRY_PARTIAL_FLUSH_ENABLED @@ -652,7 +653,7 @@ def add_configurations(self, configuration_list): } def add_log(self, level, message, stack_trace="", tags=None): - # type: (str, str, str, Optional[Dict]) -> None + # type: (TELEMETRY_LOG_LEVEL, str, str, Optional[Dict]) -> None """ Queues log. This event is meant to send library logs to Datadog’s backend through the Telemetry intake. This will make support cycles easier and ensure we know about potentially silent issues in libraries. @@ -664,7 +665,7 @@ def add_log(self, level, message, stack_trace="", tags=None): data = LogData( { "message": message, - "level": level, + "level": level.value, "tracer_time": int(time.time()), } ) diff --git a/tests/telemetry/test_telemetry_metrics.py b/tests/telemetry/test_telemetry_metrics.py index a9f46257abe..a3ea6051b8b 100644 --- a/tests/telemetry/test_telemetry_metrics.py +++ b/tests/telemetry/test_telemetry_metrics.py @@ -2,6 +2,7 @@ from mock.mock import ANY +from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_APPSEC from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_TRACER from ddtrace.internal.telemetry.constants import TELEMETRY_TYPE_DISTRIBUTION @@ -335,10 +336,10 @@ def test_send_metric_flush_and_distributions_series_is_restarted(telemetry_write def test_send_log_metric_simple(telemetry_writer, test_agent_session, mock_time): """Check the queue of metrics is empty after run periodic method of PeriodicService""" with override_global_config(dict(_telemetry_dependency_collection=False)): - telemetry_writer.add_log("WARNING", "test error 1") + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1") expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "tracer_time": 1642544540, }, @@ -350,10 +351,10 @@ def test_send_log_metric_simple(telemetry_writer, test_agent_session, mock_time) def test_send_log_metric_simple_tags(telemetry_writer, test_agent_session, mock_time): """Check the queue of metrics is empty after run periodic method of PeriodicService""" with override_global_config(dict(_telemetry_dependency_collection=False)): - telemetry_writer.add_log("WARNING", "test error 1", tags={"a": "b", "c": "d"}) + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", tags={"a": "b", "c": "d"}) expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "tracer_time": 1642544540, "tags": "a:b,c:d", @@ -366,10 +367,10 @@ def test_send_log_metric_simple_tags(telemetry_writer, test_agent_session, mock_ def test_send_multiple_log_metric(telemetry_writer, test_agent_session, mock_time): """Check the queue of metrics is empty after run periodic method of PeriodicService""" with override_global_config(dict(_telemetry_dependency_collection=False)): - telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"}) + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"}) expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "stack_trace": "Traceback:\nValueError", "tracer_time": 1642544540, @@ -379,7 +380,7 @@ def test_send_multiple_log_metric(telemetry_writer, test_agent_session, mock_tim _assert_logs(test_agent_session, expected_payload) - telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"}) + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"}) _assert_logs(test_agent_session, expected_payload) @@ -387,11 +388,11 @@ def test_send_multiple_log_metric(telemetry_writer, test_agent_session, mock_tim def test_send_multiple_log_metric_no_duplicates(telemetry_writer, test_agent_session, mock_time): with override_global_config(dict(_telemetry_dependency_collection=False)): for _ in range(10): - telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"}) + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"}) expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "stack_trace": "Traceback:\nValueError", "tracer_time": 1642544540, @@ -405,11 +406,11 @@ def test_send_multiple_log_metric_no_duplicates(telemetry_writer, test_agent_ses def test_send_multiple_log_metric_no_duplicates_for_each_interval(telemetry_writer, test_agent_session, mock_time): with override_global_config(dict(_telemetry_dependency_collection=False)): for _ in range(10): - telemetry_writer.add_log("WARNING", "test error 1") + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1") expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "tracer_time": 1642544540, }, @@ -418,7 +419,7 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval(telemetry_writ _assert_logs(test_agent_session, expected_payload) for _ in range(10): - telemetry_writer.add_log("WARNING", "test error 1") + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1") _assert_logs(test_agent_session, expected_payload) @@ -427,11 +428,11 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval_check_time(tel with override_global_config(dict(_telemetry_dependency_collection=False)): for _ in range(3): sleep(0.1) - telemetry_writer.add_log("WARNING", "test error 1") + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1") expected_payload = [ { - "level": "WARNING", + "level": "WARN", "message": "test error 1", "tracer_time": ANY, }, @@ -441,6 +442,6 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval_check_time(tel for _ in range(3): sleep(0.1) - telemetry_writer.add_log("WARNING", "test error 1") + telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1") _assert_logs(test_agent_session, expected_payload)