Skip to content

Commit

Permalink
chore(telemetry): use enum for telemetry log levels (#9520)
Browse files Browse the repository at this point in the history
Makes type-checking more accurate the telemetry writer's `add_log()`
method by specifying an enum.

For example, improperly specifying `TELEMETRY_LOG_LEVEL.POTATO` instead
of `.ERROR` would result in:
```
ddtrace/appsec/_iast/_metrics.py:77: error: "Type[TELEMETRY_LOG_LEVEL]" has no attribute "POTATO"  [attr-defined]
```

This was prompted by #9349 having failures in `system-tests` due to the
use of `WARNING` instead of `WARN`.

The telemetry API docs currently specify:

Log level|Remote collection policy
---|--- 
ERROR|Enabled by default, configured by environment variable
`DD_TELEMETRY_LOG_COLLECTION_ENABLED`
WARN|Enabled by default, configured by same environment variable as
ERROR
DEBUG|Can be enabled locally by the end user when asked by the support
team.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
romainkomorndatadog authored Jul 20, 2024
1 parent 8ff6e59 commit 64c9b35
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 21 deletions.
5 changes: 3 additions & 2 deletions ddtrace/_trace/processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ddtrace.internal.sampling import SpanSamplingRule
from ddtrace.internal.sampling import is_single_span_sampled
from ddtrace.internal.service import ServiceStatusError
from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_TRACER
from ddtrace.internal.writer import TraceWriter
from ddtrace.sampler import BaseSampler
Expand Down Expand Up @@ -292,7 +293,7 @@ def on_span_finish(self, span: Span) -> None:
# e.g. `tracer.configure()` is called after starting a span
if span.trace_id not in self._traces:
log_msg = "finished span not connected to a trace"
telemetry.telemetry_writer.add_log("ERROR", log_msg)
telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, log_msg)
log.debug("%s: %s", log_msg, span)
return

Expand All @@ -316,7 +317,7 @@ def on_span_finish(self, span: Span) -> None:
trace.num_finished -= num_finished
if trace.num_finished != 0:
log_msg = "unexpected finished span count"
telemetry.telemetry_writer.add_log("ERROR", log_msg)
telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, log_msg)
log.debug("%s (%s) for span %s", log_msg, num_finished, span)
trace.num_finished = 0

Expand Down
3 changes: 2 additions & 1 deletion ddtrace/appsec/_iast/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ddtrace.appsec._deduplications import deduplication
from ddtrace.internal import telemetry
from ddtrace.internal.logger import get_logger
from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_IAST


Expand Down Expand Up @@ -73,7 +74,7 @@ def _set_iast_error_metric(msg: Text) -> None:
tags = {
"lib_language": "python",
}
telemetry.telemetry_writer.add_log("ERROR", msg, stack_trace=stack_trace, tags=tags)
telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, msg, stack_trace=stack_trace, tags=tags)
except Exception:
log.warning("Error reporting ASM WAF logs metrics", exc_info=True)

Expand Down
3 changes: 2 additions & 1 deletion ddtrace/appsec/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from ddtrace.appsec._deduplications import deduplication
from ddtrace.internal import telemetry
from ddtrace.internal.logger import get_logger
from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_APPSEC


Expand All @@ -21,7 +22,7 @@ def _set_waf_error_metric(msg: str, stack_trace: str, info: DDWaf_info) -> None:
}
if info and info.version:
tags["event_rules_version"] = info.version
telemetry.telemetry_writer.add_log("ERROR", msg, stack_trace=stack_trace, tags=tags)
telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, msg, stack_trace=stack_trace, tags=tags)
except Exception:
log.warning("Error reporting ASM WAF logs metrics", exc_info=True)

Expand Down
10 changes: 10 additions & 0 deletions ddtrace/internal/telemetry/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from enum import Enum


TELEMETRY_NAMESPACE_TAG_TRACER = "tracers"
TELEMETRY_NAMESPACE_TAG_APPSEC = "appsec"
TELEMETRY_NAMESPACE_TAG_IAST = "iast"
Expand All @@ -6,6 +9,13 @@
TELEMETRY_TYPE_DISTRIBUTION = "distributions"
TELEMETRY_TYPE_LOGS = "logs"


class TELEMETRY_LOG_LEVEL(Enum):
DEBUG = "DEBUG"
WARNING = "WARN"
ERROR = "ERROR"


# Configuration names must map to values supported by backend services:
# https://github.com/DataDog/dd-go/blob/f88e85d64b173e7733ac03e23576d1c9be37f32e/trace/apps/tracer-telemetry-intake/telemetry-payload/static/config_norm_rules.json
TELEMETRY_DYNAMIC_INSTRUMENTATION_ENABLED = "DD_DYNAMIC_INSTRUMENTATION_ENABLED"
Expand Down
5 changes: 3 additions & 2 deletions ddtrace/internal/telemetry/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from .constants import TELEMETRY_INJECT_WAS_ATTEMPTED
from .constants import TELEMETRY_LIB_INJECTION_FORCED
from .constants import TELEMETRY_LIB_WAS_INJECTED
from .constants import TELEMETRY_LOG_LEVEL # noqa:F401
from .constants import TELEMETRY_OBFUSCATION_QUERY_STRING_PATTERN
from .constants import TELEMETRY_OTEL_ENABLED
from .constants import TELEMETRY_PARTIAL_FLUSH_ENABLED
Expand Down Expand Up @@ -652,7 +653,7 @@ def add_configurations(self, configuration_list):
}

def add_log(self, level, message, stack_trace="", tags=None):
# type: (str, str, str, Optional[Dict]) -> None
# type: (TELEMETRY_LOG_LEVEL, str, str, Optional[Dict]) -> None
"""
Queues log. This event is meant to send library logs to Datadog’s backend through the Telemetry intake.
This will make support cycles easier and ensure we know about potentially silent issues in libraries.
Expand All @@ -664,7 +665,7 @@ def add_log(self, level, message, stack_trace="", tags=None):
data = LogData(
{
"message": message,
"level": level,
"level": level.value,
"tracer_time": int(time.time()),
}
)
Expand Down
31 changes: 16 additions & 15 deletions tests/telemetry/test_telemetry_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from mock.mock import ANY

from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_APPSEC
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_TRACER
from ddtrace.internal.telemetry.constants import TELEMETRY_TYPE_DISTRIBUTION
Expand Down Expand Up @@ -335,10 +336,10 @@ def test_send_metric_flush_and_distributions_series_is_restarted(telemetry_write
def test_send_log_metric_simple(telemetry_writer, test_agent_session, mock_time):
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
with override_global_config(dict(_telemetry_dependency_collection=False)):
telemetry_writer.add_log("WARNING", "test error 1")
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1")
expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"tracer_time": 1642544540,
},
Expand All @@ -350,10 +351,10 @@ def test_send_log_metric_simple(telemetry_writer, test_agent_session, mock_time)
def test_send_log_metric_simple_tags(telemetry_writer, test_agent_session, mock_time):
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
with override_global_config(dict(_telemetry_dependency_collection=False)):
telemetry_writer.add_log("WARNING", "test error 1", tags={"a": "b", "c": "d"})
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", tags={"a": "b", "c": "d"})
expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"tracer_time": 1642544540,
"tags": "a:b,c:d",
Expand All @@ -366,10 +367,10 @@ def test_send_log_metric_simple_tags(telemetry_writer, test_agent_session, mock_
def test_send_multiple_log_metric(telemetry_writer, test_agent_session, mock_time):
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
with override_global_config(dict(_telemetry_dependency_collection=False)):
telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"})
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"})
expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"stack_trace": "Traceback:\nValueError",
"tracer_time": 1642544540,
Expand All @@ -379,19 +380,19 @@ def test_send_multiple_log_metric(telemetry_writer, test_agent_session, mock_tim

_assert_logs(test_agent_session, expected_payload)

telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"})
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"})

_assert_logs(test_agent_session, expected_payload)


def test_send_multiple_log_metric_no_duplicates(telemetry_writer, test_agent_session, mock_time):
with override_global_config(dict(_telemetry_dependency_collection=False)):
for _ in range(10):
telemetry_writer.add_log("WARNING", "test error 1", "Traceback:\nValueError", {"a": "b"})
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1", "Traceback:\nValueError", {"a": "b"})

expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"stack_trace": "Traceback:\nValueError",
"tracer_time": 1642544540,
Expand All @@ -405,11 +406,11 @@ def test_send_multiple_log_metric_no_duplicates(telemetry_writer, test_agent_ses
def test_send_multiple_log_metric_no_duplicates_for_each_interval(telemetry_writer, test_agent_session, mock_time):
with override_global_config(dict(_telemetry_dependency_collection=False)):
for _ in range(10):
telemetry_writer.add_log("WARNING", "test error 1")
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1")

expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"tracer_time": 1642544540,
},
Expand All @@ -418,7 +419,7 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval(telemetry_writ
_assert_logs(test_agent_session, expected_payload)

for _ in range(10):
telemetry_writer.add_log("WARNING", "test error 1")
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1")

_assert_logs(test_agent_session, expected_payload)

Expand All @@ -427,11 +428,11 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval_check_time(tel
with override_global_config(dict(_telemetry_dependency_collection=False)):
for _ in range(3):
sleep(0.1)
telemetry_writer.add_log("WARNING", "test error 1")
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1")

expected_payload = [
{
"level": "WARNING",
"level": "WARN",
"message": "test error 1",
"tracer_time": ANY,
},
Expand All @@ -441,6 +442,6 @@ def test_send_multiple_log_metric_no_duplicates_for_each_interval_check_time(tel

for _ in range(3):
sleep(0.1)
telemetry_writer.add_log("WARNING", "test error 1")
telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.WARNING, "test error 1")

_assert_logs(test_agent_session, expected_payload)

0 comments on commit 64c9b35

Please sign in to comment.