chore(ci_visibility): introduce experimental internal coverage collec…

…tor (#8727) Introduces the `ModuleCodeCollector` which collects coverage and executable lines for imported modules. The collector has two modes, one that stores executed lines on the singleton instance, and one that uses context variables and a context manager. This also introduces changes to the `pytest` integration as well as the `CIVisibility` service's use of coverage to feature-flag using the new module collector. The features are gated behind `_DD_USE_INTERNAL_COVERAGE` and `_DD_COVER_SESSION` (which introduces a new `coverage run` like behavior). There are no unit tests though the overall use of the feature flags has been tested quite extensively in the process of collecting performance data. There are no release notes since this is an entirely undocumented feature for the moment. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Gabriele N. Tornetta <[email protected]> Co-authored-by: Gabriele N. Tornetta <[email protected]>
DataDog · May 22, 2024 · 66b96e9 · 66b96e9
1 parent f53484d
commit 66b96e9
Show file tree

Hide file tree

Showing 14 changed files with 716 additions and 4 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -40,6 +40,7 @@ ddtrace/ext/ci_visibility           @DataDog/ci-app-libraries
 ddtrace/ext/test.py                 @DataDog/ci-app-libraries
 ddtrace/internal/ci_visibility      @DataDog/ci-app-libraries
 ddtrace/internal/codeowners.py      @DataDog/apm-core-python @datadog/ci-app-libraries
+ddtrace/internal/coverage           @DataDog/apm-core-python @datadog/ci-app-libraries @Datadog/debugger-python
 tests/internal/test_codeowners.py   @datadog/ci-app-libraries
 tests/ci_visibility                 @DataDog/ci-app-libraries
 tests/tracer/test_ci.py             @DataDog/ci-app-libraries

diff --git a/ddtrace/contrib/pytest/_plugin_v1.py b/ddtrace/contrib/pytest/_plugin_v1.py
@@ -13,6 +13,7 @@
 """
 from doctest import DocTest
 import json
+import os
 import re
 from typing import Dict  # noqa:F401
 
@@ -46,6 +47,7 @@
 from ddtrace.internal.ci_visibility.constants import SUITE_ID as _SUITE_ID
 from ddtrace.internal.ci_visibility.constants import SUITE_TYPE as _SUITE_TYPE
 from ddtrace.internal.ci_visibility.constants import TEST
+from ddtrace.internal.ci_visibility.coverage import USE_DD_COVERAGE
 from ddtrace.internal.ci_visibility.coverage import _module_has_dd_coverage_enabled
 from ddtrace.internal.ci_visibility.coverage import _report_coverage_to_span
 from ddtrace.internal.ci_visibility.coverage import _start_coverage
@@ -59,13 +61,19 @@
 from ddtrace.internal.ci_visibility.utils import get_relative_or_absolute_path_for_path
 from ddtrace.internal.ci_visibility.utils import take_over_logger_stream_handler
 from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal.coverage.code import ModuleCodeCollector
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import asbool
 
 
 log = get_logger(__name__)
 
 _global_skipped_elements = 0
 
+# COVER_SESSION is an experimental feature flag that enables full session-level coverage (similar to `coverage run`).
+# It currently significantly increases test import time and should not be used.
+COVER_SESSION = asbool(os.environ.get("_DD_COVER_SESSION", "false"))
+
 
 def _is_pytest_8_or_later():
     if hasattr(pytest, "version_tuple"):
@@ -859,3 +867,14 @@ def pytest_ddtrace_get_item_test_name(item):
             if item.config.getoption("ddtrace-include-class-name") or item.config.getini("ddtrace-include-class-name"):
                 return "%s.%s" % (item.cls.__name__, item.name)
         return item.name
+
+    @staticmethod
+    @pytest.hookimpl(trylast=True)
+    def pytest_terminal_summary(terminalreporter, exitstatus, config):
+        """Emit the session-level coverage report when both experimental flags are set.
+
+        Does nothing unless USE_DD_COVERAGE and COVER_SESSION are enabled. Otherwise it calls
+        ModuleCodeCollector.report() and then tries to write a JSON report to ``dd_coverage.json``; a failure
+        while writing that file is only logged at debug level.
+        """
+        if not (USE_DD_COVERAGE and COVER_SESSION):
+            return
+
+        ModuleCodeCollector.report()
+        try:
+            ModuleCodeCollector.write_json_report_to_file("dd_coverage.json")
+        except Exception:
+            log.debug("Failed to write coverage report to file", exc_info=True)
diff --git a/ddtrace/contrib/pytest/plugin.py b/ddtrace/contrib/pytest/plugin.py
@@ -11,6 +11,7 @@
         expected failures.
 
 """
+import os
 from typing import Dict  # noqa:F401
 
 import pytest
@@ -22,6 +23,21 @@
 PATCH_ALL_HELP_MSG = "Call ddtrace.patch_all before running tests."
 
 
+def _is_enabled_early(early_config):
+    """Best-effort check of whether the ddtrace plugin is enabled, before the config is fully populated.
+
+    The module watchdog for coverage collection needs to be enabled as early as possible, so this looks
+    directly at the raw invocation arguments and ini values. An explicit opt-out (``--no-ddtrace``, a
+    ``ddtrace`` ini value that is ``False``, or a truthy ``no-ddtrace`` ini value) wins over any opt-in.
+    """
+    cli_args = early_config.invocation_params.args
+
+    opted_out = (
+        "--no-ddtrace" in cli_args
+        or early_config.getini("ddtrace") is False
+        or early_config.getini("no-ddtrace")
+    )
+    if opted_out:
+        return False
+
+    return "--ddtrace" in cli_args or early_config.getini("ddtrace")
+
+
 def is_enabled(config):
     """Check if the ddtrace plugin is enabled."""
     return (config.getoption("ddtrace") or config.getini("ddtrace")) and not config.getoption("no-ddtrace")
@@ -69,6 +85,31 @@ def pytest_addoption(parser):
     parser.addini("ddtrace-include-class-name", DDTRACE_INCLUDE_CLASS_HELP_MSG, type="bool")
 
 
+def pytest_load_initial_conftests(early_config, parser, args):
+    """Install the experimental ModuleCodeCollector before the initial conftest files are loaded.
+
+    Nothing happens unless the plugin is enabled according to _is_enabled_early(). When USE_DD_COVERAGE is
+    set, the collector is installed if it is not already, and session-wide coverage is started when
+    _DD_COVER_SESSION is also set. When only _DD_COVER_SESSION is set, a warning is logged instead.
+    """
+    if not _is_enabled_early(early_config):
+        return
+
+    # Imports are kept local so that nothing is loaded when the plugin is disabled.
+    from ddtrace.internal.ci_visibility.coverage import USE_DD_COVERAGE
+    from ddtrace.internal.logger import get_logger
+    from ddtrace.internal.utils.formats import asbool
+
+    log = get_logger(__name__)
+
+    cover_session = asbool(os.environ.get("_DD_COVER_SESSION", "false"))
+
+    if not USE_DD_COVERAGE:
+        if cover_session:
+            log.warning(
+                "_DD_COVER_SESSION must be used with _DD_USE_INTERNAL_COVERAGE but not DD_CIVISIBILITY_ITR_ENABLED"
+            )
+        return
+
+    # Enables experimental use of ModuleCodeCollector for coverage collection.
+    from ddtrace.internal.coverage.code import ModuleCodeCollector
+
+    if not ModuleCodeCollector.is_installed():
+        ModuleCodeCollector.install()
+    if cover_session:
+        ModuleCodeCollector.start_coverage()
+
+
 def pytest_configure(config):
     config.addinivalue_line("markers", "dd_tags(**kwargs): add tags to current span")
     if is_enabled(config):

diff --git a/ddtrace/internal/ci_visibility/coverage.py b/ddtrace/internal/ci_visibility/coverage.py
@@ -1,10 +1,12 @@
 from itertools import groupby
 import json
+import os
 from typing import Dict  # noqa:F401
 from typing import Iterable  # noqa:F401
 from typing import List  # noqa:F401
 from typing import Optional  # noqa:F401
 from typing import Tuple  # noqa:F401
+from typing import Union  # noqa:F401
 
 import ddtrace
 from ddtrace.internal.ci_visibility.constants import COVERAGE_TAG_NAME
@@ -16,12 +18,18 @@
 from ddtrace.internal.ci_visibility.telemetry.coverage import record_code_coverage_finished
 from ddtrace.internal.ci_visibility.telemetry.coverage import record_code_coverage_started
 from ddtrace.internal.ci_visibility.utils import get_relative_or_absolute_path_for_path
+from ddtrace.internal.coverage.code import ModuleCodeCollector
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import asbool
 
 
 log = get_logger(__name__)
 _global_relative_file_paths_for_cov: Dict[str, Dict[str, str]] = {}
 
+# This feature-flags experimental collection of code coverage via our internal ModuleCodeCollector.
+# It is disabled by default because it is not production-ready.
+USE_DD_COVERAGE = asbool(os.environ.get("_DD_USE_INTERNAL_COVERAGE", "false"))
+
 try:
     from coverage import Coverage
     from coverage import version_info as coverage_version
@@ -52,19 +60,30 @@ def _initialize_coverage(root_dir):
 
 
 def _start_coverage(root_dir: str):
+    """Start coverage collection and return the object that tracks it.
+
+    With the experimental USE_DD_COVERAGE flag set, a new ModuleCodeCollector.CollectInContext is returned
+    (it is not entered here). Otherwise _initialize_coverage(root_dir) is called, and its result is started
+    and returned.
+    """
+    if USE_DD_COVERAGE:
+        # Experimental feature to use internal coverage collection
+        return ModuleCodeCollector.CollectInContext()
     coverage = _initialize_coverage(root_dir)
     coverage.start()
     return coverage
 
 
 def _stop_coverage(module):
+    """Stop coverage collection for ``module``.
+
+    With the experimental USE_DD_COVERAGE flag set, ``module._dd_coverage`` is treated as a context manager
+    and exited. Otherwise, if coverage is enabled on the module, the coverage object is stopped, erased and
+    removed from the module.
+    """
+    # Experimental feature to use internal coverage collection
+    if USE_DD_COVERAGE:
+        # Follow the context manager protocol: __exit__ receives the exception triple, all None on a clean
+        # exit. This matches how the same context object is exited when coverage is reported to a span.
+        module._dd_coverage.__exit__(None, None, None)
+        return
     if _module_has_dd_coverage_enabled(module):
         module._dd_coverage.stop()
         module._dd_coverage.erase()
         del module._dd_coverage
 
 
 def _module_has_dd_coverage_enabled(module, silent_mode: bool = False) -> bool:
+    # Experimental feature to use internal coverage collection
+    if USE_DD_COVERAGE:
+        return hasattr(module, "_dd_coverage")
     if not hasattr(module, "_dd_coverage"):
         if not silent_mode:
             log.warning("Datadog Coverage has not been initiated")
@@ -84,6 +103,13 @@ def _switch_coverage_context(
     coverage_data: Coverage, unique_test_name: str, framework: Optional[TEST_FRAMEWORKS] = None
 ):
     record_code_coverage_started(COVERAGE_LIBRARY.COVERAGEPY, framework)
+    # Experimental feature to use internal coverage collection
+    if isinstance(coverage_data, ModuleCodeCollector.CollectInContext):
+        if USE_DD_COVERAGE:
+            # In this case, coverage_data is the context manager supplied by ModuleCodeCollector.CollectInContext
+            coverage_data.__enter__()
+        return
+
     if not _coverage_has_valid_data(coverage_data, silent_mode=True):
         return
     coverage_data._collector.data.clear()  # type: ignore[union-attr]
@@ -97,6 +123,22 @@ def _switch_coverage_context(
 def _report_coverage_to_span(
     coverage_data: Coverage, span: ddtrace.Span, root_dir: str, framework: Optional[TEST_FRAMEWORKS] = None
 ):
+    # Experimental feature to use internal coverage collection
+    if isinstance(coverage_data, ModuleCodeCollector.CollectInContext):
+        if USE_DD_COVERAGE:
+            # In this case, coverage_data is the context manager supplied by ModuleCodeCollector.CollectInContext
+            files = ModuleCodeCollector.report_seen_lines()
+            if not files:
+                return
+            span.set_tag_str(
+                COVERAGE_TAG_NAME,
+                json.dumps({"files": files}),
+            )
+            record_code_coverage_finished(COVERAGE_LIBRARY.COVERAGEPY, framework)
+            coverage_data.__exit__(None, None, None)
+
+        return
+
     span_id = str(span.trace_id)
     if not _coverage_has_valid_data(coverage_data):
         record_code_coverage_error()

diff --git a/ddtrace/internal/coverage/__init__.py b/ddtrace/internal/coverage/__init__.py
diff --git a/ddtrace/internal/coverage/_native.c b/ddtrace/internal/coverage/_native.c
@@ -0,0 +1,61 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#if PY_VERSION_HEX < 0x030c0000
+#if defined __GNUC__ && defined HAVE_STD_ATOMIC
+#undef HAVE_STD_ATOMIC
+#endif
+#endif
+
+// ----------------------------------------------------------------------------
+// Replace, in place, every element of a tuple that is the same object as `item` with `replacement`.
+//
+// Python signature: replace_in_tuple(tuple, item, replacement) -> None
+// Elements are compared by identity (pointer equality), not by ==.
+// Returns NULL with an exception set if the arguments do not parse (e.g. the first one is not a tuple).
+static PyObject*
+replace_in_tuple(PyObject* m, PyObject* args)
+{
+    PyObject* tuple = NULL;
+    PyObject* item = NULL;
+    PyObject* replacement = NULL;
+
+    if (!PyArg_ParseTuple(args, "O!OO", &PyTuple_Type, &tuple, &item, &replacement))
+        return NULL;
+
+    // "O!" has already checked the type, so the unchecked macros are safe here; the length does not
+    // change while the slots are patched, so it is read once.
+    const Py_ssize_t size = PyTuple_GET_SIZE(tuple);
+
+    for (Py_ssize_t i = 0; i < size; i++) {
+        PyObject* current = PyTuple_GET_ITEM(tuple, i);
+        if (current != item)
+            continue;
+
+        // !!! DANGER !!! Tuples are meant to be immutable; this overwrites the slot in place.
+        // Take the new reference before storing it and release the old one last, so the tuple never
+        // holds a pointer it does not own.
+        Py_INCREF(replacement);
+        PyTuple_SET_ITEM(tuple, i, replacement);
+        Py_DECREF(current);
+    }
+
+    Py_RETURN_NONE;
+}
+
+// ----------------------------------------------------------------------------
+// Method table for the module: a single entry, replace_in_tuple, taking positional arguments
+// (METH_VARARGS), followed by the terminating sentinel.
+static PyMethodDef native_methods[] = {
+    { "replace_in_tuple", replace_in_tuple, METH_VARARGS, "Replace an item in a tuple." },
+    { NULL, NULL, 0, NULL } /* Sentinel */
+};
+
+// ----------------------------------------------------------------------------
+// Module definition for "_native": no module docstring, no per-interpreter state (-1), and the methods
+// listed in native_methods.
+static struct PyModuleDef nativemodule = {
+    PyModuleDef_HEAD_INIT,
+    "_native", /* name of module */
+    NULL,      /* module documentation, may be NULL */
+    -1,        /* size of per-interpreter state of the module,
+                  or -1 if the module keeps state in global variables. */
+    native_methods,
+};
+
+// ----------------------------------------------------------------------------
+// Module initialization entry point for "_native".
+// Returns the new module object, or NULL if PyModule_Create fails.
+PyMODINIT_FUNC
+PyInit__native(void)
+{
+    // No extra setup is needed, so the result of PyModule_Create (NULL on failure) is returned as-is.
+    return PyModule_Create(&nativemodule);
+}
diff --git a/ddtrace/internal/coverage/_native.pyi b/ddtrace/internal/coverage/_native.pyi
@@ -0,0 +1,3 @@
+import typing as t
+
+# Implemented in _native.c: walks ``tup`` and overwrites, in place, every element that is the same object
+# as ``item`` (identity, not equality) with ``replacement``.
+def replace_in_tuple(tup: tuple, item: t.Any, replacement: t.Any) -> None: ...