Skip to content

Commit

Permalink
Merge branch '2.13' into backport-10947-to-2.13
Browse files Browse the repository at this point in the history
  • Loading branch information
juanjux authored Oct 7, 2024
2 parents 7aa0019 + 6ba15c8 commit d516374
Show file tree
Hide file tree
Showing 72 changed files with 1,483 additions and 176 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/system-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,14 @@ jobs:
name: logs_parametric
path: artifact.tar.gz

# Aggregate gate job: exposes a single required status check that reflects the
# combined outcome of the `parametric` and `system-tests` jobs.
finished:
runs-on: ubuntu-latest
needs: [parametric, system-tests]
# Run even when a dependency failed, so the gate always reports a result.
if: success() || failure()
steps:
# Exactly one of the two steps runs, mapping the dependency results onto
# this job's own exit status.
- name: True when everything else succeeded
if: needs.parametric.result == 'success' && needs.system-tests.result == 'success'
run: exit 0
- name: Fails if anything else failed
if: needs.parametric.result != 'success' || needs.system-tests.result != 'success'
run: exit 1
2 changes: 1 addition & 1 deletion .gitlab/package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ build_base_venvs:
paths:
- .riot/venv_*
- ddtrace/**/*.so*
- ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe
- ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe*

download_ddtrace_artifacts:
image: registry.ddbuild.io/github-cli:v27480869-eafb11d-2.43.0
Expand Down
10 changes: 10 additions & 0 deletions .gitlab/prepare-oci-package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ BUILD_DIR=sources

echo -n "$PYTHON_PACKAGE_VERSION" > sources/version

# Strip foreign-architecture native binaries from the dependency wheels so the
# package only ships artifacts for the target ${ARCH}.
echo "Cleaning up binaries for ${ARCH}"
if [ "${ARCH}" == "arm64" ]; then
    echo "Removing x86_64 binaries"
    find ../pywheels-dep/ -type f -name '*x86_64*' -exec rm -f {} \;
elif [ "${ARCH}" == "amd64" ]; then
    echo "Removing aarch64 binaries"
    find ../pywheels-dep/ -type f -name '*aarch64*' -exec rm -f {} \;
else
    # Reached both when ARCH is unset and when it holds an unrecognized value;
    # the previous message claimed only "No ARCH set", which was misleading.
    echo "ARCH is '${ARCH:-unset}' (expected arm64 or amd64), not removing any binaries"
fi
cp -r ../pywheels-dep/site-packages* sources/ddtrace_pkgs

cp ../lib-injection/sitecustomize.py sources/
Expand Down
2 changes: 2 additions & 0 deletions .gitlab/tests/appsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ appsec iast:
variables:
SUITE_NAME: "appsec_iast$"
TEST_POSTGRES_HOST: "postgres"
retry: 2
timeout: 25m

appsec iast tdd_propagation:
extends: .test_base_riot_snapshot
Expand Down
5 changes: 2 additions & 3 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ build:
os: ubuntu-22.04
tools:
python: "3.10"
rust: "1.78"
commands:
- git fetch --unshallow || true
- curl -L https://github.com/pypa/hatch/releases/download/hatch-v1.12.0/hatch-x86_64-unknown-linux-gnu.tar.gz | tar zx
- install -t /usr/local/bin hatch
- install -t $HOME/.asdf/bin hatch
- hatch -q
- curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y
- echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> "$BASH_ENV"
- hatch -v run docs:sphinx-build -W -b html docs docs/_build/html
- mv docs/_build $READTHEDOCS_OUTPUT
21 changes: 19 additions & 2 deletions ddtrace/_trace/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class Span(object):
"duration_ns",
# Internal attributes
"_context",
"_local_root",
"_local_root_value",
"_parent",
"_ignored_exceptions",
"_on_finish_callbacks",
Expand Down Expand Up @@ -201,7 +201,7 @@ def __init__(
self._events: List[SpanEvent] = []
self._parent: Optional["Span"] = None
self._ignored_exceptions: Optional[List[Type[Exception]]] = None
self._local_root: Optional["Span"] = None
self._local_root_value: Optional["Span"] = None # None means this is the root span.
self._store: Optional[Dict[str, Any]] = None

def _ignore_exception(self, exc: Type[Exception]) -> None:
Expand Down Expand Up @@ -590,6 +590,23 @@ def context(self) -> Context:
self._context = Context(trace_id=self.trace_id, span_id=self.span_id, is_remote=False)
return self._context

@property
def _local_root(self) -> "Span":
    """Return the local root span of this span's trace.

    The root is held in ``_local_root_value``; ``None`` there is the
    sentinel meaning this span is its own local root.
    """
    root = self._local_root_value
    return self if root is None else root

@_local_root.setter
def _local_root(self, value: "Span") -> None:
    # Encode "self is the local root" as None rather than storing a
    # self-reference.
    self._local_root_value = None if value is self else value

@_local_root.deleter
def _local_root(self) -> None:
    del self._local_root_value

def link_span(self, context: Context, attributes: Optional[Dict[str, Any]] = None) -> None:
"""Defines a causal relationship between two spans"""
if not context.trace_id or not context.span_id:
Expand Down
3 changes: 0 additions & 3 deletions ddtrace/_trace/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,8 +797,6 @@ def _start_span(
span._parent = parent
span._local_root = parent._local_root

if span._local_root is None:
span._local_root = span
for k, v in _get_metas_to_propagate(context):
# We do not want to propagate AppSec propagation headers
# to children spans, only across distributed spans
Expand All @@ -815,7 +813,6 @@ def _start_span(
span_api=span_api,
on_finish=[self._on_span_finish],
)
span._local_root = span
if config.report_hostname:
span.set_tag_str(HOSTNAME_KEY, hostname.get_hostname())

Expand Down
36 changes: 29 additions & 7 deletions ddtrace/appsec/_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,35 @@ def _on_django_func_wrapped(fn_args, fn_kwargs, first_arg_expected_type, *_):
http_req.COOKIES = taint_structure(http_req.COOKIES, OriginType.COOKIE_NAME, OriginType.COOKIE)
http_req.GET = taint_structure(http_req.GET, OriginType.PARAMETER_NAME, OriginType.PARAMETER)
http_req.POST = taint_structure(http_req.POST, OriginType.BODY, OriginType.BODY)
if not is_pyobject_tainted(getattr(http_req, "_body", None)):
http_req._body = taint_pyobject(
http_req.body,
source_name=origin_to_str(OriginType.BODY),
source_value=http_req.body,
source_origin=OriginType.BODY,
)

if (
getattr(http_req, "_body", None) is not None
and len(getattr(http_req, "_body", None)) > 0
and not is_pyobject_tainted(getattr(http_req, "_body", None))
):
try:
http_req._body = taint_pyobject(
http_req._body,
source_name=origin_to_str(OriginType.BODY),
source_value=http_req._body,
source_origin=OriginType.BODY,
)
except AttributeError:
log.debug("IAST can't set attribute http_req._body", exc_info=True)
elif (
getattr(http_req, "body", None) is not None
and len(getattr(http_req, "body", None)) > 0
and not is_pyobject_tainted(getattr(http_req, "body", None))
):
try:
http_req.body = taint_pyobject(
http_req.body,
source_name=origin_to_str(OriginType.BODY),
source_value=http_req.body,
source_origin=OriginType.BODY,
)
except AttributeError:
log.debug("IAST can't set attribute http_req.body", exc_info=True)

http_req.headers = taint_structure(http_req.headers, OriginType.HEADER_NAME, OriginType.HEADER)
http_req.path = taint_pyobject(
Expand Down
3 changes: 1 addition & 2 deletions ddtrace/appsec/_iast/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,11 @@ def _set_iast_error_metric(msg: Text) -> None:
exception_type, exception_instance, _traceback_list = sys.exc_info()
res = []
# first 10 frames are this function, the exception in aspects and the error line
res.extend(traceback.format_stack(limit=10))
res.extend(traceback.format_stack(limit=20))

# get the frame with the error and the error message
result = traceback.format_exception(exception_type, exception_instance, _traceback_list)
res.extend(result[1:])

stack_trace = "".join(res)

tags = {
Expand Down
24 changes: 13 additions & 11 deletions ddtrace/appsec/_iast/_taint_tracking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def taint_pyobject(pyobject: Any, source_name: Any, source_value: Any, source_or
_set_metric_iast_executed_source(source_origin)
return pyobject_newid
except ValueError as e:
iast_taint_log_error("Tainting object error (pyobject type %s): %s" % (type(pyobject), e))
log.debug("Tainting object error (pyobject type %s): %s", type(pyobject), e)
return pyobject


Expand Down Expand Up @@ -201,16 +201,18 @@ def trace_calls_and_returns(frame, event, arg):
return
if event == "call":
f_locals = frame.f_locals
if any([is_pyobject_tainted(f_locals[arg]) for arg in f_locals]):
TAINTED_FRAMES.append(frame)
log.debug("Call to %s on line %s of %s, args: %s", func_name, line_no, filename, frame.f_locals)
log.debug("Tainted arguments:")
for arg in f_locals:
if is_pyobject_tainted(f_locals[arg]):
log.debug("\t%s: %s", arg, f_locals[arg])
log.debug("-----")

return trace_calls_and_returns
try:
if any([is_pyobject_tainted(f_locals[arg]) for arg in f_locals]):
TAINTED_FRAMES.append(frame)
log.debug("Call to %s on line %s of %s, args: %s", func_name, line_no, filename, frame.f_locals)
log.debug("Tainted arguments:")
for arg in f_locals:
if is_pyobject_tainted(f_locals[arg]):
log.debug("\t%s: %s", arg, f_locals[arg])
log.debug("-----")
return trace_calls_and_returns
except AttributeError:
pass
elif event == "return":
if frame in TAINTED_FRAMES:
TAINTED_FRAMES.remove(frame)
Expand Down
59 changes: 59 additions & 0 deletions ddtrace/contrib/internal/celery/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import sys

import celery
from celery import signals

Expand All @@ -15,9 +17,14 @@
from ddtrace.contrib.internal.celery.signals import trace_retry
from ddtrace.ext import SpanKind
from ddtrace.ext import SpanTypes
from ddtrace.internal import core
from ddtrace.internal.logger import get_logger
from ddtrace.pin import _DD_PIN_NAME


log = get_logger(__name__)


def patch_app(app, pin=None):
"""Attach the Pin class to the application and connect
our handlers to Celery signals.
Expand All @@ -41,6 +48,9 @@ def patch_app(app, pin=None):
trace_utils.wrap("celery.beat", "Scheduler.tick", _traced_beat_function(config.celery, "tick"))
pin.onto(celery.beat.Scheduler)

# Patch apply_async
trace_utils.wrap("celery.app.task", "Task.apply_async", _traced_apply_async_function(config.celery, "apply_async"))

# connect to the Signal framework
signals.task_prerun.connect(trace_prerun, weak=False)
signals.task_postrun.connect(trace_postrun, weak=False)
Expand All @@ -65,6 +75,7 @@ def unpatch_app(app):

trace_utils.unwrap(celery.beat.Scheduler, "apply_entry")
trace_utils.unwrap(celery.beat.Scheduler, "tick")
trace_utils.unwrap(celery.app.task.Task, "apply_async")

signals.task_prerun.disconnect(trace_prerun)
signals.task_postrun.disconnect(trace_postrun)
Expand Down Expand Up @@ -96,3 +107,51 @@ def _traced_beat_inner(func, instance, args, kwargs):
return func(*args, **kwargs)

return _traced_beat_inner


def _traced_apply_async_function(integration_config, fn_name, resource_fn=None):
    """
    Wrap ``Task.apply_async`` so that its spans are always closed.

    ``apply_async`` drives a sequence of Celery signals that open and close
    spans in pairs (``before_task_publish`` opens the span that
    ``after_task_publish`` closes; ``task_prerun`` opens the span that
    ``task_postrun`` closes).  An exception raised inside Celery or one of its
    dependencies can prevent the closing signals from firing.  This wrapper
    fetches the spans stored in the core context and finishes any that a
    closing signal did not already handle.
    """

    def _traced_apply_async_inner(func, instance, args, kwargs):
        with core.context_with_data("task_context"):
            try:
                return func(*args, **kwargs)
            except Exception:
                # Record the internal error on whichever spans are still
                # registered, then re-raise the Celery error unchanged.
                exc_info = sys.exc_info()
                for item_name in ("task_span", "prerun_span"):
                    span = core.get_item(item_name)
                    if span:
                        span.set_exc_info(*exc_info)
                raise
            finally:
                publish_span = core.get_item("task_span")
                if publish_span:
                    log.debug(
                        "The after_task_publish signal was not called, so manually closing span: %s",
                        publish_span._pprint(),
                    )
                    publish_span.finish()

                worker_span = core.get_item("prerun_span")
                if worker_span:
                    log.debug(
                        "The task_postrun signal was not called, so manually closing span: %s", worker_span._pprint()
                    )
                    worker_span.finish()

    return _traced_apply_async_inner
7 changes: 7 additions & 0 deletions ddtrace/contrib/internal/celery/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ddtrace.ext import SpanKind
from ddtrace.ext import SpanTypes
from ddtrace.ext import net
from ddtrace.internal import core
from ddtrace.internal.constants import COMPONENT
from ddtrace.internal.logger import get_logger
from ddtrace.propagation.http import HTTPPropagator
Expand Down Expand Up @@ -48,6 +49,9 @@ def trace_prerun(*args, **kwargs):
service = config.celery["worker_service_name"]
span = pin.tracer.trace(c.WORKER_ROOT_SPAN, service=service, resource=task.name, span_type=SpanTypes.WORKER)

# Store an item called "prerun span" in case task_postrun doesn't get called
core.set_item("prerun_span", span)

# set span.kind to the type of request being performed
span.set_tag_str(SPAN_KIND, SpanKind.CONSUMER)

Expand Down Expand Up @@ -111,6 +115,9 @@ def trace_before_publish(*args, **kwargs):
service = config.celery["producer_service_name"]
span = pin.tracer.trace(c.PRODUCER_ROOT_SPAN, service=service, resource=task_name)

# Store an item called "task span" in case after_task_publish doesn't get called
core.set_item("task_span", span)

span.set_tag_str(COMPONENT, config.celery.integration_name)

# set span.kind to the type of request being performed
Expand Down
7 changes: 5 additions & 2 deletions ddtrace/contrib/internal/celery/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,12 @@ def retrieve_task_id(context):
"""
headers = context.get("headers")
body = context.get("body")
if headers:
# Check if the id is in the headers, then check the body for it.
# If we don't check the id first, we could wrongly assume no task_id
# when the task_id is in the body.
if headers and "id" in headers:
# Protocol Version 2 (default from Celery 4.0)
return headers.get("id")
else:
elif body and "id" in body:
# Protocol Version 1
return body.get("id")
2 changes: 1 addition & 1 deletion ddtrace/contrib/internal/django/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def _extract_body(request):
def _remake_body(request):
# some libs that utilize django (Spyne) require the body stream to be unread or else will throw errors
# see: https://github.com/arskom/spyne/blob/f105ec2f41495485fef1211fe73394231b3f76e5/spyne/server/wsgi.py#L538
if request.method in _BODY_METHODS:
if request.method in _BODY_METHODS and getattr(request, "_body", None):
try:
unread_body = io.BytesIO(request._body)
if unread_body.seekable():
Expand Down
11 changes: 10 additions & 1 deletion ddtrace/internal/datadog/profiling/crashtracker/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,16 @@ target_include_directories(crashtracker_exe PRIVATE
..
${Datadog_INCLUDE_DIRS}
)
set_target_properties(crashtracker_exe PROPERTIES INSTALL_RPATH "$ORIGIN/..")

# The CRASHTRACKER_EXE_TARGET_NAME should have been set by dd_wrapper
if (NOT CRASHTRACKER_EXE_TARGET_NAME)
message(FATAL_ERROR "CRASHTRACKER_EXE_TARGET_NAME not set")
endif()

set_target_properties(crashtracker_exe PROPERTIES
INSTALL_RPATH "$ORIGIN/.."
OUTPUT_NAME ${CRASHTRACKER_EXE_TARGET_NAME}
)
target_link_libraries(crashtracker_exe PRIVATE
dd_wrapper
)
Expand Down
Loading

0 comments on commit d516374

Please sign in to comment.