From d837ff53aee29552ebcb7e00ab32aabe88edf165 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Fri, 3 May 2024 10:04:19 -0400 Subject: [PATCH 01/26] chore(ci): simplify the flask simple benchmark suite (#8902) This change aims to simplify the Flask simple benchmark suite by using the Flask test client instead of using gunicorn to spin up a subprocess server + requests to make http requests to the server. The primary goal was to simplify the code/coordination needed for the test, and to make the test suite faster. The downside is we are moving away from a theoretical "end user experience" latency measurement to more of a "worst case" since we are removing network and server latency from the equation. However, removing these pieces _should_ give us more stable numbers since there are fewer moving pieces. If we choose to adopt this new approach then the existing historical trends/measurements will no longer be comparable. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Dmytro Yurchenko <88330911+ddyurchenko@users.noreply.github.com> --- .gitlab/benchmarks.yml | 4 +- .gitlab/benchmarks/bp-runner.yml | 13 ++ benchmarks/flask_simple/app.py | 57 ------- benchmarks/flask_simple/gunicorn.conf.py | 34 ---- .../flask_simple/requirements_scenario.txt | 2 - benchmarks/flask_simple/scenario.py | 149 +++++++++++++++++- benchmarks/flask_simple/utils.py | 20 --- 7 files changed, 157 insertions(+), 122 deletions(-) create mode 100644 .gitlab/benchmarks/bp-runner.yml delete mode 100644 benchmarks/flask_simple/app.py delete mode 100644 benchmarks/flask_simple/gunicorn.conf.py delete mode 100644 benchmarks/flask_simple/utils.py diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index 15f70b54997..e6a83ad2bdb 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -15,7 +15,7 @@ variables: - git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/" - git clone --branch dd-trace-py https://github.com/DataDog/benchmarking-platform /platform && cd /platform - ./steps/capture-hardware-software-info.sh - - ./steps/run-benchmarks.sh + - '([ $SCENARIO = "flask_simple" ] && BP_SCENARIO=$SCENARIO 
/benchmarking-platform-tools/bp-runner/bp-runner "$REPORTS_DIR/../.gitlab/benchmarks/bp-runner.yml" --debug -t) || ([ $SCENARIO != "flask_simple" ] && ./steps/run-benchmarks.sh)' - ./steps/analyze-results.sh - "./steps/upload-results-to-s3.sh || :" artifacts: @@ -87,7 +87,7 @@ benchmark-flask-sqli: extends: .benchmarks variables: SCENARIO: "flask_sqli" - + benchmark-core-api: extends: .benchmarks variables: diff --git a/.gitlab/benchmarks/bp-runner.yml b/.gitlab/benchmarks/bp-runner.yml new file mode 100644 index 00000000000..0daee69419b --- /dev/null +++ b/.gitlab/benchmarks/bp-runner.yml @@ -0,0 +1,13 @@ +experiments: + - name: run-microbenchmarks + setup: + - name: datadog-agent + run: datadog_agent + cpus: 24-25 + config_sh: ./steps/update-dd-agent-config.sh + + steps: + - name: benchmarks + cpus: 26-47 + run: shell + script: export SCENARIO=$BP_SCENARIO && ./steps/run-benchmarks.sh diff --git a/benchmarks/flask_simple/app.py b/benchmarks/flask_simple/app.py deleted file mode 100644 index 40c40e8d92f..00000000000 --- a/benchmarks/flask_simple/app.py +++ /dev/null @@ -1,57 +0,0 @@ -import hashlib -import random - -from flask import Flask -from flask import render_template_string -from flask import request - - -app = Flask(__name__) - - -def make_index(): - rand_numbers = [random.random() for _ in range(20)] - m = hashlib.md5() - m.update(b"Insecure hash") - rand_numbers.append(m.digest()) - return render_template_string( - """ - - - - - - Hello World! - - -
-
-

- Hello World -

-

- My first website -

-
    - {% for i in rand_numbers %} -
  • {{ i }}
  • - {% endfor %} -
-
-
- - - """, - rand_numbers=rand_numbers, - ) - - -@app.route("/") -def index(): - return make_index() - - -@app.route("/post-view", methods=["POST"]) -def post_view(): - data = request.data - return data, 200 diff --git a/benchmarks/flask_simple/gunicorn.conf.py b/benchmarks/flask_simple/gunicorn.conf.py deleted file mode 100644 index 9f1689cb97c..00000000000 --- a/benchmarks/flask_simple/gunicorn.conf.py +++ /dev/null @@ -1,34 +0,0 @@ -from bm.di_utils import BMDebugger -from bm.flask_utils import post_fork # noqa:F401 -from bm.flask_utils import post_worker_init # noqa:F401 - -from ddtrace.debugging._probe.model import DEFAULT_CAPTURE_LIMITS -from ddtrace.debugging._probe.model import DEFAULT_SNAPSHOT_PROBE_RATE -from ddtrace.debugging._probe.model import LiteralTemplateSegment -from ddtrace.debugging._probe.model import LogLineProbe - - -# Probes are added only if the BMDebugger is enabled. -probe_id = "bm-test" -BMDebugger.add_probes( - LogLineProbe( - probe_id=probe_id, - version=0, - tags={}, - source_file="app.py", - line=17, - template=probe_id, - segments=[LiteralTemplateSegment(probe_id)], - take_snapshot=True, - limits=DEFAULT_CAPTURE_LIMITS, - condition=None, - condition_error_rate=0.0, - rate=DEFAULT_SNAPSHOT_PROBE_RATE, - ), -) - -bind = "0.0.0.0:8000" -worker_class = "sync" -workers = 4 -wsgi_app = "app:app" -pidfile = "gunicorn.pid" diff --git a/benchmarks/flask_simple/requirements_scenario.txt b/benchmarks/flask_simple/requirements_scenario.txt index ee57bcb69b0..5bd19d39d1a 100644 --- a/benchmarks/flask_simple/requirements_scenario.txt +++ b/benchmarks/flask_simple/requirements_scenario.txt @@ -1,3 +1 @@ flask==3.0.0 -gunicorn==20.1.0 -requests==2.31.0 diff --git a/benchmarks/flask_simple/scenario.py b/benchmarks/flask_simple/scenario.py index b8df732745e..311661d6a7b 100644 --- a/benchmarks/flask_simple/scenario.py +++ b/benchmarks/flask_simple/scenario.py @@ -1,6 +1,69 @@ +import hashlib +import os +import random + import bm -import 
bm.flask_utils as flask_utils -from utils import _post_response +import bm.utils as utils +from flask import Flask +from flask import render_template_string +from flask import request + +from ddtrace.debugging._probe.model import DEFAULT_CAPTURE_LIMITS +from ddtrace.debugging._probe.model import DEFAULT_SNAPSHOT_PROBE_RATE +from ddtrace.debugging._probe.model import LiteralTemplateSegment +from ddtrace.debugging._probe.model import LogLineProbe + + +def make_index(): + rand_numbers = [random.random() for _ in range(20)] + m = hashlib.md5() + m.update(b"Insecure hash") + rand_numbers.append(m.digest()) + return render_template_string( + """ + + + + + + Hello World! + + +
+
+

+ Hello World +

+

+ My first website +

+
    + {% for i in rand_numbers %} +
  • {{ i }}
  • + {% endfor %} +
+
+
+ + + """, + rand_numbers=rand_numbers, + ) + + +def create_app(): + app = Flask(__name__) + + @app.route("/") + def index(): + return make_index() + + @app.route("/post-view", methods=["POST"]) + def post_view(): + data = request.data + return data, 200 + + return app class FlaskSimple(bm.Scenario): @@ -13,10 +76,82 @@ class FlaskSimple(bm.Scenario): telemetry_metrics_enabled = bm.var_bool() def run(self): - with flask_utils.server(self, custom_post_response=_post_response) as get_response: + # Setup the environment and enable Datadog features + os.environ.update( + { + "DD_APPSEC_ENABLED": str(self.appsec_enabled), + "DD_IAST_ENABLED": str(self.iast_enabled), + "DD_TELEMETRY_METRICS_ENABLED": str(self.telemetry_metrics_enabled), + } + ) + if self.profiler_enabled: + os.environ.update( + {"DD_PROFILING_ENABLED": "1", "DD_PROFILING_API_TIMEOUT": "0.1", "DD_PROFILING_UPLOAD_INTERVAL": "10"} + ) + if not self.tracer_enabled: + import ddtrace.profiling.auto # noqa:F401 + + if self.tracer_enabled: + import ddtrace.bootstrap.sitecustomize # noqa:F401 + + if self.debugger_enabled: + from bm.di_utils import BMDebugger + + BMDebugger.enable() + + # Probes are added only if the BMDebugger is enabled. 
+ probe_id = "bm-test" + BMDebugger.add_probes( + LogLineProbe( + probe_id=probe_id, + version=0, + tags={}, + source_file="scenario.py", + line=23, + template=probe_id, + segments=[LiteralTemplateSegment(probe_id)], + take_snapshot=True, + limits=DEFAULT_CAPTURE_LIMITS, + condition=None, + condition_error_rate=0.0, + rate=DEFAULT_SNAPSHOT_PROBE_RATE, + ), + ) + + # Create the Flask app + app = create_app() + + # Setup the request function + if self.post_request: + HEADERS = { + "SERVER_PORT": "8000", + "REMOTE_ADDR": "127.0.0.1", + "CONTENT_TYPE": "application/json", + "HTTP_HOST": "localhost:8000", + "HTTP_ACCEPT": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp," + "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "HTTP_SEC_FETCH_DEST": "document", + "HTTP_ACCEPT_ENCODING": "gzip, deflate, br", + "HTTP_ACCEPT_LANGUAGE": "en-US,en;q=0.9", + "User-Agent": "dd-test-scanner-log", + } + + def make_request(app): + client = app.test_client() + return client.post("/post-view", headers=HEADERS, data=utils.EXAMPLE_POST_DATA) + + else: + + def make_request(app): + client = app.test_client() + return client.get("/") - def _(loops): - for _ in range(loops): - get_response() + # Scenario loop function + def _(loops): + for _ in range(loops): + res = make_request(app) + assert res.status_code == 200 + # We have to close the request (or read `res.data`) to get the `flask.request` span to finalize + res.close() - yield _ + yield _ diff --git a/benchmarks/flask_simple/utils.py b/benchmarks/flask_simple/utils.py deleted file mode 100644 index 59be7e983ba..00000000000 --- a/benchmarks/flask_simple/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -import bm.flask_utils as flask_utils -import bm.utils as utils -import requests - - -def _post_response(): - HEADERS = { - "SERVER_PORT": "8000", - "REMOTE_ADDR": "127.0.0.1", - "CONTENT_TYPE": "application/json", - "HTTP_HOST": "localhost:8000", - "HTTP_ACCEPT": 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp," - "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", - "HTTP_SEC_FETCH_DEST": "document", - "HTTP_ACCEPT_ENCODING": "gzip, deflate, br", - "HTTP_ACCEPT_LANGUAGE": "en-US,en;q=0.9", - "User-Agent": "dd-test-scanner-log", - } - r = requests.post(flask_utils.SERVER_URL + "post-view", data=utils.EXAMPLE_POST_DATA, headers=HEADERS) - r.raise_for_status() From 434f71188c9374cd6892f774fd262cd2bc181f56 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Martinez Date: Fri, 3 May 2024 18:04:01 +0200 Subject: [PATCH 02/26] fix: better None protection when tainting a grpc message (#9155) ## Checklist - [X] Change(s) are motivated and described in the PR description - [X] Testing strategy is described if automated tests are not included in the PR - [X] Risks are described (performance impact, potential for breakage, maintainability) - [X] Change is maintainable (easy to change, telemetry, documentation) - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [X] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez Co-authored-by: Brett Langdon --- ddtrace/contrib/grpc/client_interceptor.py | 12 ++++++++---- .../notes/asm-gprc-not-none-788b4b435b931a11.yaml | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/asm-gprc-not-none-788b4b435b931a11.yaml diff --git a/ddtrace/contrib/grpc/client_interceptor.py b/ddtrace/contrib/grpc/client_interceptor.py index 17a04330583..57808b81788 100644 --- a/ddtrace/contrib/grpc/client_interceptor.py +++ b/ddtrace/contrib/grpc/client_interceptor.py @@ -85,8 +85,10 @@ def _handle_response(span, response): "grpc.response_message", (response._response,), ) - if result and "response" in result: - response._response = result["response"].value + if result: + response_value = result.get("response") + if response_value: + response._response = response_value if hasattr(response, "add_done_callback"): response.add_done_callback(_future_done_callback(span)) @@ -173,8 +175,10 @@ def __next__(self): "grpc.response_message", (n,), ) - if result and "response" in result: - n = result["response"].value + if result: + response_value = result.get("response") + if response_value: + n = response_value return n next = __next__ diff --git 
a/releasenotes/notes/asm-gprc-not-none-788b4b435b931a11.yaml b/releasenotes/notes/asm-gprc-not-none-788b4b435b931a11.yaml new file mode 100644 index 00000000000..458a43d515e --- /dev/null +++ b/releasenotes/notes/asm-gprc-not-none-788b4b435b931a11.yaml @@ -0,0 +1,3 @@ +fixes: + - | + ASM: protect against potentially returning ``None`` when tainting a gRPC message. From 1ca90d51c8a840f8c5cf0332a844ded3d990f053 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Mon, 6 May 2024 05:35:15 -0400 Subject: [PATCH 03/26] fix(grpc): resolve issue with incorrect gRPC response being returned (#9157) The usage of the core API was not correct in the gRPC integration. It was working on accident based on how the listener was written. Since the listener mutates the underlying response class and not the response instance, there is nothing that needs to be returned, so we can change the `dispatch_with_results` to `dispatch`. As well, the previous usage of the `dispatch_with_results` API was not correct, the response was always replaced with the result from the ASM listener even when the response was `None`. ## Changelog No changelog entry is added since this is a fix for an unreleased change so it is not yet a "bug". 
## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/appsec/_handlers.py | 3 +-- ddtrace/contrib/grpc/client_interceptor.py | 18 ++---------------- ddtrace/internal/core/event_hub.py | 4 ++-- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/ddtrace/appsec/_handlers.py b/ddtrace/appsec/_handlers.py index e3e147b3bce..e5326f54165 100644 --- a/ddtrace/appsec/_handlers.py +++ b/ddtrace/appsec/_handlers.py @@ -410,7 +410,6 @@ def _on_grpc_response(response): msg_cls = 
type(response) _patch_protobuf_class(msg_cls) - return response def listen(): @@ -425,4 +424,4 @@ def listen(): core.on("flask.patch", _on_flask_patch) core.on("asgi.request.parse.body", _on_asgi_request_parse_body, "await_receive_and_body") -core.on("grpc.response_message", _on_grpc_response, "response") +core.on("grpc.response_message", _on_grpc_response) diff --git a/ddtrace/contrib/grpc/client_interceptor.py b/ddtrace/contrib/grpc/client_interceptor.py index 57808b81788..40a4f2bcbd8 100644 --- a/ddtrace/contrib/grpc/client_interceptor.py +++ b/ddtrace/contrib/grpc/client_interceptor.py @@ -81,14 +81,7 @@ def _handle_response(span, response): # google-api-core which has its own future base class # https://github.com/googleapis/python-api-core/blob/49c6755a21215bbb457b60db91bab098185b77da/google/api_core/future/base.py#L23 if hasattr(response, "_response"): - result = core.dispatch_with_results( - "grpc.response_message", - (response._response,), - ) - if result: - response_value = result.get("response") - if response_value: - response._response = response_value + core.dispatch("grpc.response_message", (response._response,)) if hasattr(response, "add_done_callback"): response.add_done_callback(_future_done_callback(span)) @@ -171,14 +164,7 @@ def _next(self): def __next__(self): n = self._next() if n is not None: - result = core.dispatch_with_results( - "grpc.response_message", - (n,), - ) - if result: - response_value = result.get("response") - if response_value: - n = response_value + core.dispatch("grpc.response_message", (n,)) return n next = __next__ diff --git a/ddtrace/internal/core/event_hub.py b/ddtrace/internal/core/event_hub.py index 5da8677b1a0..a27bde3549d 100644 --- a/ddtrace/internal/core/event_hub.py +++ b/ddtrace/internal/core/event_hub.py @@ -35,10 +35,10 @@ def __bool__(self): class EventResultDict(Dict[str, EventResult]): - def __missing__(self, key: str): + def __missing__(self, key: str) -> EventResult: return _MissingEvent - def 
__getattr__(self, name: str): + def __getattr__(self, name: str) -> EventResult: return dict.__getitem__(self, name) From fcfe10c7039919a649ccc13a3dc3a53c510b55d9 Mon Sep 17 00:00:00 2001 From: kyle Date: Mon, 6 May 2024 12:44:40 +0200 Subject: [PATCH 04/26] chore(ci): add OCI packaging (#8932) We are shifting over to using OCI packaging rather than system packaging. Unlike the existing deb/rpm builds we package only the relevant arch binaries in the package to reduce the final installation size on the system. Once validated and released we will remove the similar deb/rpm packaging. This packaging has been tested in https://github.com/DataDog/auto_inject/pull/185 where we pull in v2.8.2 of the library and inject it into a basic Python app and validate that traces are submitted. The job will run as part of the existing deb/rpm jobs done as part of the release process. No new manual steps are introduced to the release to build and deploy these images. --- .gitlab-ci.yml | 17 ++++++++++++++ .gitlab/build-oci.sh | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100755 .gitlab/build-oci.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 071dde14005..9074864e92e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -130,3 +130,20 @@ deploy_latest_tag_to_docker_registries: # See above note in the `deploy_to_docker_registries` job. 
RETRY_DELAY: 14400 RETRY_COUNT: 3 + +package-oci: + stage: package + extends: .package-oci + rules: + - if: $PYTHON_PACKAGE_VERSION + when: on_success + - if: '$CI_COMMIT_TAG =~ /^v[0-9]+\.[0-9]+\.[0-9]+(-prerelease)?$/' + when: manual + allow_failure: false + script: + - ../.gitlab/build-oci.sh + parallel: + matrix: + - ARCH: + - arm64 + - amd64 diff --git a/.gitlab/build-oci.sh b/.gitlab/build-oci.sh new file mode 100755 index 00000000000..47092a9cace --- /dev/null +++ b/.gitlab/build-oci.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +source common_build_functions.sh + +if [ -n "$CI_COMMIT_TAG" ] && [ -z "$PYTHON_PACKAGE_VERSION" ]; then + PYTHON_PACKAGE_VERSION=${CI_COMMIT_TAG##v} +fi + +if [ -z "$ARCH" ]; then + ARCH=amd64 +fi + + +TMP_DIR=$(mktemp --dir) +BUILD_DIR=$TMP_DIR/datadog-python-apm.build +mkdir $TMP_DIR/datadog-python-apm.build + +# Install known compatible pip as default version shipped in Ubuntu (20.0.2) +# does not work. +python3 -m pip install -U "pip>=22.0" +python3 -m pip install packaging + +WHEEL_ARCH="x86_64" +if [ "$ARCH" = "arm64" ]; then + WHEEL_ARCH="aarch64" +fi + +../lib-injection/dl_wheels.py \ + --python-version=3.12 \ + --python-version=3.11 \ + --python-version=3.10 \ + --python-version=3.9 \ + --python-version=3.8 \ + --python-version=3.7 \ + --ddtrace-version=$PYTHON_PACKAGE_VERSION \ + --arch=$WHEEL_ARCH \ + --platform=musllinux_1_1 \ + --platform=manylinux2014 \ + --output-dir=$BUILD_DIR/ddtrace_pkgs \ + --verbose +echo -n $PYTHON_PACKAGE_VERSION > auto_inject-python.version +cp ../lib-injection/sitecustomize.py $BUILD_DIR/ +cp auto_inject-python.version $BUILD_DIR/version +chmod -R o-w $BUILD_DIR +chmod -R g-w $BUILD_DIR + +# Build packages +datadog-package create \ + --version="$PYTHON_PACKAGE_VERSION" \ + --package="datadog-apm-library-python" \ + --archive=true \ + --archive-path="datadog-apm-library-python-$PYTHON_PACKAGE_VERSION-$ARCH.tar" \ + --arch "$ARCH" \ + --os "linux" \ + $BUILD_DIR From 
87e7a1b1731403f4a9aa250032b88b801f509db9 Mon Sep 17 00:00:00 2001 From: Federico Mon Date: Mon, 6 May 2024 14:45:17 +0200 Subject: [PATCH 05/26] ci: fix macrobenchmarks base image variable (#9161) CI: Fix wrong variable name for the base image of benchmark tests ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .gitlab/macrobenchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index 16cf2b3b9be..d07a674613e 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -14,7 +14,7 @@ variables: # - if: $CI_COMMIT_REF_NAME == "main" # when: always # If you have a problem with Gitlab cache, see Troubleshooting section in Benchmarking Platform docs - image: $BENCHMARKS_CI_IMAGE + image: $BASE_CI_IMAGE script: | git clone --branch python/macrobenchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform platform && cd platform if [ "$BP_PYTHON_SCENARIO_DIR" == "flask-realworld" ]; then From 379d2c2622bef052a5cc33978aac83a9c48829d0 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Mon, 6 May 2024 06:16:08 -0700 Subject: [PATCH 06/26] chore(redis): loosen coupling between instrumentation and product (#9138) This pull request uses the Core API at `internal.core` to separate the concerns of Tracing and Instrumentation into separate files in the context of the `redis` integration. It also changes the term "tracing" to "instrumentation" in the redis files. 
Existing tests are enough to cover the changed code paths. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/_trace/trace_handlers.py | 8 ++- ddtrace/_trace/utils_redis.py | 18 +++--- ddtrace/contrib/aioredis/patch.py | 10 +-- ddtrace/contrib/aredis/patch.py | 10 +-- ddtrace/contrib/redis/asyncio_patch.py | 23 +++---- ddtrace/contrib/redis/patch.py | 86 ++++++++++++++------------ ddtrace/contrib/redis_utils.py | 4 +- ddtrace/contrib/rediscluster/patch.py | 12 ++-- 
ddtrace/contrib/yaaredis/patch.py | 10 +-- 9 files changed, 94 insertions(+), 87 deletions(-) diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py index f67eca90453..de4b4686828 100644 --- a/ddtrace/_trace/trace_handlers.py +++ b/ddtrace/_trace/trace_handlers.py @@ -711,9 +711,9 @@ def _on_botocore_kinesis_getrecords_post( ctx.set_item("distributed_context", extract_DD_context_from_messages(result["Records"], message_parser)) -def _on_redis_async_command_post(span, rowcount): +def _on_redis_command_post(ctx: core.ExecutionContext, rowcount): if rowcount is not None: - span.set_metric(db.ROWCOUNT, rowcount) + ctx[ctx["call_key"]].set_metric(db.ROWCOUNT, rowcount) def listen(): @@ -764,7 +764,8 @@ def listen(): core.on("botocore.bedrock.process_response", _on_botocore_bedrock_process_response) core.on("botocore.sqs.ReceiveMessage.post", _on_botocore_sqs_recvmessage_post) core.on("botocore.kinesis.GetRecords.post", _on_botocore_kinesis_getrecords_post) - core.on("redis.async_command.post", _on_redis_async_command_post) + core.on("redis.async_command.post", _on_redis_command_post) + core.on("redis.command.post", _on_redis_command_post) for context_name in ( "flask.call", @@ -782,6 +783,7 @@ def listen(): "botocore.patched_sqs_api_call", "botocore.patched_stepfunctions_api_call", "botocore.patched_bedrock_api_call", + "redis.command", ): core.on(f"context.started.start_span.{context_name}", _start_span) diff --git a/ddtrace/_trace/utils_redis.py b/ddtrace/_trace/utils_redis.py index 1e2d7b9b9a8..5d8ec99563d 100644 --- a/ddtrace/_trace/utils_redis.py +++ b/ddtrace/_trace/utils_redis.py @@ -14,6 +14,7 @@ from ddtrace.ext import SpanTypes from ddtrace.ext import db from ddtrace.ext import redis as redisx +from ddtrace.internal import core from ddtrace.internal.constants import COMPONENT from ddtrace.internal.schema import schematize_cache_operation from ddtrace.internal.utils.formats import stringify_cache_args @@ -48,20 +49,23 @@ def 
_set_span_tags( @contextmanager -def _trace_redis_cmd(pin, config_integration, instance, args): +def _instrument_redis_cmd(pin, config_integration, instance, args): query = stringify_cache_args(args, cmd_max_len=config_integration.cmd_max_length) - with pin.tracer.trace( - schematize_cache_operation(redisx.CMD, cache_provider=redisx.APP), + with core.context_with_data( + "redis.command", + span_name=schematize_cache_operation(redisx.CMD, cache_provider=redisx.APP), + pin=pin, service=trace_utils.ext_service(pin, config_integration), span_type=SpanTypes.REDIS, resource=query.split(" ")[0] if config_integration.resource_only_command else query, - ) as span: + call_key="redis_command_call", + ) as ctx, ctx[ctx["call_key"]] as span: _set_span_tags(span, pin, config_integration, args, instance, query) - yield span + yield ctx @contextmanager -def _trace_redis_execute_pipeline(pin, config_integration, cmds, instance, is_cluster=False): +def _instrument_redis_execute_pipeline(pin, config_integration, cmds, instance, is_cluster=False): cmd_string = resource = "\n".join(cmds) if config_integration.resource_only_command: resource = "\n".join([cmd.split(" ")[0] for cmd in cmds]) @@ -77,7 +81,7 @@ def _trace_redis_execute_pipeline(pin, config_integration, cmds, instance, is_cl @contextmanager -def _trace_redis_execute_async_cluster_pipeline(pin, config_integration, cmds, instance): +def _instrument_redis_execute_async_cluster_pipeline(pin, config_integration, cmds, instance): cmd_string = resource = "\n".join(cmds) if config_integration.resource_only_command: resource = "\n".join([cmd.split(" ")[0] for cmd in cmds]) diff --git a/ddtrace/contrib/aioredis/patch.py b/ddtrace/contrib/aioredis/patch.py index f44cea456c5..276f73f6a1f 100644 --- a/ddtrace/contrib/aioredis/patch.py +++ b/ddtrace/contrib/aioredis/patch.py @@ -5,8 +5,8 @@ import aioredis from ddtrace import config -from ddtrace._trace.utils_redis import _trace_redis_cmd -from ddtrace._trace.utils_redis import 
_trace_redis_execute_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_cmd +from ddtrace._trace.utils_redis import _instrument_redis_execute_pipeline from ddtrace.contrib.redis_utils import ROW_RETURNING_COMMANDS from ddtrace.contrib.redis_utils import _run_redis_command_async from ddtrace.contrib.redis_utils import determine_row_count @@ -93,8 +93,8 @@ async def traced_execute_command(func, instance, args, kwargs): if not pin or not pin.enabled(): return await func(*args, **kwargs) - with _trace_redis_cmd(pin, config.aioredis, instance, args) as span: - return await _run_redis_command_async(span=span, func=func, args=args, kwargs=kwargs) + with _instrument_redis_cmd(pin, config.aioredis, instance, args) as ctx: + return await _run_redis_command_async(ctx=ctx, func=func, args=args, kwargs=kwargs) def traced_pipeline(func, instance, args, kwargs): @@ -111,7 +111,7 @@ async def traced_execute_pipeline(func, instance, args, kwargs): return await func(*args, **kwargs) cmds = [stringify_cache_args(c, cmd_max_len=config.aioredis.cmd_max_length) for c, _ in instance.command_stack] - with _trace_redis_execute_pipeline(pin, config.aioredis, cmds, instance): + with _instrument_redis_execute_pipeline(pin, config.aioredis, cmds, instance): return await func(*args, **kwargs) diff --git a/ddtrace/contrib/aredis/patch.py b/ddtrace/contrib/aredis/patch.py index 65386e99932..523adffaacd 100644 --- a/ddtrace/contrib/aredis/patch.py +++ b/ddtrace/contrib/aredis/patch.py @@ -3,8 +3,8 @@ import aredis from ddtrace import config -from ddtrace._trace.utils_redis import _trace_redis_cmd -from ddtrace._trace.utils_redis import _trace_redis_execute_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_cmd +from ddtrace._trace.utils_redis import _instrument_redis_execute_pipeline from ddtrace.contrib.redis_utils import _run_redis_command_async from ddtrace.vendor import wrapt @@ -64,8 +64,8 @@ async def traced_execute_command(func, instance, args, kwargs): if 
not pin or not pin.enabled(): return await func(*args, **kwargs) - with _trace_redis_cmd(pin, config.aredis, instance, args) as span: - return await _run_redis_command_async(span=span, func=func, args=args, kwargs=kwargs) + with _instrument_redis_cmd(pin, config.aredis, instance, args) as ctx: + return await _run_redis_command_async(ctx=ctx, func=func, args=args, kwargs=kwargs) async def traced_pipeline(func, instance, args, kwargs): @@ -82,5 +82,5 @@ async def traced_execute_pipeline(func, instance, args, kwargs): return await func(*args, **kwargs) cmds = [stringify_cache_args(c, cmd_max_len=config.aredis.cmd_max_length) for c, _ in instance.command_stack] - with _trace_redis_execute_pipeline(pin, config.aredis, cmds, instance): + with _instrument_redis_execute_pipeline(pin, config.aredis, cmds, instance): return await func(*args, **kwargs) diff --git a/ddtrace/contrib/redis/asyncio_patch.py b/ddtrace/contrib/redis/asyncio_patch.py index 7bcc9653c74..a382b786919 100644 --- a/ddtrace/contrib/redis/asyncio_patch.py +++ b/ddtrace/contrib/redis/asyncio_patch.py @@ -1,40 +1,37 @@ from ddtrace import config -from ddtrace._trace.utils_redis import _trace_redis_cmd -from ddtrace._trace.utils_redis import _trace_redis_execute_async_cluster_pipeline -from ddtrace._trace.utils_redis import _trace_redis_execute_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_cmd +from ddtrace._trace.utils_redis import _instrument_redis_execute_async_cluster_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_execute_pipeline from ddtrace.contrib.redis_utils import _run_redis_command_async from ...internal.utils.formats import stringify_cache_args from ...pin import Pin -# -# tracing async functions -# -async def traced_async_execute_command(func, instance, args, kwargs): +async def instrumented_async_execute_command(func, instance, args, kwargs): pin = Pin.get_from(instance) if not pin or not pin.enabled(): return await func(*args, **kwargs) - with 
_trace_redis_cmd(pin, config.redis, instance, args) as span: - return await _run_redis_command_async(span=span, func=func, args=args, kwargs=kwargs) + with _instrument_redis_cmd(pin, config.redis, instance, args) as ctx: + return await _run_redis_command_async(ctx=ctx, func=func, args=args, kwargs=kwargs) -async def traced_async_execute_pipeline(func, instance, args, kwargs): +async def instrumented_async_execute_pipeline(func, instance, args, kwargs): pin = Pin.get_from(instance) if not pin or not pin.enabled(): return await func(*args, **kwargs) cmds = [stringify_cache_args(c, cmd_max_len=config.redis.cmd_max_length) for c, _ in instance.command_stack] - with _trace_redis_execute_pipeline(pin, config.redis, cmds, instance): + with _instrument_redis_execute_pipeline(pin, config.redis, cmds, instance): return await func(*args, **kwargs) -async def traced_async_execute_cluster_pipeline(func, instance, args, kwargs): +async def instrumented_async_execute_cluster_pipeline(func, instance, args, kwargs): pin = Pin.get_from(instance) if not pin or not pin.enabled(): return await func(*args, **kwargs) cmds = [stringify_cache_args(c.args, cmd_max_len=config.redis.cmd_max_length) for c in instance._command_stack] - with _trace_redis_execute_async_cluster_pipeline(pin, config.redis, cmds, instance): + with _instrument_redis_execute_async_cluster_pipeline(pin, config.redis, cmds, instance): return await func(*args, **kwargs) diff --git a/ddtrace/contrib/redis/patch.py b/ddtrace/contrib/redis/patch.py index f9784c4e29b..44bb79c91b4 100644 --- a/ddtrace/contrib/redis/patch.py +++ b/ddtrace/contrib/redis/patch.py @@ -3,11 +3,11 @@ import redis from ddtrace import config -from ddtrace._trace.utils_redis import _trace_redis_cmd -from ddtrace._trace.utils_redis import _trace_redis_execute_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_cmd +from ddtrace._trace.utils_redis import _instrument_redis_execute_pipeline from ddtrace.contrib.redis_utils import 
ROW_RETURNING_COMMANDS from ddtrace.contrib.redis_utils import determine_row_count -from ddtrace.ext import db +from ddtrace.internal import core from ddtrace.vendor import wrapt from ...internal.schema import schematize_service_name @@ -46,44 +46,44 @@ def patch(): _w = wrapt.wrap_function_wrapper if redis.VERSION < (3, 0, 0): - _w("redis", "StrictRedis.execute_command", traced_execute_command(config.redis)) - _w("redis", "StrictRedis.pipeline", traced_pipeline) - _w("redis", "Redis.pipeline", traced_pipeline) - _w("redis.client", "BasePipeline.execute", traced_execute_pipeline(config.redis, False)) - _w("redis.client", "BasePipeline.immediate_execute_command", traced_execute_command(config.redis)) + _w("redis", "StrictRedis.execute_command", instrumented_execute_command(config.redis)) + _w("redis", "StrictRedis.pipeline", instrumented_pipeline) + _w("redis", "Redis.pipeline", instrumented_pipeline) + _w("redis.client", "BasePipeline.execute", instrumented_execute_pipeline(config.redis, False)) + _w("redis.client", "BasePipeline.immediate_execute_command", instrumented_execute_command(config.redis)) else: - _w("redis", "Redis.execute_command", traced_execute_command(config.redis)) - _w("redis", "Redis.pipeline", traced_pipeline) - _w("redis.client", "Pipeline.execute", traced_execute_pipeline(config.redis, False)) - _w("redis.client", "Pipeline.immediate_execute_command", traced_execute_command(config.redis)) + _w("redis", "Redis.execute_command", instrumented_execute_command(config.redis)) + _w("redis", "Redis.pipeline", instrumented_pipeline) + _w("redis.client", "Pipeline.execute", instrumented_execute_pipeline(config.redis, False)) + _w("redis.client", "Pipeline.immediate_execute_command", instrumented_execute_command(config.redis)) if redis.VERSION >= (4, 1): # Redis v4.1 introduced support for redis clusters and rediscluster package was deprecated. 
# https://github.com/redis/redis-py/commit/9db1eec71b443b8e7e74ff503bae651dc6edf411 - _w("redis.cluster", "RedisCluster.execute_command", traced_execute_command(config.redis)) - _w("redis.cluster", "RedisCluster.pipeline", traced_pipeline) - _w("redis.cluster", "ClusterPipeline.execute", traced_execute_pipeline(config.redis, True)) + _w("redis.cluster", "RedisCluster.execute_command", instrumented_execute_command(config.redis)) + _w("redis.cluster", "RedisCluster.pipeline", instrumented_pipeline) + _w("redis.cluster", "ClusterPipeline.execute", instrumented_execute_pipeline(config.redis, True)) Pin(service=None).onto(redis.cluster.RedisCluster) # Avoid mypy invalid syntax errors when parsing Python 2 files if redis.VERSION >= (4, 2, 0): - from .asyncio_patch import traced_async_execute_command - from .asyncio_patch import traced_async_execute_pipeline + from .asyncio_patch import instrumented_async_execute_command + from .asyncio_patch import instrumented_async_execute_pipeline - _w("redis.asyncio.client", "Redis.execute_command", traced_async_execute_command) - _w("redis.asyncio.client", "Redis.pipeline", traced_pipeline) - _w("redis.asyncio.client", "Pipeline.execute", traced_async_execute_pipeline) - _w("redis.asyncio.client", "Pipeline.immediate_execute_command", traced_async_execute_command) + _w("redis.asyncio.client", "Redis.execute_command", instrumented_async_execute_command) + _w("redis.asyncio.client", "Redis.pipeline", instrumented_pipeline) + _w("redis.asyncio.client", "Pipeline.execute", instrumented_async_execute_pipeline) + _w("redis.asyncio.client", "Pipeline.immediate_execute_command", instrumented_async_execute_command) Pin(service=None).onto(redis.asyncio.Redis) if redis.VERSION >= (4, 3, 0): - from .asyncio_patch import traced_async_execute_command + from .asyncio_patch import instrumented_async_execute_command - _w("redis.asyncio.cluster", "RedisCluster.execute_command", traced_async_execute_command) + _w("redis.asyncio.cluster", 
"RedisCluster.execute_command", instrumented_async_execute_command) if redis.VERSION >= (4, 3, 2): - from .asyncio_patch import traced_async_execute_cluster_pipeline + from .asyncio_patch import instrumented_async_execute_cluster_pipeline - _w("redis.asyncio.cluster", "RedisCluster.pipeline", traced_pipeline) - _w("redis.asyncio.cluster", "ClusterPipeline.execute", traced_async_execute_cluster_pipeline) + _w("redis.asyncio.cluster", "RedisCluster.pipeline", instrumented_pipeline) + _w("redis.asyncio.cluster", "ClusterPipeline.execute", instrumented_async_execute_cluster_pipeline) Pin(service=None).onto(redis.asyncio.RedisCluster) @@ -121,36 +121,40 @@ def unpatch(): unwrap(redis.asyncio.cluster.ClusterPipeline, "execute") -def _run_redis_command(span, func, args, kwargs): +def _run_redis_command(ctx: core.ExecutionContext, func, args, kwargs): parsed_command = stringify_cache_args(args) redis_command = parsed_command.split(" ")[0] + rowcount = None try: result = func(*args, **kwargs) - if redis_command in ROW_RETURNING_COMMANDS: - span.set_metric(db.ROWCOUNT, determine_row_count(redis_command=redis_command, result=result)) return result except Exception: - if redis_command in ROW_RETURNING_COMMANDS: - span.set_metric(db.ROWCOUNT, 0) + rowcount = 0 raise + finally: + if rowcount is None: + rowcount = determine_row_count(redis_command=redis_command, result=result) + if redis_command not in ROW_RETURNING_COMMANDS: + rowcount = None + core.dispatch("redis.command.post", [ctx, rowcount]) # # tracing functions # -def traced_execute_command(integration_config): - def _traced_execute_command(func, instance, args, kwargs): +def instrumented_execute_command(integration_config): + def _instrumented_execute_command(func, instance, args, kwargs): pin = Pin.get_from(instance) if not pin or not pin.enabled(): return func(*args, **kwargs) - with _trace_redis_cmd(pin, integration_config, instance, args) as span: - return _run_redis_command(span=span, func=func, args=args, 
kwargs=kwargs) + with _instrument_redis_cmd(pin, integration_config, instance, args) as ctx: + return _run_redis_command(ctx=ctx, func=func, args=args, kwargs=kwargs) - return _traced_execute_command + return _instrumented_execute_command -def traced_pipeline(func, instance, args, kwargs): +def instrumented_pipeline(func, instance, args, kwargs): pipeline = func(*args, **kwargs) pin = Pin.get_from(instance) if pin: @@ -158,8 +162,8 @@ def traced_pipeline(func, instance, args, kwargs): return pipeline -def traced_execute_pipeline(integration_config, is_cluster=False): - def _traced_execute_pipeline(func, instance, args, kwargs): +def instrumented_execute_pipeline(integration_config, is_cluster=False): + def _instrumented_execute_pipeline(func, instance, args, kwargs): pin = Pin.get_from(instance) if not pin or not pin.enabled(): return func(*args, **kwargs) @@ -174,7 +178,7 @@ def _traced_execute_pipeline(func, instance, args, kwargs): stringify_cache_args(c, cmd_max_len=integration_config.cmd_max_length) for c, _ in instance.command_stack ] - with _trace_redis_execute_pipeline(pin, integration_config, cmds, instance, is_cluster): + with _instrument_redis_execute_pipeline(pin, integration_config, cmds, instance, is_cluster): return func(*args, **kwargs) - return _traced_execute_pipeline + return _instrumented_execute_pipeline diff --git a/ddtrace/contrib/redis_utils.py b/ddtrace/contrib/redis_utils.py index fa605cdeaa8..9627bcbf24f 100644 --- a/ddtrace/contrib/redis_utils.py +++ b/ddtrace/contrib/redis_utils.py @@ -64,7 +64,7 @@ def determine_row_count(redis_command: str, result: Optional[Union[List, Dict, s return 0 -async def _run_redis_command_async(span, func, args, kwargs): +async def _run_redis_command_async(ctx: core.ExecutionContext, func, args, kwargs): parsed_command = stringify_cache_args(args) redis_command = parsed_command.split(" ")[0] rowcount = None @@ -79,4 +79,4 @@ async def _run_redis_command_async(span, func, args, kwargs): rowcount = 
determine_row_count(redis_command=redis_command, result=result) if redis_command not in ROW_RETURNING_COMMANDS: rowcount = None - core.dispatch("redis.async_command.post", [span, rowcount]) + core.dispatch("redis.async_command.post", [ctx, rowcount]) diff --git a/ddtrace/contrib/rediscluster/patch.py b/ddtrace/contrib/rediscluster/patch.py index 0f52160f987..0ed6ffbda33 100644 --- a/ddtrace/contrib/rediscluster/patch.py +++ b/ddtrace/contrib/rediscluster/patch.py @@ -8,8 +8,8 @@ from ddtrace.constants import ANALYTICS_SAMPLE_RATE_KEY from ddtrace.constants import SPAN_KIND from ddtrace.constants import SPAN_MEASURED_KEY -from ddtrace.contrib.redis.patch import traced_execute_command -from ddtrace.contrib.redis.patch import traced_pipeline +from ddtrace.contrib.redis.patch import instrumented_execute_command +from ddtrace.contrib.redis.patch import instrumented_pipeline from ddtrace.ext import SpanKind from ddtrace.ext import SpanTypes from ddtrace.ext import db @@ -54,13 +54,13 @@ def patch(): _w = wrapt.wrap_function_wrapper if REDISCLUSTER_VERSION >= (2, 0, 0): - _w("rediscluster", "client.RedisCluster.execute_command", traced_execute_command(config.rediscluster)) - _w("rediscluster", "client.RedisCluster.pipeline", traced_pipeline) + _w("rediscluster", "client.RedisCluster.execute_command", instrumented_execute_command(config.rediscluster)) + _w("rediscluster", "client.RedisCluster.pipeline", instrumented_pipeline) _w("rediscluster", "pipeline.ClusterPipeline.execute", traced_execute_pipeline) Pin().onto(rediscluster.RedisCluster) else: - _w("rediscluster", "StrictRedisCluster.execute_command", traced_execute_command(config.rediscluster)) - _w("rediscluster", "StrictRedisCluster.pipeline", traced_pipeline) + _w("rediscluster", "StrictRedisCluster.execute_command", instrumented_execute_command(config.rediscluster)) + _w("rediscluster", "StrictRedisCluster.pipeline", instrumented_pipeline) _w("rediscluster", "StrictClusterPipeline.execute", 
traced_execute_pipeline) Pin().onto(rediscluster.StrictRedisCluster) diff --git a/ddtrace/contrib/yaaredis/patch.py b/ddtrace/contrib/yaaredis/patch.py index a23b0d86cc2..581531f7ad3 100644 --- a/ddtrace/contrib/yaaredis/patch.py +++ b/ddtrace/contrib/yaaredis/patch.py @@ -3,8 +3,8 @@ import yaaredis from ddtrace import config -from ddtrace._trace.utils_redis import _trace_redis_cmd -from ddtrace._trace.utils_redis import _trace_redis_execute_pipeline +from ddtrace._trace.utils_redis import _instrument_redis_cmd +from ddtrace._trace.utils_redis import _instrument_redis_execute_pipeline from ddtrace.contrib.redis_utils import _run_redis_command_async from ddtrace.vendor import wrapt @@ -61,8 +61,8 @@ async def traced_execute_command(func, instance, args, kwargs): if not pin or not pin.enabled(): return await func(*args, **kwargs) - with _trace_redis_cmd(pin, config.yaaredis, instance, args) as span: - return await _run_redis_command_async(span=span, func=func, args=args, kwargs=kwargs) + with _instrument_redis_cmd(pin, config.yaaredis, instance, args) as ctx: + return await _run_redis_command_async(ctx=ctx, func=func, args=args, kwargs=kwargs) async def traced_pipeline(func, instance, args, kwargs): @@ -79,5 +79,5 @@ async def traced_execute_pipeline(func, instance, args, kwargs): return await func(*args, **kwargs) cmds = [stringify_cache_args(c, cmd_max_len=config.yaaredis.cmd_max_length) for c, _ in instance.command_stack] - with _trace_redis_execute_pipeline(pin, config.yaaredis, cmds, instance): + with _instrument_redis_execute_pipeline(pin, config.yaaredis, cmds, instance): return await func(*args, **kwargs) From 3ea8bd887382d6a328b3b64430341a946c3fde1b Mon Sep 17 00:00:00 2001 From: Christophe Papazian <114495376+christophe-papazian@users.noreply.github.com> Date: Mon, 6 May 2024 15:39:36 +0200 Subject: [PATCH 07/26] chore(asm): add support for content-length header with asm blocked response (#9151) - Add support for the content-length header when ASM bypasses 
the normal response and blocks the request. - Improve unit test on response header blocking to check the header is properly set. - Goal is to pass https://github.com/DataDog/system-tests/blob/main/tests/appsec/waf/test_blocking.py#L202 APPSEC-52730 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/_trace/trace_handlers.py | 4 ++-- ddtrace/appsec/_asm_request_context.py | 1 + ddtrace/contrib/django/patch.py | 1 + ddtrace/contrib/flask/patch.py | 31 +++++++++++++++----------- ddtrace/contrib/wsgi/wsgi.py | 10 ++++----- tests/appsec/contrib_appsec/utils.py | 19 +++++++++++----- 6 files changed, 40 insertions(+), 26 deletions(-) diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py index de4b4686828..3fee65b575e 100644 --- a/ddtrace/_trace/trace_handlers.py +++ b/ddtrace/_trace/trace_handlers.py @@ -192,7 +192,7 @@ def _wsgi_make_block_content(ctx, construct_url): req_span.set_tag_str(http.USER_AGENT, user_agent) except Exception as e: log.warning("Could not set some span tags on blocked request: %s", str(e)) # noqa: G200 - + resp_headers.append(("Content-Length", str(len(content)))) return status, resp_headers, content @@ -240,7 +240,7 @@ def _asgi_make_block_content(ctx, url): req_span.set_tag_str(http.USER_AGENT, user_agent) except Exception as e: log.warning("Could not set some span tags on blocked request: %s", str(e)) # noqa: G200 - + resp_headers.append((b"Content-Length", str(len(content)).encode())) return status, resp_headers, content diff --git a/ddtrace/appsec/_asm_request_context.py 
b/ddtrace/appsec/_asm_request_context.py index ec88464cabe..6e03dae6bb4 100644 --- a/ddtrace/appsec/_asm_request_context.py +++ b/ddtrace/appsec/_asm_request_context.py @@ -550,6 +550,7 @@ def _on_block_decided(callback): return set_value(_CALLBACKS, "flask_block", callback) + core.on("flask.block.request.content", callback, "block_requested") def _get_headers_if_appsec(): diff --git a/ddtrace/contrib/django/patch.py b/ddtrace/contrib/django/patch.py index 670e94fe1ba..e3c8c7c2284 100644 --- a/ddtrace/contrib/django/patch.py +++ b/ddtrace/contrib/django/patch.py @@ -484,6 +484,7 @@ def blocked_response(): content = http_utils._get_blocked_template(ctype) response = HttpResponse(content, content_type=ctype, status=status) response.content = content + response["Content-Length"] = len(content.encode()) utils._after_request_tags(pin, ctx["call"], request, response) return response diff --git a/ddtrace/contrib/flask/patch.py b/ddtrace/contrib/flask/patch.py index c9f3c038b63..a1e9f19bb10 100644 --- a/ddtrace/contrib/flask/patch.py +++ b/ddtrace/contrib/flask/patch.py @@ -109,23 +109,28 @@ def _wrapped_start_response(self, start_response, ctx, status_code, headers, exc core.dispatch("flask.start_response.pre", (flask.request, ctx, config.flask, status_code, headers)) if not core.get_item(HTTP_REQUEST_BLOCKED): headers_from_context = "" - result = core.dispatch_with_results("flask.start_response", ("Flask",)).waf - if result: - headers_from_context = result.value + result_waf = core.dispatch_with_results("flask.start_response", ("Flask",)).waf + if result_waf: + headers_from_context = result_waf.value if core.get_item(HTTP_REQUEST_BLOCKED): # response code must be set here, or it will be too late - block_config = core.get_item(HTTP_REQUEST_BLOCKED) - desired_type = block_config.get("type", "auto") - status = block_config.get("status_code", 403) - if desired_type == "none": - response_headers = [] + result_content = core.dispatch_with_results("flask.block.request.content", 
()).block_requested + if result_content: + _, status, response_headers = result_content.value + result = start_response(str(status), response_headers) else: - if block_config.get("type", "auto") == "auto": - ctype = "text/html" if "text/html" in headers_from_context else "text/json" + block_config = core.get_item(HTTP_REQUEST_BLOCKED) + desired_type = block_config.get("type", "auto") + status = block_config.get("status_code", 403) + if desired_type == "none": + response_headers = [] else: - ctype = "text/" + block_config["type"] - response_headers = [("content-type", ctype)] - result = start_response(str(status), response_headers) + if block_config.get("type", "auto") == "auto": + ctype = "text/html" if "text/html" in headers_from_context else "text/json" + else: + ctype = "text/" + block_config["type"] + response_headers = [("content-type", ctype)] + result = start_response(str(status), response_headers) core.dispatch("flask.start_response.blocked", (ctx, config.flask, response_headers, status)) else: result = start_response(status_code, headers) diff --git a/ddtrace/contrib/wsgi/wsgi.py b/ddtrace/contrib/wsgi/wsgi.py index aff74e3b0a0..a33e7e24e06 100644 --- a/ddtrace/contrib/wsgi/wsgi.py +++ b/ddtrace/contrib/wsgi/wsgi.py @@ -141,12 +141,12 @@ def blocked_view(): core.dispatch("wsgi.app.exception", (ctx,)) raise else: + if core.get_item(HTTP_REQUEST_BLOCKED): + _, _, content = core.dispatch_with_results( + "wsgi.block.started", (ctx, construct_url) + ).status_headers_content.value or (None, None, "") + closing_iterable = [content] core.dispatch("wsgi.app.success", (ctx, closing_iterable)) - if core.get_item(HTTP_REQUEST_BLOCKED): - _, _, content = core.dispatch_with_results( - "wsgi.block.started", (ctx, construct_url) - ).status_headers_content.value or (None, None, "") - closing_iterable = [content] result = core.dispatch_with_results( "wsgi.request.complete", (ctx, closing_iterable, self.app_is_iterator) diff --git a/tests/appsec/contrib_appsec/utils.py 
b/tests/appsec/contrib_appsec/utils.py index 41d4f5e2b8d..a1f6441db56 100644 --- a/tests/appsec/contrib_appsec/utils.py +++ b/tests/appsec/contrib_appsec/utils.py @@ -3,6 +3,7 @@ import json from typing import Dict from typing import List +from urllib.parse import quote from urllib.parse import urlencode import pytest @@ -661,16 +662,16 @@ def test_request_suspicious_request_block_match_response_status( @pytest.mark.parametrize("asm_enabled", [True, False]) @pytest.mark.parametrize("metastruct", [True, False]) @pytest.mark.parametrize( - ("uri", "blocked"), + ("uri", "headers", "blocked"), [ - ("/asm/1/a", False), - ("/asm/1/a?headers=header_name=MagicKey_Al4h7iCFep9s1", "tst-037-009"), - ("/asm/1/a?headers=key=anythin,header_name=HiddenMagicKey_Al4h7iCFep9s1Value", "tst-037-009"), - ("/asm/1/a?headers=header_name=NoWorryBeHappy", None), + ("/asm/1/a", {}, False), + ("/asm/1/a", {"header_name": "MagicKey_Al4h7iCFep9s1"}, "tst-037-009"), + ("/asm/1/a", {"key": "anything", "header_name": "HiddenMagicKey_Al4h7iCFep9s1Value"}, "tst-037-009"), + ("/asm/1/a", {"header_name": "NoWorryBeHappy"}, None), ], ) def test_request_suspicious_request_block_match_response_headers( - self, interface: Interface, get_tag, asm_enabled, metastruct, root_span, uri, blocked + self, interface: Interface, get_tag, asm_enabled, metastruct, root_span, uri, headers, blocked ): from ddtrace.ext import http @@ -678,6 +679,8 @@ def test_request_suspicious_request_block_match_response_headers( dict(_asm_enabled=asm_enabled, _use_metastruct_for_triggers=metastruct) ), override_env(dict(DD_APPSEC_RULES=rules.RULES_SRB)): self.update_tracer(interface) + if headers: + uri += "?headers=" + quote(",".join(f"{k}={v}" for k, v in headers.items())) response = interface.client.get(uri) # DEV Warning: encoded URL will behave differently assert get_tag(http.URL) == "http://localhost:8000" + uri @@ -691,10 +694,14 @@ def test_request_suspicious_request_block_match_response_headers( 
get_tag(asm_constants.SPAN_DATA_NAMES.RESPONSE_HEADERS_NO_COOKIES + ".content-type") == "text/json" ) assert self.headers(response)["content-type"] == "text/json" + for k in headers: + assert k not in self.headers(response) else: assert self.status(response) == 200 assert get_tag(http.STATUS_CODE) == "200" assert get_triggers(root_span()) is None + assert "content-length" in self.headers(response) + assert int(self.headers(response)["content-length"]) == len(self.body(response).encode()) LARGE_BODY = { f"key_{i}": {f"key_{i}_{j}": {f"key_{i}_{j}_{k}": f"value_{i}_{j}_{k}" for k in range(4)} for j in range(4)} From 48d0a16872e80bec2187c39d7ae5301dc411e742 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Martinez Date: Mon, 6 May 2024 17:52:07 +0200 Subject: [PATCH 08/26] chore(iast): add SSRF support for urllib3 (#9153) ## Description Adds support for detecting and reporting SSRF in `urllib3.request()`. ## Checklist - [X] Change(s) are motivated and described in the PR description - [X] Testing strategy is described if automated tests are not included in the PR - [X] Risks are described (performance impact, potential for breakage, maintainability) - [X] Change is maintainable (easy to change, telemetry, documentation) - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [X] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez --- .../requirements/{1a3fc04.txt => 18695ab.txt} | 8 +- .../requirements/{ec026f0.txt => 1aedbda.txt} | 14 +- .../requirements/{1ce89c8.txt => 4628049.txt} | 12 +- .../requirements/{e561a1c.txt => 58881b9.txt} | 14 +- .../requirements/{c14fe8f.txt => 63b44e2.txt} | 14 +- .../requirements/{43b3aa8.txt => aba89a0.txt} | 12 +- ddtrace/appsec/_iast/taint_sinks/ssrf.py | 3 +- ddtrace/contrib/urllib3/patch.py | 6 + riotfile.py | 1 + tests/appsec/iast/taint_sinks/test_ssrf.py | 137 ++++++++++++------ 10 files changed, 135 insertions(+), 86 deletions(-) rename .riot/requirements/{1a3fc04.txt => 18695ab.txt} (89%) rename .riot/requirements/{ec026f0.txt => 1aedbda.txt} (79%) rename .riot/requirements/{1ce89c8.txt => 4628049.txt} (81%) rename .riot/requirements/{e561a1c.txt => 58881b9.txt} (79%) rename .riot/requirements/{c14fe8f.txt => 63b44e2.txt} (84%) rename .riot/requirements/{43b3aa8.txt => aba89a0.txt} (81%) diff --git a/.riot/requirements/1a3fc04.txt b/.riot/requirements/18695ab.txt similarity index 89% rename from .riot/requirements/1a3fc04.txt rename to .riot/requirements/18695ab.txt index df8eafb0e43..4e7266493b5 100644 --- a/.riot/requirements/1a3fc04.txt +++ b/.riot/requirements/18695ab.txt @@ 
-2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.7 # by the following command: # -# pip-compile --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/1a3fc04.in +# pip-compile --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/18695ab.in # astunparse==1.6.3 attrs==23.2.0 @@ -11,12 +11,12 @@ cffi==1.15.1 charset-normalizer==3.3.2 coverage[toml]==7.2.7 cryptography==42.0.5 -exceptiongroup==1.2.0 +exceptiongroup==1.2.1 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.62.2 hypothesis==6.45.0 -idna==3.6 +idna==3.7 importlib-metadata==6.7.0 iniconfig==2.0.0 mock==5.1.0 diff --git a/.riot/requirements/ec026f0.txt b/.riot/requirements/1aedbda.txt similarity index 79% rename from .riot/requirements/ec026f0.txt rename to .riot/requirements/1aedbda.txt index 8746b3084b2..811a3601483 100644 --- a/.riot/requirements/ec026f0.txt +++ b/.riot/requirements/1aedbda.txt @@ -2,31 +2,31 @@ # This file is autogenerated by pip-compile with Python 3.8 # by the following command: # -# pip-compile --no-annotate .riot/requirements/ec026f0.in +# pip-compile --no-annotate .riot/requirements/1aedbda.in # astunparse==1.6.3 attrs==23.2.0 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 cryptography==42.0.5 -exceptiongroup==1.2.0 +exceptiongroup==1.2.1 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.63.0 hypothesis==6.45.0 -idna==3.6 +idna==3.7 iniconfig==2.0.0 mock==5.1.0 opentracing==2.4.0 packaging==24.0 -pluggy==1.4.0 +pluggy==1.5.0 protobuf==4.25.3 psycopg2-binary==2.9.9 pycparser==2.22 pycryptodome==3.20.0 -pytest==8.1.1 +pytest==8.2.0 pytest-cov==5.0.0 pytest-mock==3.14.0 requests==2.31.0 diff --git a/.riot/requirements/1ce89c8.txt b/.riot/requirements/4628049.txt similarity index 81% rename from .riot/requirements/1ce89c8.txt rename to .riot/requirements/4628049.txt index 62492031406..f7b203ad964 100644 
--- a/.riot/requirements/1ce89c8.txt +++ b/.riot/requirements/4628049.txt @@ -2,30 +2,30 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --no-annotate .riot/requirements/1ce89c8.in +# pip-compile --no-annotate .riot/requirements/4628049.in # astunparse==1.6.3 attrs==23.2.0 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 cryptography==42.0.5 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.63.0 hypothesis==6.45.0 -idna==3.6 +idna==3.7 iniconfig==2.0.0 mock==5.1.0 opentracing==2.4.0 packaging==24.0 -pluggy==1.4.0 +pluggy==1.5.0 protobuf==4.25.3 psycopg2-binary==2.9.9 pycparser==2.22 pycryptodome==3.20.0 -pytest==8.1.1 +pytest==8.2.0 pytest-cov==5.0.0 pytest-mock==3.14.0 requests==2.31.0 diff --git a/.riot/requirements/e561a1c.txt b/.riot/requirements/58881b9.txt similarity index 79% rename from .riot/requirements/e561a1c.txt rename to .riot/requirements/58881b9.txt index bac839e3526..bf9352d848c 100644 --- a/.riot/requirements/e561a1c.txt +++ b/.riot/requirements/58881b9.txt @@ -2,31 +2,31 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --no-annotate .riot/requirements/e561a1c.in +# pip-compile --no-annotate .riot/requirements/58881b9.in # astunparse==1.6.3 attrs==23.2.0 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 cryptography==42.0.5 -exceptiongroup==1.2.0 +exceptiongroup==1.2.1 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.63.0 hypothesis==6.45.0 -idna==3.6 +idna==3.7 iniconfig==2.0.0 mock==5.1.0 opentracing==2.4.0 packaging==24.0 -pluggy==1.4.0 +pluggy==1.5.0 protobuf==4.25.3 psycopg2-binary==2.9.9 pycparser==2.22 pycryptodome==3.20.0 -pytest==8.1.1 +pytest==8.2.0 pytest-cov==5.0.0 pytest-mock==3.14.0 requests==2.31.0 diff --git a/.riot/requirements/c14fe8f.txt 
b/.riot/requirements/63b44e2.txt similarity index 84% rename from .riot/requirements/c14fe8f.txt rename to .riot/requirements/63b44e2.txt index 46e0b6b1c78..8f5fa212cd9 100644 --- a/.riot/requirements/c14fe8f.txt +++ b/.riot/requirements/63b44e2.txt @@ -2,31 +2,31 @@ # This file is autogenerated by pip-compile with python 3.9 # To update, run: # -# pip-compile --no-annotate --resolver=backtracking .riot/requirements/c14fe8f.in +# pip-compile --no-annotate --resolver=backtracking .riot/requirements/63b44e2.in # astunparse==1.6.3 attrs==23.2.0 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 cryptography==42.0.5 -exceptiongroup==1.2.0 +exceptiongroup==1.2.1 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.63.0 hypothesis==6.45.0 -idna==3.6 +idna==3.7 iniconfig==2.0.0 mock==5.1.0 opentracing==2.4.0 packaging==24.0 -pluggy==1.4.0 +pluggy==1.5.0 protobuf==4.25.3 psycopg2-binary==2.9.9 pycparser==2.22 pycryptodome==3.20.0 -pytest==8.1.1 +pytest==8.2.0 pytest-cov==5.0.0 pytest-mock==3.14.0 requests==2.31.0 diff --git a/.riot/requirements/43b3aa8.txt b/.riot/requirements/aba89a0.txt similarity index 81% rename from .riot/requirements/43b3aa8.txt rename to .riot/requirements/aba89a0.txt index bb8bb2cea5b..6b3d997b9ab 100644 --- a/.riot/requirements/43b3aa8.txt +++ b/.riot/requirements/aba89a0.txt @@ -2,30 +2,30 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --no-annotate .riot/requirements/43b3aa8.in +# pip-compile --no-annotate .riot/requirements/aba89a0.in # astunparse==1.6.3 attrs==23.2.0 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 cryptography==42.0.5 googleapis-common-protos==1.63.0 greenlet==3.0.3 -grpcio==1.62.1 +grpcio==1.63.0 hypothesis==6.45.0 -idna==3.6 +idna==3.7 iniconfig==2.0.0 mock==5.1.0 opentracing==2.4.0 packaging==24.0 -pluggy==1.4.0 +pluggy==1.5.0 
protobuf==4.25.3 psycopg2-binary==2.9.9 pycparser==2.22 pycryptodome==3.20.0 -pytest==8.1.1 +pytest==8.2.0 pytest-cov==5.0.0 pytest-mock==3.14.0 requests==2.31.0 diff --git a/ddtrace/appsec/_iast/taint_sinks/ssrf.py b/ddtrace/appsec/_iast/taint_sinks/ssrf.py index 7a070cf5425..b6d1300de7f 100644 --- a/ddtrace/appsec/_iast/taint_sinks/ssrf.py +++ b/ddtrace/appsec/_iast/taint_sinks/ssrf.py @@ -22,7 +22,8 @@ class SSRF(VulnerabilityBase): def _iast_report_ssrf(func: Callable, *args, **kwargs): - report_ssrf = kwargs.get("url", False) + report_ssrf = args[1] if len(args) > 1 else kwargs.get("url", None) + if report_ssrf: from .._metrics import _set_metric_iast_executed_sink diff --git a/ddtrace/contrib/urllib3/patch.py b/ddtrace/contrib/urllib3/patch.py index a2ae3df30e9..5a46b5f0e74 100644 --- a/ddtrace/contrib/urllib3/patch.py +++ b/ddtrace/contrib/urllib3/patch.py @@ -3,6 +3,7 @@ import urllib3 from ddtrace import config +from ddtrace.appsec._common_module_patches import wrapped_request_D8CB81E472AF98A2 as _wrap_request from ddtrace.internal.constants import COMPONENT from ddtrace.internal.schema.span_attribute_schema import SpanDirection from ddtrace.pin import Pin @@ -50,6 +51,11 @@ def patch(): urllib3.__datadog_patch = True _w("urllib3", "connectionpool.HTTPConnectionPool.urlopen", _wrap_urlopen) + if hasattr(urllib3, "_request_methods"): + _w("urllib3._request_methods", "RequestMethods.request", _wrap_request) + else: + # Old version before https://github.com/urllib3/urllib3/pull/2398 + _w("urllib3.request", "RequestMethods.request", _wrap_request) Pin().onto(urllib3.connectionpool.HTTPConnectionPool) diff --git a/riotfile.py b/riotfile.py index e52d53f37b1..2872bac2d31 100644 --- a/riotfile.py +++ b/riotfile.py @@ -136,6 +136,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): command="pytest -v {cmdargs} tests/appsec/iast/", pkgs={ "requests": latest, + "urllib3": latest, "pycryptodome": latest, "cryptography": latest, 
"astunparse": latest, diff --git a/tests/appsec/iast/taint_sinks/test_ssrf.py b/tests/appsec/iast/taint_sinks/test_ssrf.py index 49053f0b07b..00a461ff77e 100644 --- a/tests/appsec/iast/taint_sinks/test_ssrf.py +++ b/tests/appsec/iast/taint_sinks/test_ssrf.py @@ -6,7 +6,8 @@ from ddtrace.appsec._iast._taint_tracking import taint_pyobject from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_SSRF -from ddtrace.contrib.requests.patch import patch +from ddtrace.contrib.requests.patch import patch as requests_patch +from ddtrace.contrib.urllib3.patch import patch as urllib3_patch from ddtrace.internal import core from tests.appsec.iast.iast_utils import get_line_and_hash from tests.utils import override_global_config @@ -19,73 +20,113 @@ def setup(): oce._enabled = True -def test_ssrf(tracer, iast_span_defaults): +def _get_tainted_url(): + tainted_path = taint_pyobject( + pyobject="forbidden_dir/", + source_name="test_ssrf", + source_value="forbidden_dir/", + source_origin=OriginType.PARAMETER, + ) + return add_aspect("http://localhost/", tainted_path), tainted_path + + +def _check_report(span_report, tainted_path, label): + data = span_report.build_and_scrub_value_parts() + + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_SSRF + assert vulnerability["evidence"]["valueParts"] == [ + {"value": "http://localhost/"}, + {"source": 0, "value": tainted_path}, + ] + assert "value" not in vulnerability["evidence"].keys() + assert vulnerability["evidence"].get("pattern") is None + assert vulnerability["evidence"].get("redacted") is None + assert source["name"] == "test_ssrf" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == tainted_path + + line, hash_value = get_line_and_hash(label, VULN_SSRF, filename=FIXTURES_PATH) + assert vulnerability["location"]["path"] == FIXTURES_PATH + assert vulnerability["location"]["line"] == line 
+ assert vulnerability["hash"] == hash_value + + +def test_ssrf_requests(tracer, iast_span_defaults): with override_global_config(dict(_iast_enabled=True)): - patch() + requests_patch() import requests from requests.exceptions import ConnectionError - tainted_path = taint_pyobject( - pyobject="forbidden_dir/", - source_name="test_ssrf", - source_value="forbidden_dir/", - source_origin=OriginType.PARAMETER, - ) - url = add_aspect("http://localhost/", tainted_path) + tainted_url, tainted_path = _get_tainted_url() try: - # label test_ssrf - requests.get(url) + # label test_ssrf_requests + requests.get(tainted_url) except ConnectionError: pass + + span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) + assert span_report + _check_report(span_report, tainted_path, "test_ssrf_requests") + + +def test_ssrf_urllib3(tracer, iast_span_defaults): + with override_global_config(dict(_iast_enabled=True)): + urllib3_patch() + import urllib3 + + tainted_url, tainted_path = _get_tainted_url() + try: + # label test_ssrf_urllib3 + urllib3.request(method="GET", url=tainted_url) + except urllib3.exceptions.HTTPError: + pass + span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - data = span_report.build_and_scrub_value_parts() - - vulnerability = data["vulnerabilities"][0] - source = data["sources"][0] - assert vulnerability["type"] == VULN_SSRF - assert vulnerability["evidence"]["valueParts"] == [ - {"value": "http://localhost/"}, - {"source": 0, "value": tainted_path}, - ] - assert "value" not in vulnerability["evidence"].keys() - assert vulnerability["evidence"].get("pattern") is None - assert vulnerability["evidence"].get("redacted") is None - assert source["name"] == "test_ssrf" - assert source["origin"] == OriginType.PARAMETER - assert source["value"] == tainted_path - - line, hash_value = get_line_and_hash("test_ssrf", VULN_SSRF, filename=FIXTURES_PATH) - assert vulnerability["location"]["path"] == FIXTURES_PATH - assert 
vulnerability["location"]["line"] == line - assert vulnerability["hash"] == hash_value + _check_report(span_report, tainted_path, "test_ssrf_urllib3") + + +def _check_no_report_if_deduplicated(span_report, num_vuln_expected): + if num_vuln_expected == 0: + assert span_report is None + else: + assert span_report + + assert len(span_report.vulnerabilities) == num_vuln_expected @pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) -def test_ssrf_deduplication(num_vuln_expected, tracer, iast_span_deduplication_enabled): - patch() +def test_ssrf_requests_deduplication(num_vuln_expected, tracer, iast_span_deduplication_enabled): + requests_patch() import requests from requests.exceptions import ConnectionError - tainted_path = taint_pyobject( - pyobject="forbidden_dir/", - source_name="test_ssrf", - source_value="forbidden_dir/", - source_origin=OriginType.PARAMETER, - ) - url = add_aspect("http://localhost/", tainted_path) + tainted_url, tainted_path = _get_tainted_url() for _ in range(0, 5): try: - # label test_ssrf - requests.get(url) + # label test_ssrf_requests_deduplication + requests.get(tainted_url) except ConnectionError: pass span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_deduplication_enabled) + _check_no_report_if_deduplicated(span_report, num_vuln_expected) - if num_vuln_expected == 0: - assert span_report is None - else: - assert span_report - assert len(span_report.vulnerabilities) == num_vuln_expected +@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0]) +def test_ssrf_urllib3_deduplication(num_vuln_expected, tracer, iast_span_deduplication_enabled): + urllib3_patch() + import urllib3 + + tainted_url, tainted_path = _get_tainted_url() + for _ in range(0, 5): + try: + # label test_ssrf_urllib3_deduplication + urllib3.request(method="GET", url=tainted_url) + except urllib3.exceptions.HTTPError: + pass + + span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_deduplication_enabled) + _check_no_report_if_deduplicated(span_report, 
num_vuln_expected) From 9e862f9617351fc88cd6e7c1150100495cf077e9 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Mon, 6 May 2024 12:44:00 -0400 Subject: [PATCH 09/26] fix: pytest deadlock when using gevent (#9141) Fixes #8281 This fix resolves an issue when using ``pytest`` + ``gevent`` where the telemetry writer was eager initialized by ``pytest`` entrypoints loading of our plugin causing a potential dead lock. ## Reproduction The specific case that we targeted solving with this PR: ```python # test.py import ddtrace import gevent.monkey gevent.monkey.patch_all() def test_thing(): pass ``` ```shell $ pytest test.py $ pytest -p ddtrace -p ddtrace.pytest_bdd -p ddtrace.pytest_benchmark test.py $ pytest -p no:ddtrace -p no:ddtrace.pytest_bdd -p no:ddtrace.pytest_benchmark test.py ``` Previously would dead-lock, but now will execute without problem. ## Trade offs We no longer get telemetry enabled and sending data just from `import ddtrace`. Moving telemetry writer starting to `ddtrace.bootstrap.preload` will mean that any unhandled exceptions that occur before the telemetry writer can be started will not get reported. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .riot/requirements/17fe3fa.txt | 22 --------- .riot/requirements/1b90fc9.txt | 29 ++++++++++++ .riot/requirements/1d12360.txt | 24 ---------- .riot/requirements/1d45085.txt | 24 ---------- .riot/requirements/1ea9ac5.txt | 20 -------- .riot/requirements/27d0ff3.txt | 31 +++++++++++++ .../requirements/{11fe1f6.txt => 2d19e52.txt} | 17 +++++-- .riot/requirements/61891b4.txt | 31 +++++++++++++ .riot/requirements/d171c08.txt | 27 +++++++++++ .riot/requirements/daaf281.txt | 20 -------- .riot/requirements/de578a7.txt | 27 +++++++++++ ddtrace/__init__.py | 10 ---- ddtrace/bootstrap/preload.py | 12 +++++ ddtrace/internal/runtime/runtime_metrics.py | 13 ++++-- ...riter-initialization-163b1effc09d948f.yaml | 4 ++ riotfile.py | 1 + tests/ci_visibility/test_cli.py | 46 +++++++++++++++++++ tests/telemetry/test_telemetry.py | 16 +++---- 18 files changed, 235 insertions(+), 139 deletions(-) delete mode 100644 .riot/requirements/17fe3fa.txt create mode 100644 .riot/requirements/1b90fc9.txt delete mode 100644 .riot/requirements/1d12360.txt delete mode 100644 .riot/requirements/1d45085.txt delete mode 100644 .riot/requirements/1ea9ac5.txt create mode 100644 .riot/requirements/27d0ff3.txt rename .riot/requirements/{11fe1f6.txt => 2d19e52.txt} (61%) 
create mode 100644 .riot/requirements/61891b4.txt create mode 100644 .riot/requirements/d171c08.txt delete mode 100644 .riot/requirements/daaf281.txt create mode 100644 .riot/requirements/de578a7.txt create mode 100644 releasenotes/notes/fix-telemetry-writer-initialization-163b1effc09d948f.yaml create mode 100644 tests/ci_visibility/test_cli.py diff --git a/.riot/requirements/17fe3fa.txt b/.riot/requirements/17fe3fa.txt deleted file mode 100644 index dc9f9f2765d..00000000000 --- a/.riot/requirements/17fe3fa.txt +++ /dev/null @@ -1,22 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --no-annotate .riot/requirements/17fe3fa.in -# -attrs==23.1.0 -coverage[toml]==7.3.4 -exceptiongroup==1.2.0 -hypothesis==6.45.0 -iniconfig==2.0.0 -mock==5.1.0 -msgpack==1.0.7 -opentracing==2.4.0 -packaging==23.2 -pluggy==1.3.0 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 -pytest-randomly==3.15.0 -sortedcontainers==2.4.0 -tomli==2.0.1 diff --git a/.riot/requirements/1b90fc9.txt b/.riot/requirements/1b90fc9.txt new file mode 100644 index 00000000000..3bf89e44bb0 --- /dev/null +++ b/.riot/requirements/1b90fc9.txt @@ -0,0 +1,29 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --no-annotate --resolver=backtracking .riot/requirements/1b90fc9.in +# +attrs==23.2.0 +coverage[toml]==7.5.0 +exceptiongroup==1.2.1 +gevent==24.2.1 +greenlet==3.0.3 +hypothesis==6.45.0 +iniconfig==2.0.0 +mock==5.1.0 +msgpack==1.0.8 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.5.0 +pytest==8.2.0 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-randomly==3.15.0 +sortedcontainers==2.4.0 +tomli==2.0.1 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/.riot/requirements/1d12360.txt b/.riot/requirements/1d12360.txt deleted file mode 100644 index feb4de14f32..00000000000 --- 
a/.riot/requirements/1d12360.txt +++ /dev/null @@ -1,24 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.8 -# by the following command: -# -# pip-compile --no-annotate .riot/requirements/1d12360.in -# -attrs==23.1.0 -coverage[toml]==7.3.4 -exceptiongroup==1.2.0 -hypothesis==6.45.0 -importlib-metadata==7.0.0 -iniconfig==2.0.0 -mock==5.1.0 -msgpack==1.0.7 -opentracing==2.4.0 -packaging==23.2 -pluggy==1.3.0 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 -pytest-randomly==3.15.0 -sortedcontainers==2.4.0 -tomli==2.0.1 -zipp==3.17.0 diff --git a/.riot/requirements/1d45085.txt b/.riot/requirements/1d45085.txt deleted file mode 100644 index 121f8f33d40..00000000000 --- a/.riot/requirements/1d45085.txt +++ /dev/null @@ -1,24 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --no-annotate .riot/requirements/1d45085.in -# -attrs==23.1.0 -coverage[toml]==7.3.4 -exceptiongroup==1.2.0 -hypothesis==6.45.0 -importlib-metadata==7.0.0 -iniconfig==2.0.0 -mock==5.1.0 -msgpack==1.0.7 -opentracing==2.4.0 -packaging==23.2 -pluggy==1.3.0 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 -pytest-randomly==3.15.0 -sortedcontainers==2.4.0 -tomli==2.0.1 -zipp==3.17.0 diff --git a/.riot/requirements/1ea9ac5.txt b/.riot/requirements/1ea9ac5.txt deleted file mode 100644 index f0b56f982a2..00000000000 --- a/.riot/requirements/1ea9ac5.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --no-annotate .riot/requirements/1ea9ac5.in -# -attrs==23.1.0 -coverage[toml]==7.3.4 -hypothesis==6.45.0 -iniconfig==2.0.0 -mock==5.1.0 -msgpack==1.0.7 -opentracing==2.4.0 -packaging==23.2 -pluggy==1.3.0 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 -pytest-randomly==3.15.0 -sortedcontainers==2.4.0 diff --git a/.riot/requirements/27d0ff3.txt b/.riot/requirements/27d0ff3.txt new file mode 100644 index 
00000000000..4479eff6dea --- /dev/null +++ b/.riot/requirements/27d0ff3.txt @@ -0,0 +1,31 @@ +# +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: +# +# pip-compile --no-annotate .riot/requirements/27d0ff3.in +# +attrs==23.2.0 +coverage[toml]==7.5.0 +exceptiongroup==1.2.1 +gevent==24.2.1 +greenlet==3.0.3 +hypothesis==6.45.0 +importlib-metadata==7.1.0 +iniconfig==2.0.0 +mock==5.1.0 +msgpack==1.0.8 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.5.0 +pytest==8.2.0 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-randomly==3.15.0 +sortedcontainers==2.4.0 +tomli==2.0.1 +zipp==3.18.1 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/.riot/requirements/11fe1f6.txt b/.riot/requirements/2d19e52.txt similarity index 61% rename from .riot/requirements/11fe1f6.txt rename to .riot/requirements/2d19e52.txt index c124b57faa5..27f31d1c4d2 100644 --- a/.riot/requirements/11fe1f6.txt +++ b/.riot/requirements/2d19e52.txt @@ -2,20 +2,22 @@ # This file is autogenerated by pip-compile with Python 3.7 # by the following command: # -# pip-compile --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/11fe1f6.in +# pip-compile --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/2d19e52.in # -attrs==23.1.0 +attrs==23.2.0 coverage[toml]==7.2.7 -exceptiongroup==1.2.0 +exceptiongroup==1.2.1 +gevent==22.10.2 +greenlet==3.0.3 hypothesis==6.45.0 importlib-metadata==6.7.0 iniconfig==2.0.0 mock==5.1.0 msgpack==1.0.5 opentracing==2.4.0 -packaging==23.2 +packaging==24.0 pluggy==1.2.0 -pytest==7.4.3 +pytest==7.4.4 pytest-cov==4.1.0 pytest-mock==3.11.1 pytest-randomly==3.12.0 @@ -23,3 +25,8 @@ sortedcontainers==2.4.0 tomli==2.0.1 typing-extensions==4.7.1 zipp==3.15.0 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git 
a/.riot/requirements/61891b4.txt b/.riot/requirements/61891b4.txt new file mode 100644 index 00000000000..66adc696e0a --- /dev/null +++ b/.riot/requirements/61891b4.txt @@ -0,0 +1,31 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --no-annotate .riot/requirements/61891b4.in +# +attrs==23.2.0 +coverage[toml]==7.5.0 +exceptiongroup==1.2.1 +gevent==24.2.1 +greenlet==3.0.3 +hypothesis==6.45.0 +importlib-metadata==7.1.0 +iniconfig==2.0.0 +mock==5.1.0 +msgpack==1.0.8 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.5.0 +pytest==8.2.0 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-randomly==3.15.0 +sortedcontainers==2.4.0 +tomli==2.0.1 +zipp==3.18.1 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/.riot/requirements/d171c08.txt b/.riot/requirements/d171c08.txt new file mode 100644 index 00000000000..a9f69cdfc8b --- /dev/null +++ b/.riot/requirements/d171c08.txt @@ -0,0 +1,27 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --no-annotate .riot/requirements/d171c08.in +# +attrs==23.2.0 +coverage[toml]==7.5.0 +gevent==24.2.1 +greenlet==3.0.3 +hypothesis==6.45.0 +iniconfig==2.0.0 +mock==5.1.0 +msgpack==1.0.8 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.5.0 +pytest==8.2.0 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-randomly==3.15.0 +sortedcontainers==2.4.0 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/.riot/requirements/daaf281.txt b/.riot/requirements/daaf281.txt deleted file mode 100644 index a750810974f..00000000000 --- a/.riot/requirements/daaf281.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --no-annotate .riot/requirements/daaf281.in -# 
-attrs==23.1.0 -coverage[toml]==7.3.4 -hypothesis==6.45.0 -iniconfig==2.0.0 -mock==5.1.0 -msgpack==1.0.7 -opentracing==2.4.0 -packaging==23.2 -pluggy==1.3.0 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 -pytest-randomly==3.15.0 -sortedcontainers==2.4.0 diff --git a/.riot/requirements/de578a7.txt b/.riot/requirements/de578a7.txt new file mode 100644 index 00000000000..8b543430680 --- /dev/null +++ b/.riot/requirements/de578a7.txt @@ -0,0 +1,27 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --no-annotate .riot/requirements/de578a7.in +# +attrs==23.2.0 +coverage[toml]==7.5.0 +gevent==24.2.1 +greenlet==3.0.3 +hypothesis==6.45.0 +iniconfig==2.0.0 +mock==5.1.0 +msgpack==1.0.8 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.5.0 +pytest==8.2.0 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-randomly==3.15.0 +sortedcontainers==2.4.0 +zope-event==5.0 +zope-interface==6.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/ddtrace/__init__.py b/ddtrace/__init__.py index 6002383a926..3d900b587c7 100644 --- a/ddtrace/__init__.py +++ b/ddtrace/__init__.py @@ -22,16 +22,6 @@ from .settings import _config as config -if config._telemetry_enabled: - from ddtrace.internal import telemetry - - telemetry.install_excepthook() - # In order to support 3.12, we start the writer upon initialization. - # See https://github.com/python/cpython/pull/104826. - # Telemetry events will only be sent after the `app-started` is queued. - # This will occur when the agent writer starts. 
- telemetry.telemetry_writer.enable() - from ._monkey import patch # noqa: E402 from ._monkey import patch_all # noqa: E402 from .internal.utils.deprecations import DDTraceDeprecationWarning # noqa: E402 diff --git a/ddtrace/bootstrap/preload.py b/ddtrace/bootstrap/preload.py index d23e26f295f..76c16fabaec 100644 --- a/ddtrace/bootstrap/preload.py +++ b/ddtrace/bootstrap/preload.py @@ -42,6 +42,18 @@ def register_post_preload(func: t.Callable) -> None: log = get_logger(__name__) +# Enable telemetry writer and excepthook as early as possible to ensure we capture any exceptions from initialization +if config._telemetry_enabled: + from ddtrace.internal import telemetry + + telemetry.install_excepthook() + # In order to support 3.12, we start the writer upon initialization. + # See https://github.com/python/cpython/pull/104826. + # Telemetry events will only be sent after the `app-started` is queued. + # This will occur when the agent writer starts. + telemetry.telemetry_writer.enable() + + if profiling_config.enabled: log.debug("profiler enabled via environment variable") try: diff --git a/ddtrace/internal/runtime/runtime_metrics.py b/ddtrace/internal/runtime/runtime_metrics.py index 662a3cf0cde..9a8f34423c6 100644 --- a/ddtrace/internal/runtime/runtime_metrics.py +++ b/ddtrace/internal/runtime/runtime_metrics.py @@ -7,14 +7,13 @@ import attr import ddtrace +from ddtrace import config from ddtrace.internal import atexit from ddtrace.internal import forksafe from .. import periodic -from .. 
import telemetry from ..dogstatsd import get_dogstatsd_client from ..logger import get_logger -from ..telemetry.constants import TELEMETRY_RUNTIMEMETRICS_ENABLED from .constants import DEFAULT_RUNTIME_METRICS from .metric_collectors import GCRuntimeMetricCollector from .metric_collectors import PSUtilRuntimeMetricCollector @@ -24,6 +23,10 @@ log = get_logger(__name__) +if config._telemetry_enabled: + import ddtrace.internal.telemetry as telemetry + from ddtrace.internal.telemetry.constants import TELEMETRY_RUNTIMEMETRICS_ENABLED + class RuntimeCollectorsIterable(object): def __init__(self, enabled=None): @@ -108,7 +111,8 @@ def disable(cls): cls.enabled = False # Report status to telemetry - telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, False, origin="unknown") + if config._telemetry_enabled: + telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, False, origin="unknown") @classmethod def _restart(cls): @@ -135,7 +139,8 @@ def enable(cls, flush_interval=None, tracer=None, dogstatsd_url=None): cls.enabled = True # Report status to telemetry - telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, True, origin="unknown") + if config._telemetry_enabled: + telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, True, origin="unknown") def flush(self): # type: () -> None diff --git a/releasenotes/notes/fix-telemetry-writer-initialization-163b1effc09d948f.yaml b/releasenotes/notes/fix-telemetry-writer-initialization-163b1effc09d948f.yaml new file mode 100644 index 00000000000..f05fe6ef3e7 --- /dev/null +++ b/releasenotes/notes/fix-telemetry-writer-initialization-163b1effc09d948f.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + telemetry: This fix resolves an issue when using ``pytest`` + ``gevent`` where the telemetry writer was eager initialized by ``pytest`` entrypoints loading of our plugin causing a potential dead lock. 
diff --git a/riotfile.py b/riotfile.py index 2872bac2d31..e634dec0f41 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2604,6 +2604,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): "msgpack": latest, "coverage": latest, "pytest-randomly": latest, + "gevent": latest, }, ), Venv( diff --git a/tests/ci_visibility/test_cli.py b/tests/ci_visibility/test_cli.py new file mode 100644 index 00000000000..a34f0f389be --- /dev/null +++ b/tests/ci_visibility/test_cli.py @@ -0,0 +1,46 @@ +import subprocess + + +def test_pytest_plugin_and_gevent(tmpdir): + """ + The following is a reproduction case for #8281. + + When running pytest with gevent patched, the ddtrace pytest plugin causes a deadlock + or error because of an unreleased lock. + + The cause was eager loading the telemetry writer in ddtrace/__init__.py which gets + loaded by pytest when loading the entrypoints for our plugin. + """ + + test_code = """ +import ddtrace + +import gevent.monkey +gevent.monkey.patch_all() + +def test_thing(): + pass + """ + + test_file = tmpdir / "test.py" + test_file.write(test_code) + + commands_to_test = [ + [ + "pytest", + ], + ["pytest", "-p", "no:ddtrace"], + ["pytest", "-p", "ddtrace"], + ["pytest", "-p", "ddtrace", "-p", "ddtrace.pytest_bdd", "-p", "ddtrace.pytest_benchmark"], + ["pytest", "-p", "no:ddtrace", "-p", "no:ddtrace.pytest_bdd", "-p", "no:ddtrace.pytest_benchmark"], + ] + + for command_args in commands_to_test: + args = command_args + [str(test_file)] + result = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) + assert result.returncode == 0, result.stderr diff --git a/tests/telemetry/test_telemetry.py b/tests/telemetry/test_telemetry.py index f1a024c4c21..9d63d2454e2 100644 --- a/tests/telemetry/test_telemetry.py +++ b/tests/telemetry/test_telemetry.py @@ -266,12 +266,11 @@ def process_trace(self, trace): ]["message"] +@pytest.mark.skip(reason="We don't have a way to capture unhandled errors in 
bootstrap before telemetry is loaded") def test_app_started_error_unhandled_exception(test_agent_session, run_python_code_in_subprocess): - env = get_default_telemetry_env( - {"DD_SPAN_SAMPLING_RULES": "invalid_rules", "DD_INSTRUMENTATION_TELEMETRY_ENABLED": "true"} - ) + env = get_default_telemetry_env({"DD_SPAN_SAMPLING_RULES": "invalid_rules"}) - _, stderr, status, _ = run_python_code_in_subprocess("import ddtrace", env=env) + _, stderr, status, _ = run_python_code_in_subprocess("import ddtrace.auto", env=env) assert status == 1, stderr assert b"Unable to parse DD_SPAN_SAMPLING_RULES=" in stderr @@ -364,14 +363,12 @@ def test_handled_integration_error(test_agent_session, run_python_code_in_subpro assert integration_error["tags"] == ["integration_name:sqlite3", "error_type:attributeerror"] -def test_unhandled_integration_error(test_agent_session, run_python_code_in_subprocess): +def test_unhandled_integration_error(test_agent_session, ddtrace_run_python_code_in_subprocess): + env = get_default_telemetry_env({"DD_PATCH_MODULES": "jinja2:False,subprocess:False"}) code = """ import logging logging.basicConfig() -import ddtrace -ddtrace.patch(flask=True) - import flask f = flask.Flask("hi") @@ -379,7 +376,7 @@ def test_unhandled_integration_error(test_agent_session, run_python_code_in_subp f.wsgi_app() """ - _, stderr, status, _ = run_python_code_in_subprocess(code, env=get_default_telemetry_env()) + _, stderr, status, _ = ddtrace_run_python_code_in_subprocess(code, env=env) assert status == 1, stderr @@ -402,7 +399,6 @@ def test_unhandled_integration_error(test_agent_session, run_python_code_in_subp integration_events = [event for event in events if event["request_type"] == "app-integrations-change"] integrations = integration_events[0]["payload"]["integrations"] assert len(integrations) == 1 - assert integrations[0]["name"] == "flask" assert integrations[0]["enabled"] is True assert integrations[0]["compatible"] is False assert "ddtrace/contrib/flask/patch.py:" in 
integrations[0]["error"] From c763f8964efbfd9d818dab21e87f862fd9ecd711 Mon Sep 17 00:00:00 2001 From: Munir Abdinur Date: Mon, 6 May 2024 13:25:12 -0400 Subject: [PATCH 10/26] fix(tracing): sampling decision set in span links (#9159) When `context.sampling_priority == -1` trace_flags is set to `1`. This is incorrect. A sampling_priority of -1 or 0 should map to a trace_flag of 0. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Brett Langdon --- ddtrace/propagation/http.py | 2 +- .../fix-sampling-in-spanlink-dt-6eb088879f75cdba.yaml | 4 ++++ tests/tracer/test_propagation.py | 10 +++++----- 3 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/fix-sampling-in-spanlink-dt-6eb088879f75cdba.yaml diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 9a2489cadd4..c0768194d8b 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -904,7 +904,7 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): SpanLink( context.trace_id, context.span_id, - flags=1 if context.sampling_priority else 0, + flags=1 if context.sampling_priority and context.sampling_priority > 0 else 0, tracestate=context._meta.get(W3C_TRACESTATE_KEY, "") if style_w_ctx == _PROPAGATION_STYLE_W3C_TRACECONTEXT else None, diff --git a/releasenotes/notes/fix-sampling-in-spanlink-dt-6eb088879f75cdba.yaml b/releasenotes/notes/fix-sampling-in-spanlink-dt-6eb088879f75cdba.yaml new file mode 100644 index 00000000000..519e8cf8059 --- /dev/null +++ b/releasenotes/notes/fix-sampling-in-spanlink-dt-6eb088879f75cdba.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + tracing: Ensures span links generated by 
distributed tracing headers record the correct sampling decision. diff --git a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py index 2a28b17fd92..e6c263e5d83 100644 --- a/tests/tracer/test_propagation.py +++ b/tests/tracer/test_propagation.py @@ -680,7 +680,7 @@ def test_get_wsgi_header(tracer): # noqa: F811 } TRACECONTEXT_HEADERS_VALID_RUM_NO_SAMPLING_DECISION = { - _HTTP_HEADER_TRACEPARENT: "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01", + _HTTP_HEADER_TRACEPARENT: "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-00", _HTTP_HEADER_TRACESTATE: "dd=o:rum", } @@ -1761,7 +1761,7 @@ def test_extract_tracecontext(headers, expected_context): ( "no_additional_tracestate_support_when_present_but_trace_ids_do_not_match", [PROPAGATION_STYLE_DATADOG, _PROPAGATION_STYLE_W3C_TRACECONTEXT], - ALL_HEADERS, + {**DATADOG_HEADERS_VALID, **TRACECONTEXT_HEADERS_VALID_RUM_NO_SAMPLING_DECISION}, { "trace_id": 13088165645273925489, "span_id": 5678, @@ -1772,8 +1772,8 @@ def test_extract_tracecontext(headers, expected_context): SpanLink( trace_id=TRACE_ID, span_id=67667974448284343, - tracestate=TRACECONTEXT_HEADERS_VALID[_HTTP_HEADER_TRACESTATE], - flags=1, + tracestate="dd=o:rum", + flags=0, attributes={"reason": "terminated_context", "context_headers": "tracecontext"}, ) ], @@ -1824,7 +1824,7 @@ def test_extract_tracecontext(headers, expected_context): "dd_origin": "rum", "meta": { "tracestate": "dd=o:rum", - "traceparent": TRACECONTEXT_HEADERS_VALID[_HTTP_HEADER_TRACEPARENT], + "traceparent": TRACECONTEXT_HEADERS_VALID_RUM_NO_SAMPLING_DECISION[_HTTP_HEADER_TRACEPARENT], LAST_DD_PARENT_ID_KEY: "0000000000000000", }, }, From 43ed11f2288d3677d6aa2d574a8f0d2fd2e9982a Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Mon, 6 May 2024 12:01:29 -0700 Subject: [PATCH 11/26] ci: mark flaky test, verbose output on some suites (#9171) This change adds verbose output to two suites to aid in catching unreliable 
tests like these: https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60831/workflows/1208499e-b0a0-4f81-a50d-ea38475a11dc/jobs/3816030 https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60831/workflows/1208499e-b0a0-4f81-a50d-ea38475a11dc/jobs/3816040 It also marks an unreliable test in the `celery` suite. https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60798/workflows/7a11bd02-5a05-412e-9e07-c98dd0dd85ea/jobs/3813881 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- riotfile.py | 4 ++-- tests/contrib/celery/test_integration.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/riotfile.py b/riotfile.py index e634dec0f41..edf89b9d147 100644 --- a/riotfile.py +++ b/riotfile.py @@ -440,7 +440,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): env={ "DD_TRACE_AGENT_URL": "http://localhost:8126", }, - command="pytest {cmdargs} tests/internal/", + command="pytest -v {cmdargs} tests/internal/", pkgs={ "httpretty": latest, "gevent": latest, @@ -1698,7 +1698,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): ), Venv( name="grpc", - command="python -m pytest {cmdargs} tests/contrib/grpc", + command="python -m pytest -v {cmdargs} tests/contrib/grpc", pkgs={ "googleapis-common-protos": latest, "pytest-randomly": latest, diff --git a/tests/contrib/celery/test_integration.py b/tests/contrib/celery/test_integration.py index 09c30f2c2ef..8210ad634c7 100644 --- a/tests/contrib/celery/test_integration.py +++ b/tests/contrib/celery/test_integration.py @@ -704,6 +704,7 @@ def fn_task_parameters(user, force_logout=False): assert len(traces) == 2 assert len(traces[0]) + len(traces[1]) == 3 + @flaky(1720288420) def 
test_beat_scheduler_tracing(self): @self.app.task def fn_task(): From 242e2b02bc56ce38247a7352bb04fa115f32f3a1 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Mon, 6 May 2024 12:22:06 -0700 Subject: [PATCH 12/26] ci: mark flaky tests in the profile suite (#9170) This change temporarily skips unreliable tests in the `profile` suite. Recent failures of these tests on the main branch: https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60782/workflows/078778dd-deb8-4c4d-bb3c-1325370eba89/jobs/3812548 https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60798/workflows/7a11bd02-5a05-412e-9e07-c98dd0dd85ea/jobs/3813849 https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60814/workflows/80c5392d-0991-401f-8119-c290a15141b9/jobs/3814905 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- tests/profiling/collector/test_memalloc.py | 1 + tests/profiling/collector/test_threading.py | 36 +++++++++++---------- tests/profiling/test_uwsgi.py | 2 ++ 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/profiling/collector/test_memalloc.py b/tests/profiling/collector/test_memalloc.py index 17120bd491a..3265b760fee 100644 --- a/tests/profiling/collector/test_memalloc.py +++ b/tests/profiling/collector/test_memalloc.py @@ -183,6 +183,7 @@ def test_memory_collector(): assert count_object > 0 +@flaky(1719591602) @pytest.mark.parametrize( "ignore_profiler", (True, False), diff --git a/tests/profiling/collector/test_threading.py b/tests/profiling/collector/test_threading.py index 8064f1cb474..dbe1e0179f4 100644 --- a/tests/profiling/collector/test_threading.py +++ b/tests/profiling/collector/test_threading.py @@ -8,6 +8,7 @@ from ddtrace.profiling import recorder from ddtrace.profiling.collector import threading as collector_threading +from tests.utils import flaky from . 
import test_collector @@ -67,13 +68,13 @@ def test_lock_acquire_events(): assert len(r.events[collector_threading.ThreadingLockAcquireEvent]) == 1 assert len(r.events[collector_threading.ThreadingLockReleaseEvent]) == 0 event = r.events[collector_threading.ThreadingLockAcquireEvent][0] - assert event.lock_name == "test_threading.py:65" + assert event.lock_name == "test_threading.py:66" assert event.thread_id == _thread.get_ident() assert event.wait_time_ns >= 0 # It's called through pytest so I'm sure it's gonna be that long, right? assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == (__file__.replace(".pyc", ".py"), 66, "test_lock_acquire_events", "") + assert event.frames[0] == (__file__.replace(".pyc", ".py"), 67, "test_lock_acquire_events", "") assert event.sampling_pct == 100 @@ -91,13 +92,13 @@ def lockfunc(self): assert len(r.events[collector_threading.ThreadingLockAcquireEvent]) == 1 assert len(r.events[collector_threading.ThreadingLockReleaseEvent]) == 0 event = r.events[collector_threading.ThreadingLockAcquireEvent][0] - assert event.lock_name == "test_threading.py:86" + assert event.lock_name == "test_threading.py:87" assert event.thread_id == _thread.get_ident() assert event.wait_time_ns >= 0 # It's called through pytest so I'm sure it's gonna be that long, right? 
assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == (__file__.replace(".pyc", ".py"), 87, "lockfunc", "Foobar") + assert event.frames[0] == (__file__.replace(".pyc", ".py"), 88, "lockfunc", "Foobar") assert event.sampling_pct == 100 @@ -118,14 +119,14 @@ def test_lock_events_tracer(tracer): events = r.reset() # The tracer might use locks, so we need to look into every event to assert we got ours for event_type in (collector_threading.ThreadingLockAcquireEvent, collector_threading.ThreadingLockReleaseEvent): - assert {"test_threading.py:109", "test_threading.py:112"}.issubset({e.lock_name for e in events[event_type]}) + assert {"test_threading.py:110", "test_threading.py:113"}.issubset({e.lock_name for e in events[event_type]}) for event in events[event_type]: - if event.name == "test_threading.py:85": + if event.name == "test_threading.py:86": assert event.trace_id is None assert event.span_id is None assert event.trace_resource_container is None assert event.trace_type is None - elif event.name == "test_threading.py:88": + elif event.name == "test_threading.py:89": assert event.trace_id == trace_id assert event.span_id == span_id assert event.trace_resource_container[0] == t.resource @@ -151,14 +152,14 @@ def test_lock_events_tracer_late_finish(tracer): events = r.reset() # The tracer might use locks, so we need to look into every event to assert we got ours for event_type in (collector_threading.ThreadingLockAcquireEvent, collector_threading.ThreadingLockReleaseEvent): - assert {"test_threading.py:140", "test_threading.py:143"}.issubset({e.lock_name for e in events[event_type]}) + assert {"test_threading.py:141", "test_threading.py:144"}.issubset({e.lock_name for e in events[event_type]}) for event in events[event_type]: - if event.name == "test_threading.py:116": + if event.name == "test_threading.py:117": assert event.trace_id is None assert event.span_id is None assert event.trace_resource_container is None assert event.trace_type 
is None - elif event.name == "test_threading.py:119": + elif event.name == "test_threading.py:120": assert event.trace_id == trace_id assert event.span_id == span_id assert event.trace_resource_container[0] == span.resource @@ -183,14 +184,14 @@ def test_resource_not_collected(monkeypatch, tracer): events = r.reset() # The tracer might use locks, so we need to look into every event to assert we got ours for event_type in (collector_threading.ThreadingLockAcquireEvent, collector_threading.ThreadingLockReleaseEvent): - assert {"test_threading.py:174", "test_threading.py:177"}.issubset({e.lock_name for e in events[event_type]}) + assert {"test_threading.py:175", "test_threading.py:178"}.issubset({e.lock_name for e in events[event_type]}) for event in events[event_type]: - if event.name == "test_threading.py:150": + if event.name == "test_threading.py:151": assert event.trace_id is None assert event.span_id is None assert event.trace_resource_container is None assert event.trace_type is None - elif event.name == "test_threading.py:153": + elif event.name == "test_threading.py:154": assert event.trace_id == trace_id assert event.span_id == span_id assert event.trace_resource_container is None @@ -206,13 +207,13 @@ def test_lock_release_events(): assert len(r.events[collector_threading.ThreadingLockAcquireEvent]) == 1 assert len(r.events[collector_threading.ThreadingLockReleaseEvent]) == 1 event = r.events[collector_threading.ThreadingLockReleaseEvent][0] - assert event.lock_name == "test_threading.py:203" + assert event.lock_name == "test_threading.py:204" assert event.thread_id == _thread.get_ident() assert event.locked_for_ns >= 0 # It's called through pytest so I'm sure it's gonna be that long, right? 
assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == (__file__.replace(".pyc", ".py"), 205, "test_lock_release_events", "") + assert event.frames[0] == (__file__.replace(".pyc", ".py"), 206, "test_lock_release_events", "") assert event.sampling_pct == 100 @@ -246,7 +247,7 @@ def play_with_lock(): assert len(r.events[collector_threading.ThreadingLockReleaseEvent]) >= 1 for event in r.events[collector_threading.ThreadingLockAcquireEvent]: - if event.lock_name == "test_threading.py:236": + if event.lock_name == "test_threading.py:237": assert event.wait_time_ns >= 0 assert event.task_id == t.ident assert event.task_name == "foobar" @@ -260,7 +261,7 @@ def play_with_lock(): pytest.fail("Lock event not found") for event in r.events[collector_threading.ThreadingLockReleaseEvent]: - if event.lock_name == "test_threading.py:236": + if event.lock_name == "test_threading.py:237": assert event.locked_for_ns >= 0 assert event.task_id == t.ident assert event.task_name == "foobar" @@ -315,6 +316,7 @@ def test_lock_acquire_release_speed(benchmark): benchmark(_lock_acquire_release, threading.Lock()) +@flaky(1719591602) @pytest.mark.skipif(not sys.platform.startswith("linux"), reason="only works on linux") @pytest.mark.subprocess( ddtrace_run=True, diff --git a/tests/profiling/test_uwsgi.py b/tests/profiling/test_uwsgi.py index 1781a339868..6216552100b 100644 --- a/tests/profiling/test_uwsgi.py +++ b/tests/profiling/test_uwsgi.py @@ -10,6 +10,7 @@ import pytest from tests.contrib.uwsgi import run_uwsgi +from tests.utils import flaky from . 
import utils @@ -49,6 +50,7 @@ def test_uwsgi_threads_disabled(uwsgi): assert THREADS_MSG in stdout +@flaky(1719591602) def test_uwsgi_threads_number_set(uwsgi): proc = uwsgi("--threads", "1") try: From 0c7b9ca54cca928e39704a628019e9254d72ae16 Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Tue, 7 May 2024 10:33:42 +0200 Subject: [PATCH 13/26] fix(iast): false positives outside client code (#9176) If the vulnerability is outside client code, skip the vulnerability ![image](https://github.com/DataDog/dd-trace-py/assets/6352942/f7d9613a-b787-403a-ae99-e790a21b2a35) ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/appsec/_iast/taint_sinks/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/appsec/_iast/taint_sinks/_base.py b/ddtrace/appsec/_iast/taint_sinks/_base.py index 7cba289d644..215e2abb208 100644 --- a/ddtrace/appsec/_iast/taint_sinks/_base.py +++ b/ddtrace/appsec/_iast/taint_sinks/_base.py @@ -135,7 +135,7 @@ def report(cls, evidence_value="", value_parts=None, sources=None): skip_location = getattr(cls, "skip_location", False) if not skip_location: frame_info = get_info_frame(CWD) - if not frame_info: + if not frame_info or frame_info[0] == "" or frame_info[0] == -1: return None file_name, line_number = frame_info From b860ab0e839609f72cb6e97b2237cc3950245658 Mon Sep 17 00:00:00 2001 From: David Sanchez <838104+sanchda@users.noreply.github.com> Date: Tue, 7 May 2024 06:06:37 -0500 Subject: [PATCH 14/26] chore(profiling): pin version of echion dependency (#9142) Profiling introduced a "stack v2" feature recently in order to address reports of segmentation faults. As part of this, we rely on the [echion](https://github.com/P403n1x87/echion) codebase. We'll be making changes to echion in response to certain customer issues, with the corresponding fixes needing to be backported. 
This PR simply tightens up the build process for stack v2 in order to pin the dependency version more precisely. It's a developmental PR for future fixups. ## Checklist - [X] Change(s) are motivated and described in the PR description - [X] Testing strategy is described if automated tests are not included in the PR - [X] Risks are described (performance impact, potential for breakage, maintainability) - [X] Change is maintainable (easy to change, telemetry, documentation) - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [X] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: sanchda --- ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index 
7a57a86fd43..f3ddeefc1c8 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -26,10 +26,11 @@ if (NOT Python3_INCLUDE_DIRS) endif() # Add echion +set(ECHION_COMMIT "d69048b63e7d49091659964915eb52ff0ced9fc8" CACHE STRING "Commit hash of echion to use") FetchContent_Declare( echion GIT_REPOSITORY "https://github.com/sanchda/echion.git" - GIT_TAG "sanchda/more_rendering" + GIT_TAG ${ECHION_COMMIT} ) FetchContent_GetProperties(echion) if(NOT echion_POPULATED) From a50add4b766ca5c090049f954378d541a35f6bc6 Mon Sep 17 00:00:00 2001 From: Dmytro Yurchenko <88330911+ddyurchenko@users.noreply.github.com> Date: Tue, 7 May 2024 15:26:46 +0200 Subject: [PATCH 15/26] chore(ci): fix benchmarks images (#9181) Currently microbenchmarks are broken, because wrong BASE_CI_IMAGE is substituted in CI job. This PR fixes references to CI images, so both macro- and microbenchmarks work. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .gitlab/benchmarks.yml | 6 +++--- .gitlab/macrobenchmarks.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index e6a83ad2bdb..5e2f613b17b 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -1,12 +1,12 @@ variables: - BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py + MICROBENCHMARKS_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py SLS_CI_IMAGE: registry.ddbuild.io/ci/serverless-tools:1 .benchmarks: stage: benchmarks when: on_success tags: ["runner:apm-k8s-tweaked-metal"] - image: $BASE_CI_IMAGE + image: $MICROBENCHMARKS_CI_IMAGE interruptible: true timeout: 1h script: @@ -37,7 +37,7 @@ benchmarks-pr-comment: stage: benchmarks-pr-comment when: on_success tags: ["arch:amd64"] - image: $BASE_CI_IMAGE + image: $MICROBENCHMARKS_CI_IMAGE script: - export REPORTS_DIR="$(pwd)/reports/" && (mkdir "${REPORTS_DIR}" || :) - git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/" diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index d07a674613e..69d65339ac2 100644 --- 
a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -1,5 +1,5 @@ variables: - BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py-macrobenchmarks + MACROBENCHMARKS_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-py-macrobenchmarks .macrobenchmarks: stage: macrobenchmarks @@ -14,7 +14,7 @@ variables: # - if: $CI_COMMIT_REF_NAME == "main" # when: always # If you have a problem with Gitlab cache, see Troubleshooting section in Benchmarking Platform docs - image: $BASE_CI_IMAGE + image: $MACROBENCHMARKS_CI_IMAGE script: | git clone --branch python/macrobenchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform platform && cd platform if [ "$BP_PYTHON_SCENARIO_DIR" == "flask-realworld" ]; then From 91653fb27c9e7d98026b2d6a26042f4e7d5f1cd6 Mon Sep 17 00:00:00 2001 From: Christophe Papazian <114495376+christophe-papazian@users.noreply.github.com> Date: Tue, 7 May 2024 17:29:20 +0200 Subject: [PATCH 16/26] chore(asm): enable rasp system tests for tracer (#9183) Enable new system tests for exploit prevention on the tracer ci APPSEC-52926 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .github/workflows/system-tests.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/system-tests.yml b/.github/workflows/system-tests.yml index 76d679311e5..b3ece4ea8db 100644 --- a/.github/workflows/system-tests.yml +++ b/.github/workflows/system-tests.yml @@ -208,8 +208,8 @@ jobs: - name: Run APPSEC_BLOCKING if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' - run: ./run.sh APPSEC_BLOCKING + - name: Run APPSEC_BLOCKING_FULL_DENYLIST if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_BLOCKING_FULL_DENYLIST @@ -218,6 +218,10 @@ jobs: if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_REQUEST_BLOCKING + - name: Run APPSEC_RASP + if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + run: ./run.sh APPSEC_RASP + # The compress step speed up a lot the upload artifact process - name: Compress artifact if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' From 
2483e17ee9e22054f9c864dedadbeaa8eca9efc0 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Tue, 7 May 2024 09:19:36 -0700 Subject: [PATCH 17/26] ci(profile): update outdated test assumption (#9187) This change updates the expected line number in a failing profile test. Not sure how this managed to pass its pre-merge checks. https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/60875/workflows/96e1dbf5-dd5e-409a-a513-74404953c893/jobs/3818904 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- tests/profiling/collector/test_threading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/profiling/collector/test_threading.py b/tests/profiling/collector/test_threading.py index dbe1e0179f4..2cd538a5a49 100644 --- a/tests/profiling/collector/test_threading.py +++ b/tests/profiling/collector/test_threading.py @@ -254,7 +254,7 @@ def play_with_lock(): # It's called through pytest so I'm sure it's gonna be that long, right? assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == ("tests/profiling/collector/test_threading.py", 237, "play_with_lock", "") + assert event.frames[0] == ("tests/profiling/collector/test_threading.py", 238, "play_with_lock", "") assert event.sampling_pct == 100 break else: From fef6e6a583e00ddaa438d2c115ad90964d8a9e8e Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Tue, 7 May 2024 19:21:25 +0200 Subject: [PATCH 18/26] fix(iast): unexpected Import error catching (#9188) When loading a Python module, AST patching was catching the ImportError exception and not propagating it correctly, so that if one wanted to catch this exception, it could not be done, generating unexpected effects as in the BeautifulSoup4 package. For example: ```python try: from . 
import _html5lib register_treebuilders_from(_html5lib) except ImportError: # They don't have html5lib installed. pass ``` ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Juanjo Alvarez Martinez Co-authored-by: Federico Mon --- ddtrace/appsec/_iast/_loader.py | 5 +--- ...ix-iast-import-error-2eedea126bb9d92d.yaml | 4 +++ tests/appsec/app.py | 8 ++++++ .../packages/pkg_beautifulsoup4.py | 26 +++++++++++++++++++ tests/appsec/iast_packages/test_packages.py | 1 + .../integrations/module_with_import_errors.py | 11 ++++++++ .../integrations/test_flask_iast_patching.py | 20 ++++++++++++++ 7 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/iast-fix-iast-import-error-2eedea126bb9d92d.yaml create mode 100644 tests/appsec/iast_packages/packages/pkg_beautifulsoup4.py create mode 100644 tests/appsec/integrations/module_with_import_errors.py create mode 100644 tests/appsec/integrations/test_flask_iast_patching.py diff --git a/ddtrace/appsec/_iast/_loader.py b/ddtrace/appsec/_iast/_loader.py index 41ac26dfda3..f211191430b 100644 --- a/ddtrace/appsec/_iast/_loader.py +++ b/ddtrace/appsec/_iast/_loader.py @@ -32,10 +32,7 @@ def _exec_iast_patched_module(module_watchdog, module): if compiled_code: # Patched source is executed instead of original module - try: - exec(compiled_code, module.__dict__) # nosec B102 - except ImportError: - log.debug("Unexpected 
exception while executing patched code", exc_info=True) + exec(compiled_code, module.__dict__) # nosec B102 elif module_watchdog.loader is not None: try: module_watchdog.loader.exec_module(module) diff --git a/releasenotes/notes/iast-fix-iast-import-error-2eedea126bb9d92d.yaml b/releasenotes/notes/iast-fix-iast-import-error-2eedea126bb9d92d.yaml new file mode 100644 index 00000000000..a7f8d0172a0 --- /dev/null +++ b/releasenotes/notes/iast-fix-iast-import-error-2eedea126bb9d92d.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Code Security: This fixes a bug in the AST patching process where ``ImportError`` exceptions were being caught, interfering with the proper application cycle if an ``ImportError`` was expected." diff --git a/tests/appsec/app.py b/tests/appsec/app.py index 7c789280088..f761923c27a 100644 --- a/tests/appsec/app.py +++ b/tests/appsec/app.py @@ -9,6 +9,7 @@ import ddtrace.auto # noqa: F401 # isort: skip +from tests.appsec.iast_packages.packages.pkg_beautifulsoup4 import pkg_beautifulsoup4 from tests.appsec.iast_packages.packages.pkg_chartset_normalizer import pkg_chartset_normalizer from tests.appsec.iast_packages.packages.pkg_google_api_core import pkg_google_api_core from tests.appsec.iast_packages.packages.pkg_idna import pkg_idna @@ -17,9 +18,11 @@ from tests.appsec.iast_packages.packages.pkg_pyyaml import pkg_pyyaml from tests.appsec.iast_packages.packages.pkg_requests import pkg_requests from tests.appsec.iast_packages.packages.pkg_urllib3 import pkg_urllib3 +import tests.appsec.integrations.module_with_import_errors as module_with_import_errors app = Flask(__name__) +app.register_blueprint(pkg_beautifulsoup4) app.register_blueprint(pkg_chartset_normalizer) app.register_blueprint(pkg_google_api_core) app.register_blueprint(pkg_idna) @@ -59,5 +62,10 @@ def iast_cmdi_vulnerability(): return resp +@app.route("/iast-ast-patching-import-error", methods=["GET"]) +def iast_ast_patching_import_error(): + return 
Response(str(module_with_import_errors.verbal_kint_is_keyser_soze)) + + if __name__ == "__main__": app.run(debug=False, port=8000) diff --git a/tests/appsec/iast_packages/packages/pkg_beautifulsoup4.py b/tests/appsec/iast_packages/packages/pkg_beautifulsoup4.py new file mode 100644 index 00000000000..6f55b82ba92 --- /dev/null +++ b/tests/appsec/iast_packages/packages/pkg_beautifulsoup4.py @@ -0,0 +1,26 @@ +""" +beautifulsoup4==4.12.3 + +https://pypi.org/project/beautifulsoup4/ +""" +from flask import Blueprint +from flask import request + +from .utils import ResultResponse + + +pkg_beautifulsoup4 = Blueprint("package_beautifulsoup4", __name__) + + +@pkg_beautifulsoup4.route("/beautifulsoup4") +def pkg_beautifusoup4_view(): + from bs4 import BeautifulSoup + + response = ResultResponse(request.args.get("package_param")) + + try: + html = response.package_param + "".join(BeautifulSoup(html, features="lxml").findAll(string=True)).lstrip() + except Exception: + pass + return response.json() diff --git a/tests/appsec/iast_packages/test_packages.py b/tests/appsec/iast_packages/test_packages.py index 7cfe86ce087..5d8f9c1ac98 100644 --- a/tests/appsec/iast_packages/test_packages.py +++ b/tests/appsec/iast_packages/test_packages.py @@ -87,6 +87,7 @@ def install(self): ["https", None, "www.datadoghq.com", None, "/", None, None], "www.datadoghq.com", ), + PackageForTesting("beautifulsoup4", "4.12.3", "", "", ""), ] diff --git a/tests/appsec/integrations/module_with_import_errors.py b/tests/appsec/integrations/module_with_import_errors.py new file mode 100644 index 00000000000..aa65299d081 --- /dev/null +++ b/tests/appsec/integrations/module_with_import_errors.py @@ -0,0 +1,11 @@ +try: + from . 
import _keyser_soze # noqa: F401 + + verbal_kint_is_keyser_soze = True +except ImportError: + verbal_kint_is_keyser_soze = False + + +def func(): + VARIABLE_TO_FORCE_AST_PATCHINT = "a" + "b" + return VARIABLE_TO_FORCE_AST_PATCHINT diff --git a/tests/appsec/integrations/test_flask_iast_patching.py b/tests/appsec/integrations/test_flask_iast_patching.py new file mode 100644 index 00000000000..522dede932e --- /dev/null +++ b/tests/appsec/integrations/test_flask_iast_patching.py @@ -0,0 +1,20 @@ +from tests.appsec.appsec_utils import flask_server + + +def test_flask_iast_ast_patching_import_error(): + """this is a regression test for a bug with IAST + BeautifulSoup4, we were catching the ImportError exception in + ddtrace.appsec._iast._loader + try: + from . import _html5lib + register_treebuilders_from(_html5lib) + except ImportError: + # They don't have html5lib installed. + pass + """ + with flask_server(iast_enabled="true", token=None) as context: + _, flask_client, pid = context + + response = flask_client.get("/iast-ast-patching-import-error") + + assert response.status_code == 200 + assert response.content == b"False" From e5cddcdc6f045b7302f7d4ee237a4e101f91ad85 Mon Sep 17 00:00:00 2001 From: David Sanchez <838104+sanchda@users.noreply.github.com> Date: Tue, 7 May 2024 12:32:46 -0500 Subject: [PATCH 19/26] chore(codeowners): add new directory to profiling (#9175) Probably should have done this a while ago. This directory is the new home for certain native-code assets for supporting the libdatadog (profiling) exporter and "stack v2." 
## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: sanchda --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01e3effa28e..485214bbe12 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -76,6 +76,7 @@ tests/contrib/*/test*appsec*.py @DataDog/asm-python # Profiling ddtrace/profiling @DataDog/profiling-python @DataDog/apm-core-python +ddtrace/internal/datadog/profiling @DataDog/profiling-python 
@DataDog/apm-core-python tests/profiling @DataDog/profiling-python @DataDog/apm-core-python # MLObs From 666d1744a112b208d79389b0117f79c1332b6f05 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Tue, 7 May 2024 10:56:16 -0700 Subject: [PATCH 20/26] ci(profiling): adjust expectation (#9191) This change updates a line-number expectation in the profiling test suite to match reality. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- tests/profiling/collector/test_threading.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/profiling/collector/test_threading.py b/tests/profiling/collector/test_threading.py index 2cd538a5a49..5b2e7b92d6d 100644 --- a/tests/profiling/collector/test_threading.py +++ b/tests/profiling/collector/test_threading.py @@ -254,7 +254,12 @@ def play_with_lock(): # It's called through pytest so I'm sure it's gonna be that long, right? assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == ("tests/profiling/collector/test_threading.py", 238, "play_with_lock", "") + assert event.frames[0] == ( + "tests/profiling/collector/test_threading.py", + 238, + "play_with_lock", + "", + ), event.frames assert event.sampling_pct == 100 break else: @@ -268,7 +273,12 @@ def play_with_lock(): # It's called through pytest so I'm sure it's gonna be that long, right? 
assert len(event.frames) > 3 assert event.nframes > 3 - assert event.frames[0] == ("tests/profiling/collector/test_threading.py", 238, "play_with_lock", "") + assert event.frames[0] == ( + "tests/profiling/collector/test_threading.py", + 239, + "play_with_lock", + "", + ), event.frames assert event.sampling_pct == 100 break else: From c3085672583a0008a4f21d5a04b9e6b0f9e59265 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Tue, 7 May 2024 14:05:29 -0400 Subject: [PATCH 21/26] docs: add falcon version support (#8778) Adds falcon version support to integration table in documentation. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- docs/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index 582103f7d3d..e218f9d6b75 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -82,6 +82,8 @@ contacting support. +--------------------------------------------------+---------------+----------------+ | :ref:`elasticsearch` | >= 1.10,< 8.0 | Yes | +--------------------------------------------------+---------------+----------------+ +| :ref:`falcon` | >= 3.0 | Yes | ++--------------------------------------------------+---------------+----------------+ | :ref:`fastapi` | >= 0.64 | Yes | +--------------------------------------------------+---------------+----------------+ | :ref:`flask` | >= 1.0 | Yes | From 3a12bacf64563ce94cf2f7797b2b8e12c6d5524c Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Tue, 7 May 2024 12:03:33 -0700 Subject: [PATCH 22/26] chore(telemetry): remove _trace_enabled config option (#9164) This change removes the redundant `_trace_enabled` configuration option from `settings.config`. It was only used in the instrumentation telemetry writer and its functionality is fully duplicated by the `_tracing_enabled` option. Existing tests cover this change. 
## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/internal/telemetry/writer.py | 8 ++------ ddtrace/settings/config.py | 5 ----- tests/integration/test_settings.py | 8 ++++---- tests/telemetry/test_writer.py | 4 +--- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/ddtrace/internal/telemetry/writer.py b/ddtrace/internal/telemetry/writer.py index 8c9ebf6f1f7..ba6fed2cf17 100644 --- a/ddtrace/internal/telemetry/writer.py +++ b/ddtrace/internal/telemetry/writer.py @@ -370,10 
+370,7 @@ def add_configs_changed(self, cfg_names): def _telemetry_entry(self, cfg_name: str) -> Tuple[str, str, _ConfigSource]: item = config._config[cfg_name] - if cfg_name == "_trace_enabled": - name = "trace_enabled" - value = "true" if item.value() else "false" - elif cfg_name == "_profiling_enabled": + if cfg_name == "_profiling_enabled": name = "profiling_enabled" value = "true" if item.value() else "false" elif cfg_name == "_asm_enabled": @@ -398,7 +395,7 @@ def _telemetry_entry(self, cfg_name: str) -> Tuple[str, str, _ConfigSource]: name = "trace_tags" value = ",".join(":".join(x) for x in item.value().items()) elif cfg_name == "_tracing_enabled": - name = "tracing_enabled" + name = "trace_enabled" value = "true" if item.value() else "false" elif cfg_name == "_sca_enabled": name = "DD_APPSEC_SCA_ENABLED" @@ -432,7 +429,6 @@ def _app_started_event(self, register_app_shutdown=True): self.add_configurations( [ - self._telemetry_entry("_trace_enabled"), self._telemetry_entry("_profiling_enabled"), self._telemetry_entry("_asm_enabled"), self._telemetry_entry("_sca_enabled"), diff --git a/ddtrace/settings/config.py b/ddtrace/settings/config.py index fc0083d6222..972d2fff700 100644 --- a/ddtrace/settings/config.py +++ b/ddtrace/settings/config.py @@ -273,11 +273,6 @@ def _parse_global_tags(s): def _default_config(): # type: () -> Dict[str, _ConfigItem] return { - "_trace_enabled": _ConfigItem( - name="trace_enabled", - default=True, - envs=[("DD_TRACE_ENABLED", asbool)], - ), "_trace_sample_rate": _ConfigItem( name="trace_sample_rate", default=1.0, diff --git a/tests/integration/test_settings.py b/tests/integration/test_settings.py index d440ba5f058..2b439a8cf84 100644 --- a/tests/integration/test_settings.py +++ b/tests/integration/test_settings.py @@ -59,8 +59,8 @@ def test_setting_origin_environment(test_agent_session, run_python_code_in_subpr events_trace_tags = _get_telemetry_config_items(events, "trace_tags") assert {"name": "trace_tags", "value": 
"team:apm,component:web", "origin": "env_var"} in events_trace_tags - events_tracing_enabled = _get_telemetry_config_items(events, "tracing_enabled") - assert {"name": "tracing_enabled", "value": "true", "origin": "env_var"} in events_tracing_enabled + events_tracing_enabled = _get_telemetry_config_items(events, "trace_enabled") + assert {"name": "trace_enabled", "value": "true", "origin": "env_var"} in events_tracing_enabled @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") @@ -122,9 +122,9 @@ def test_setting_origin_code(test_agent_session, run_python_code_in_subprocess): "origin": "code", } in events_trace_tags - events_tracing_enabled = _get_telemetry_config_items(events, "tracing_enabled") + events_tracing_enabled = _get_telemetry_config_items(events, "trace_enabled") assert { - "name": "tracing_enabled", + "name": "trace_enabled", "value": "false", "origin": "code", } in events_tracing_enabled diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py index c25482e849e..541e002d299 100644 --- a/tests/telemetry/test_writer.py +++ b/tests/telemetry/test_writer.py @@ -136,7 +136,6 @@ def test_app_started_event(telemetry_writer, test_agent_session, mock_time): {"name": "DD_TRACE_WRITER_REUSE_CONNECTIONS", "origin": "unknown", "value": False}, {"name": "ddtrace_auto_used", "origin": "unknown", "value": False}, {"name": "ddtrace_bootstrapped", "origin": "unknown", "value": False}, - {"name": "trace_enabled", "origin": "default", "value": "true"}, {"name": "profiling_enabled", "origin": "default", "value": "false"}, {"name": "data_streams_enabled", "origin": "default", "value": "false"}, {"name": "appsec_enabled", "origin": "default", "value": "false"}, @@ -145,7 +144,7 @@ def test_app_started_event(telemetry_writer, test_agent_session, mock_time): {"name": "trace_header_tags", "origin": "default", "value": ""}, {"name": "logs_injection_enabled", "origin": "default", "value": "false"}, {"name": 
"trace_tags", "origin": "default", "value": ""}, - {"name": "tracing_enabled", "origin": "default", "value": "true"}, + {"name": "trace_enabled", "origin": "default", "value": "true"}, {"name": "instrumentation_config_id", "origin": "default", "value": ""}, ], key=lambda x: x["name"], @@ -315,7 +314,6 @@ def test_app_started_event_configuration_override( {"name": "logs_injection_enabled", "origin": "env_var", "value": "true"}, {"name": "trace_header_tags", "origin": "default", "value": ""}, {"name": "trace_tags", "origin": "env_var", "value": "team:apm,component:web"}, - {"name": "tracing_enabled", "origin": "env_var", "value": "false"}, {"name": "instrumentation_config_id", "origin": "env_var", "value": "abcedf123"}, ], key=lambda x: x["name"], From dac8aa29c737baaac8b1431b26c7fba487ba4fb8 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Tue, 7 May 2024 12:31:58 -0700 Subject: [PATCH 23/26] chore: shorten time to close stale issues and PRs (#9193) This change updates the github actions config to close pull requests and issues that have not been updated in the last 30 days. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Brett Langdon --- .github/workflows/stale.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 59c1f9d6ab1..9a9a7f672a8 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,4 +1,4 @@ -name: 'Label stale issues and PRs' +name: 'Close stale issues and PRs' on: schedule: # 00:00:000 UTC @@ -17,16 +17,16 @@ jobs: # DEV: GitHub Actions have an API rate limit of 1000 operations per hour per repository # This limit is shared across all actions operations-per-run: 200 - days-before-close: 180 + days-before-close: 30 exempt-issue-labels: 'proposal' exempt-pr-labels: 'proposal' close-issue-message: | - This issue has been automatically closed after six months of inactivity. If it's a + This issue has been automatically closed after a period of inactivity. If it's a feature request, it has been added to the maintainers' internal backlog and will be included in an upcoming round of feature prioritization. Please comment or reopen if you think this issue was closed in error. close-pr-message: | - This pull request has been automatically closed after six months of inactivity. 
+ This pull request has been automatically closed after a period of inactivity. After this much time, it will likely be easier to open a new pull request with the same changes than to update this one from the base branch. Please comment or reopen if you think this pull request was closed in error. From 22302147a2ac3baa03727c73085fd6575fba60e9 Mon Sep 17 00:00:00 2001 From: Munir Abdinur Date: Tue, 7 May 2024 21:58:41 -0400 Subject: [PATCH 24/26] chore(distributed_tracing): ensure last datadog parent id tag is always set (#9174) Implements W3C Phase 3. The last datadog parent id is always propagated in a distributed trace (if ddtrace propagation style includes tracecontext). This change will reduce instances where flamegraphs contain missing spans. Note - This change does not require a release note. It is a follow up to a previous PR. Follow up to: https://github.com/DataDog/dd-trace-py/commit/90a3e3fed47763abf64cb4c04c6a32f5c64076cb#diff-18af2a7682cf3014ab4ae236fc54d2454bbf5293e81e098b1d22d39bdb61012d ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/internal/constants.py | 1 + ddtrace/propagation/http.py | 14 +++++++++----- tests/tracer/test_propagation.py | 17 +++++++++++------ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/ddtrace/internal/constants.py b/ddtrace/internal/constants.py index 50b8e1280e4..7988687a74c 100644 --- a/ddtrace/internal/constants.py +++ b/ddtrace/internal/constants.py @@ -25,6 +25,7 @@ DEFAULT_SAMPLING_RATE_LIMIT = 100 SAMPLING_DECISION_TRACE_TAG_KEY = "_dd.p.dm" LAST_DD_PARENT_ID_KEY = "_dd.parent_id" +DEFAULT_LAST_PARENT_ID = "0000000000000000" DEFAULT_SERVICE_NAME = "unnamed-python-service" # Used to set the name of an integration on a span COMPONENT = "component" diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index c0768194d8b..6ad0c86d41d 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -38,6 +38,7 @@ from ..internal.compat import ensure_text from ..internal.constants import _PROPAGATION_STYLE_NONE from ..internal.constants import _PROPAGATION_STYLE_W3C_TRACECONTEXT +from ..internal.constants import DEFAULT_LAST_PARENT_ID from ..internal.constants import HIGHER_ORDER_TRACE_ID_BITS as _HIGHER_ORDER_TRACE_ID_BITS from ..internal.constants import LAST_DD_PARENT_ID_KEY from 
..internal.constants import MAX_UINT_64BITS as _MAX_UINT_64BITS @@ -700,7 +701,7 @@ def _get_traceparent_values(tp): @staticmethod def _get_tracestate_values(ts_l): - # type: (List[str]) -> Tuple[Optional[int], Dict[str, str], Optional[str], Optional[str]] + # type: (List[str]) -> Tuple[Optional[int], Dict[str, str], Optional[str], str] # tracestate list parsing example: ["dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64","congo=t61rcWkgMzE"] # -> 2, {"_dd.p.dm":"-4","_dd.p.usr.id":"baz64"}, "rum" @@ -727,7 +728,7 @@ def _get_tracestate_values(ts_l): origin = _TraceContext.decode_tag_val(origin) # Get last datadog parent id, this field is used to reconnect traces with missing spans - lpid = dd.get("p", "0000000000000000") + lpid = dd.get("p", DEFAULT_LAST_PARENT_ID) # need to convert from t. to _dd.p. other_propagated_tags = { @@ -736,7 +737,7 @@ def _get_tracestate_values(ts_l): return sampling_priority_ts_int, other_propagated_tags, origin, lpid else: - return None, {}, None, None + return None, {}, None, DEFAULT_LAST_PARENT_ID @staticmethod def _get_sampling_priority( @@ -820,8 +821,7 @@ def _get_context(trace_id, span_id, trace_flag, ts, meta=None): if tracestate_values: sampling_priority_ts, other_propagated_tags, origin, lpid = tracestate_values meta.update(other_propagated_tags.items()) - if lpid: - meta[LAST_DD_PARENT_ID_KEY] = lpid + meta[LAST_DD_PARENT_ID_KEY] = lpid sampling_priority = _TraceContext._get_sampling_priority(trace_flag, sampling_priority_ts, origin) else: @@ -921,6 +921,10 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): ts = _extract_header_value(_POSSIBLE_HTTP_HEADER_TRACESTATE, normalized_headers) if ts: primary_context._meta[W3C_TRACESTATE_KEY] = ts + # Ensure the last datadog parent id is always set on the primary context + primary_context._meta[LAST_DD_PARENT_ID_KEY] = context._meta.get( + LAST_DD_PARENT_ID_KEY, DEFAULT_LAST_PARENT_ID + ) primary_context._span_links = links return primary_context diff --git 
a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py index e6c263e5d83..4c3071e61b0 100644 --- a/tests/tracer/test_propagation.py +++ b/tests/tracer/test_propagation.py @@ -692,7 +692,7 @@ def test_get_wsgi_header(tracer): # noqa: F811 TRACECONTEXT_HEADERS_VALID_64_bit = { _HTTP_HEADER_TRACEPARENT: "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01", - _HTTP_HEADER_TRACESTATE: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE", + _HTTP_HEADER_TRACESTATE: "dd=s:2;o:rum;p:00f067aa0ba902b7;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE", } @@ -937,7 +937,7 @@ def test_extract_traceparent(caplog, headers, expected_tuple, expected_logging, ( "congo=t61rcWkgMzE,mako=s:2;o:rum;", # sampling_priority_ts, other_propagated_tags, origin, parent id - (None, {}, None, None), + (None, {}, None, "0000000000000000"), None, None, ), @@ -1754,7 +1754,11 @@ def test_extract_tracecontext(headers, expected_context): "span_id": 5678, "sampling_priority": 1, "dd_origin": "synthetics", - "meta": {"tracestate": TRACECONTEXT_HEADERS_VALID[_HTTP_HEADER_TRACESTATE], "_dd.p.dm": "-3"}, + "meta": { + "tracestate": DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS[_HTTP_HEADER_TRACESTATE], + "_dd.p.dm": "-3", + LAST_DD_PARENT_ID_KEY: "00f067aa0ba902b7", + }, }, ), # testing that tracestate is not added when tracecontext style comes later and does not match first style's trace-id @@ -2002,11 +2006,11 @@ def test_DD_TRACE_PROPAGATION_STYLE_EXTRACT_overrides_DD_TRACE_PROPAGATION_STYLE span_id=67667974448284343, meta={ "traceparent": "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01", - "tracestate": TRACECONTEXT_HEADERS_VALID[_HTTP_HEADER_TRACESTATE], + "tracestate": ALL_HEADERS_CHAOTIC_2[_HTTP_HEADER_TRACESTATE], "_dd.p.dm": "-4", "_dd.p.usr.id": "baz64", "_dd.origin": "rum", - LAST_DD_PARENT_ID_KEY: "0000000000000000", + LAST_DD_PARENT_ID_KEY: "00f067aa0ba902b7", }, metrics={"_sampling_priority_v1": 2}, span_links=[ @@ -2117,7 +2121,8 @@ def 
test_DD_TRACE_PROPAGATION_STYLE_EXTRACT_overrides_DD_TRACE_PROPAGATION_STYLE meta={ "_dd.p.dm": "-3", "_dd.origin": "synthetics", - "tracestate": "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE", + "tracestate": ALL_HEADERS_CHAOTIC_1[_HTTP_HEADER_TRACESTATE], + LAST_DD_PARENT_ID_KEY: "00f067aa0ba902b7", }, metrics={"_sampling_priority_v1": 1}, span_links=[ From 64658dc6be2330c064c523c13ad80e38441c4a91 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Wed, 8 May 2024 06:19:10 -0700 Subject: [PATCH 25/26] chore: different lifetimes for PRs and issues (#9199) This change makes inactive issues get closed at a slower rate than inactive pull requests, reflecting actual usage patterns of this repo. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .github/workflows/stale.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 9a9a7f672a8..63a045d498b 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -17,7 +17,8 @@ jobs: # DEV: GitHub Actions have an API rate limit of 1000 operations per hour per repository # This limit is shared across all actions operations-per-run: 200 - days-before-close: 30 + days-before-pr-close: 30 + days-before-issue-close: 90 exempt-issue-labels: 'proposal' exempt-pr-labels: 'proposal' close-issue-message: | From 8d678697b35e8206237f134c697a45572df80d78 Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Wed, 8 May 2024 15:54:37 +0200 Subject: [PATCH 26/26] chore(iast): redaction algorithms refactor II (#9163) # Summarize Refactor of the IAST redaction system. 
The old algorithms had several problems: ## Description This PR continues this https://github.com/DataDog/dd-trace-py/pull/9126 - Migrate SQL Injection to this new algorithm - Remove deprecated code ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .../_evidence_redaction/_sensitive_handler.py | 7 +- .../sql_sensitive_analyzer.py | 70 ++++ ddtrace/appsec/_iast/_taint_dict.py | 21 -- .../appsec/_iast/_taint_tracking/aspects.py | 1 - ddtrace/appsec/_iast/_taint_utils.py | 4 +- ddtrace/appsec/_iast/_utils.py | 90 +----- ddtrace/appsec/_iast/constants.py | 14 +- ddtrace/appsec/_iast/reporter.py | 20 +- ddtrace/appsec/_iast/taint_sinks/_base.py | 159 +-------- .../_iast/taint_sinks/command_injection.py | 3 - .../_iast/taint_sinks/header_injection.py | 14 - .../_iast/taint_sinks/insecure_cookie.py | 4 - .../appsec/_iast/taint_sinks/sql_injection.py | 164 ---------- ddtrace/appsec/_iast/taint_sinks/ssrf.py | 3 - .../appsec/_iast/taint_sinks/weak_cipher.py | 2 - ddtrace/appsec/_iast/taint_sinks/weak_hash.py | 2 - .../_iast/taint_sinks/weak_randomness.py | 2 - ddtrace/contrib/dbapi/__init__.py | 6 +- ddtrace/contrib/dbapi_async/__init__.py | 6 +- .../test_header_injection_redacted.py | 61 ++-- .../test_path_traversal_redacted.py | 83 +++-- .../iast/taint_sinks/test_sql_injection.py | 29 +- .../test_sql_injection_redacted.py | 305 ++++++++++-------- tests/appsec/iast/test_taint_utils.py | 16 +- tests/contrib/dbapi/test_dbapi_appsec.py | 2 +- .../contrib/django/django_app/appsec_urls.py 
| 4 +- .../contrib/django/test_django_appsec_iast.py | 180 +++++------ tests/contrib/flask/test_flask_appsec_iast.py | 11 +- 28 files changed, 476 insertions(+), 807 deletions(-) create mode 100644 ddtrace/appsec/_iast/_evidence_redaction/sql_sensitive_analyzer.py delete mode 100644 ddtrace/appsec/_iast/_taint_dict.py diff --git a/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py b/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py index b76ad6c96b1..c41e56ca1c3 100644 --- a/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py +++ b/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py @@ -3,11 +3,14 @@ from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config +from .._utils import _get_source_index from ..constants import VULN_CMDI from ..constants import VULN_HEADER_INJECTION +from ..constants import VULN_SQL_INJECTION from ..constants import VULN_SSRF from .command_injection_sensitive_analyzer import command_injection_sensitive_analyzer from .header_injection_sensitive_analyzer import header_injection_sensitive_analyzer +from .sql_sensitive_analyzer import sql_sensitive_analyzer from .url_sensitive_analyzer import url_sensitive_analyzer @@ -27,7 +30,7 @@ def __init__(self): self._sensitive_analyzers = { VULN_CMDI: command_injection_sensitive_analyzer, - # SQL_INJECTION: sql_sensitive_analyzer, + VULN_SQL_INJECTION: sql_sensitive_analyzer, VULN_SSRF: url_sensitive_analyzer, VULN_HEADER_INJECTION: header_injection_sensitive_analyzer, } @@ -178,7 +181,7 @@ def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources): if next_tainted and next_tainted["start"] == i: self.write_value_part(value_parts, evidence_value[start:i], source_index) - source_index = next_tainted_index + source_index = _get_source_index(sources, next_tainted["source"]) while next_sensitive and self._contains(next_tainted, next_sensitive): redaction_start = next_sensitive["start"] - 
next_tainted["start"] diff --git a/ddtrace/appsec/_iast/_evidence_redaction/sql_sensitive_analyzer.py b/ddtrace/appsec/_iast/_evidence_redaction/sql_sensitive_analyzer.py new file mode 100644 index 00000000000..7410ec46b4a --- /dev/null +++ b/ddtrace/appsec/_iast/_evidence_redaction/sql_sensitive_analyzer.py @@ -0,0 +1,70 @@ +import re + +from ddtrace.appsec._iast.constants import DBAPI_MARIADB +from ddtrace.appsec._iast.constants import DBAPI_MYSQL +from ddtrace.appsec._iast.constants import DBAPI_PSYCOPG +from ddtrace.appsec._iast.constants import DBAPI_SQLITE +from ddtrace.internal.logger import get_logger + + +log = get_logger(__name__) + + +STRING_LITERAL = r"'(?:''|[^'])*'" +POSTGRESQL_ESCAPED_LITERAL = r"\$([^$]*)\$.*?\$\1\$" +MYSQL_STRING_LITERAL = r'"(?:\\\\"|[^"])*"|\'(?:\\\\\'|[^\'])*\'' +LINE_COMMENT = r"--.*$" +BLOCK_COMMENT = r"/\*[\s\S]*?\*/" +EXPONENT = r"(?:E[-+]?\\d+[fd]?)?" +INTEGER_NUMBER = r"(? start + 1: + next_char = evidence.value[start + 1] + if start_char == "/" and next_char == "*": + start += 2 + end -= 2 + elif start_char == "-" and start_char == next_char: + start += 2 + elif start_char.lower() == "q" and next_char == "'": + start += 3 + end -= 2 + elif start_char == "$": + match = regex_result.group(0) + size = match.find("$", 1) + 1 + if size > 1: + start += size + end -= size + tokens.append({"start": start, "end": end}) + regex_result = pattern.search(evidence.value, regex_result.end()) + return tokens diff --git a/ddtrace/appsec/_iast/_taint_dict.py b/ddtrace/appsec/_iast/_taint_dict.py deleted file mode 100644 index 97df240d4d0..00000000000 --- a/ddtrace/appsec/_iast/_taint_dict.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -from typing import TYPE_CHECKING # noqa:F401 - - -if TYPE_CHECKING: - from typing import Dict # noqa:F401 - from typing import Tuple # noqa:F401 - - from ._taint_tracking import Source # noqa:F401 - -_IAST_TAINT_DICT = {} # type: Dict[int, Tuple[Tuple[Source, int, int],...]] - - -def 
get_taint_dict(): # type: () -> Dict[int, Tuple[Tuple[Source, int, int],...]] - return _IAST_TAINT_DICT - - -def clear_taint_mapping(): # type: () -> None - global _IAST_TAINT_DICT - _IAST_TAINT_DICT = {} diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index cae1e07d455..3c0465884ce 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -506,7 +506,6 @@ def format_value_aspect( if options == 115: new_text = str_aspect(str, 0, element) elif options == 114: - # TODO: use our repr once we have implemented it new_text = repr_aspect(repr, 0, element) elif options == 97: new_text = ascii(element) diff --git a/ddtrace/appsec/_iast/_taint_utils.py b/ddtrace/appsec/_iast/_taint_utils.py index 8f91c44ff5c..07e5199cb01 100644 --- a/ddtrace/appsec/_iast/_taint_utils.py +++ b/ddtrace/appsec/_iast/_taint_utils.py @@ -5,11 +5,11 @@ from typing import Optional from typing import Union +from ddtrace.appsec._iast.constants import DBAPI_INTEGRATIONS from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config -DBAPI_INTEGRATIONS = ("sqlite", "psycopg", "mysql", "mariadb") DBAPI_PREFIXES = ("django-",) log = get_logger(__name__) @@ -529,7 +529,7 @@ def supported_dbapi_integration(integration_name): return integration_name in DBAPI_INTEGRATIONS or integration_name.startswith(DBAPI_PREFIXES) -def check_tainted_args(args, kwargs, tracer, integration_name, method): +def check_tainted_dbapi_args(args, kwargs, tracer, integration_name, method): if supported_dbapi_integration(integration_name) and method.__name__ == "execute": from ._taint_tracking import is_pyobject_tainted diff --git a/ddtrace/appsec/_iast/_utils.py b/ddtrace/appsec/_iast/_utils.py index 7272abb9016..c1b72f28f04 100644 --- a/ddtrace/appsec/_iast/_utils.py +++ b/ddtrace/appsec/_iast/_utils.py @@ -1,19 +1,10 @@ -import re -import string import sys -from typing 
import TYPE_CHECKING # noqa:F401 +from typing import List from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config -if TYPE_CHECKING: - from typing import Any # noqa:F401 - from typing import List # noqa:F401 - from typing import Set # noqa:F401 - from typing import Tuple # noqa:F401 - - def _is_python_version_supported(): # type: () -> bool # IAST supports Python versions 3.6 to 3.12 return (3, 6, 0) <= sys.version_info < (3, 13, 0) @@ -31,78 +22,13 @@ def _is_iast_enabled(): return True -# Used to cache the compiled regular expression -_SOURCE_NAME_SCRUB = None -_SOURCE_VALUE_SCRUB = None -_SOURCE_NUMERAL_SCRUB = None - - -def _has_to_scrub(s): # type: (str) -> bool - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - global _SOURCE_NAME_SCRUB - global _SOURCE_VALUE_SCRUB - global _SOURCE_NUMERAL_SCRUB - - if _SOURCE_NAME_SCRUB is None: - _SOURCE_NAME_SCRUB = re.compile(asm_config._iast_redaction_name_pattern) - _SOURCE_VALUE_SCRUB = re.compile(asm_config._iast_redaction_value_pattern) - _SOURCE_NUMERAL_SCRUB = re.compile(asm_config._iast_redaction_numeral_pattern) - - return ( - _SOURCE_NAME_SCRUB.match(s) is not None - or _SOURCE_VALUE_SCRUB.match(s) is not None - or _SOURCE_NUMERAL_SCRUB.match(s) is not None - ) - - -def _is_numeric(s): - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. 
- global _SOURCE_NUMERAL_SCRUB - - if _SOURCE_NUMERAL_SCRUB is None: - _SOURCE_NUMERAL_SCRUB = re.compile(asm_config._iast_redaction_numeral_pattern) - - return _SOURCE_NUMERAL_SCRUB.match(s) is not None - - -_REPLACEMENTS = string.ascii_letters -_LEN_REPLACEMENTS = len(_REPLACEMENTS) - - -def _scrub(s, has_range=False): # type: (str, bool) -> str - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - if has_range: - return "".join([_REPLACEMENTS[i % _LEN_REPLACEMENTS] for i in range(len(s))]) - return "*" * len(s) - - -def _is_evidence_value_parts(value): # type: (Any) -> bool - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - return isinstance(value, (set, list)) - - -def _scrub_get_tokens_positions(text, tokens): - # type: (str, Set[str]) -> List[Tuple[int, int]] - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. 
- token_positions = [] - - for token in tokens: - position = text.find(token) - if position != -1: - token_positions.append((position, position + len(token))) - - token_positions.sort() - return token_positions +def _get_source_index(sources: List, source) -> int: + i = 0 + for source_ in sources: + if hash(source_) == hash(source): + return i + i += 1 + return -1 def _get_patched_code(module_path, module_name): # type: (str, str) -> str diff --git a/ddtrace/appsec/_iast/constants.py b/ddtrace/appsec/_iast/constants.py index 17981bccbcc..60c864f59ec 100644 --- a/ddtrace/appsec/_iast/constants.py +++ b/ddtrace/appsec/_iast/constants.py @@ -16,15 +16,6 @@ VULNERABILITY_TOKEN_TYPE = Dict[int, Dict[str, Any]] -EVIDENCE_ALGORITHM_TYPE = "ALGORITHM" -EVIDENCE_SQL_INJECTION = "SQL_INJECTION" -EVIDENCE_PATH_TRAVERSAL = "PATH_TRAVERSAL" -EVIDENCE_WEAK_RANDOMNESS = "WEAK_RANDOMNESS" -EVIDENCE_COOKIE = "COOKIE" -EVIDENCE_CMDI = "COMMAND" -EVIDENCE_HEADER_INJECTION = "HEADER_INJECTION" -EVIDENCE_SSRF = "SSRF" - HEADER_NAME_VALUE_SEPARATOR = ": " MD5_DEF = "md5" @@ -91,3 +82,8 @@ "tarfile": {"open"}, "zipfile": {"ZipFile"}, } +DBAPI_SQLITE = "sqlite" +DBAPI_PSYCOPG = "psycopg" +DBAPI_MYSQL = "mysql" +DBAPI_MARIADB = "mariadb" +DBAPI_INTEGRATIONS = (DBAPI_SQLITE, DBAPI_PSYCOPG, DBAPI_MYSQL, DBAPI_MARIADB) diff --git a/ddtrace/appsec/_iast/reporter.py b/ddtrace/appsec/_iast/reporter.py index fa2cc8ae96c..90d334277bd 100644 --- a/ddtrace/appsec/_iast/reporter.py +++ b/ddtrace/appsec/_iast/reporter.py @@ -13,6 +13,7 @@ import attr from ddtrace.appsec._iast._evidence_redaction import sensitive_handler +from ddtrace.appsec._iast._utils import _get_source_index from ddtrace.appsec._iast.constants import VULN_INSECURE_HASHING_TYPE from ddtrace.appsec._iast.constants import VULN_WEAK_CIPHER_TYPE from ddtrace.appsec._iast.constants import VULN_WEAK_RANDOMNESS @@ -26,8 +27,12 @@ def _only_if_true(value): return value if value else None +ATTRS_TO_SKIP = frozenset({"_ranges", 
"_evidences_with_no_sources", "dialect"}) + + @attr.s(eq=False, hash=False) class Evidence(object): + dialect = attr.ib(type=str, default=None) # type: Optional[str] value = attr.ib(type=str, default=None) # type: Optional[str] _ranges = attr.ib(type=dict, default={}) # type: Any valueParts = attr.ib(type=list, default=None) # type: Any @@ -143,14 +148,6 @@ def add_ranges_to_evidence_and_extract_sources(self, vuln): if source not in self.sources: self.sources = self.sources + [source] - def _get_source_index(self, sources: List[Source], source: Source) -> int: - i = 0 - for source_ in sources: - if hash(source_) == hash(source): - return i - i += 1 - return -1 - def build_and_scrub_value_parts(self) -> Dict[str, Any]: """ Builds and scrubs value parts of vulnerabilities. @@ -197,7 +194,7 @@ def get_unredacted_value_parts(self, evidence_value: str, ranges: List[dict], so if from_index < range_["start"]: value_parts.append({"value": evidence_value[from_index : range_["start"]]}) - source_index = self._get_source_index(sources, range_["source"]) + source_index = _get_source_index(sources, range_["source"]) value_parts.append( {"value": evidence_value[range_["start"] : range_["end"]], "source": source_index} # type: ignore[dict-item] @@ -217,7 +214,10 @@ def _to_dict(self) -> Dict[str, Any]: Returns: - Dict[str, Any]: Dictionary representation of the IAST span reporter. 
""" - return attr.asdict(self, filter=lambda attr, x: x is not None and attr.name != "_ranges") + return attr.asdict( + self, + filter=lambda attr, x: x is not None and attr.name not in ATTRS_TO_SKIP, + ) def _to_str(self) -> str: """ diff --git a/ddtrace/appsec/_iast/taint_sinks/_base.py b/ddtrace/appsec/_iast/taint_sinks/_base.py index 215e2abb208..50e025f393c 100644 --- a/ddtrace/appsec/_iast/taint_sinks/_base.py +++ b/ddtrace/appsec/_iast/taint_sinks/_base.py @@ -1,20 +1,18 @@ import os -from typing import TYPE_CHECKING # noqa:F401 -from typing import cast # noqa:F401 +from typing import Any +from typing import Callable +from typing import Optional +from typing import Text from ddtrace import tracer from ddtrace.appsec._constants import IAST from ddtrace.internal import core from ddtrace.internal.logger import get_logger from ddtrace.internal.utils.cache import LFUCache -from ddtrace.settings.asm import config as asm_config from ..._deduplications import deduplication from .._overhead_control_engine import Operation from .._stacktrace import get_info_frame -from .._utils import _has_to_scrub -from .._utils import _is_evidence_value_parts -from .._utils import _scrub from ..processor import AppSecIastSpanProcessor from ..reporter import Evidence from ..reporter import IastSpanReporter @@ -22,16 +20,6 @@ from ..reporter import Vulnerability -if TYPE_CHECKING: # pragma: no cover - from typing import Any # noqa:F401 - from typing import Callable # noqa:F401 - from typing import Dict # noqa:F401 - from typing import List # noqa:F401 - from typing import Optional # noqa:F401 - from typing import Set # noqa:F401 - from typing import Text # noqa:F401 - from typing import Union # noqa:F401 - log = get_logger(__name__) CWD = os.path.abspath(os.getcwd()) @@ -39,8 +27,8 @@ class taint_sink_deduplication(deduplication): def _extract(self, args): - # we skip 0, 1 and last position because its the cls, span and sources respectively - return args[2:-1] + # We skip positions 0 
and 1 because they represent the 'cls' and 'span' respectively + return args[2:] def _check_positions_contained(needle, container): @@ -57,7 +45,6 @@ def _check_positions_contained(needle, container): class VulnerabilityBase(Operation): vulnerability_type = "" - evidence_type = "" _redacted_report_cache = LFUCache() @classmethod @@ -66,10 +53,8 @@ def _reset_cache_for_testing(cls): cls._redacted_report_cache.clear() @classmethod - def wrap(cls, func): - # type: (Callable) -> Callable - def wrapper(wrapped, instance, args, kwargs): - # type: (Callable, Any, Any, Any) -> Any + def wrap(cls, func: Callable) -> Callable: + def wrapper(wrapped: Callable, instance: Any, args: Any, kwargs: Any) -> Any: """Get the current root Span and attach it to the wrapped function. We need the span to report the vulnerability and update the context with the report information. """ @@ -83,7 +68,7 @@ def wrapper(wrapped, instance, args, kwargs): @classmethod @taint_sink_deduplication - def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_number, sources): + def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_number): if line_number is not None and (line_number == 0 or line_number < -1): line_number = -1 @@ -99,19 +84,12 @@ def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_num report = IastSpanReporter(vulnerabilities={vulnerability}) report.add_ranges_to_evidence_and_extract_sources(vulnerability) - if getattr(cls, "redact_report", False): - redacted_report = cls._redacted_report_cache.get( - hash(report), lambda x: cls._redact_report(cast(IastSpanReporter, report)) - ) - else: - redacted_report = report - core.set_item(IAST.CONTEXT_KEY, redacted_report, span=span) + core.set_item(IAST.CONTEXT_KEY, report, span=span) return True @classmethod - def report(cls, evidence_value="", value_parts=None, sources=None): - # type: (Any, Any, Optional[List[Any]]) -> None + def report(cls, evidence_value: Text = "", dialect: 
Optional[Text] = None) -> None: """Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON""" # TODO: type of evidence_value will be Text. We wait to finish the redaction refactor. if cls.acquire_quota(): @@ -146,122 +124,15 @@ def report(cls, evidence_value="", value_parts=None, sources=None): if not cls.is_not_reported(file_name, line_number): return - - # TODO: This function is deprecated, but we need to migrate all vulnerabilities first before deleting it - if _is_evidence_value_parts(evidence_value) or _is_evidence_value_parts(value_parts): - evidence = Evidence(value=evidence_value, valueParts=value_parts) # Evidence is a string in weak cipher, weak hash and weak randomness - elif isinstance(evidence_value, (str, bytes, bytearray)): - evidence = Evidence(value=evidence_value) # type: ignore + if isinstance(evidence_value, (str, bytes, bytearray)): + evidence = Evidence(value=evidence_value, dialect=dialect) else: log.debug("Unexpected evidence_value type: %s", type(evidence_value)) - evidence = Evidence(value="") + evidence = Evidence(value="", dialect=dialect) - result = cls._prepare_report(span, cls.vulnerability_type, evidence, file_name, line_number, sources) + result = cls._prepare_report(span, cls.vulnerability_type, evidence, file_name, line_number) # If result is None that's mean deduplication raises and no vulnerability wasn't reported, with that, # we need to restore the quota if not result: cls.increment_quota() - - @classmethod - def _extract_sensitive_tokens(cls, report): - # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. 
- log.debug("Base class VulnerabilityBase._extract_sensitive_tokens called") - return {} - - @classmethod - def _get_vulnerability_text(cls, vulnerability): - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - if vulnerability and vulnerability.evidence.value is not None: - return vulnerability.evidence.value - - if vulnerability.evidence.valueParts is not None: - return "".join( - [ - (part.get("value", "") if type(part) is not str else part) - for part in vulnerability.evidence.valueParts - ] - ) - - return "" - - @classmethod - def replace_tokens( - cls, - vuln, - vulns_to_tokens, - has_range=False, - ): - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - ret = vuln.evidence.value - replaced = False - - for token in vulns_to_tokens[hash(vuln)]["tokens"]: - ret = ret.replace(token, _scrub(token, has_range)) - replaced = True - - return ret, replaced - - @classmethod - def _custom_edit_valueparts(cls, vuln): - # Subclasses could optionally implement this to add further processing to the - # vulnerability valueParts - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - return - - @classmethod - def _redact_report(cls, report): # type: (IastSpanReporter) -> IastSpanReporter - # TODO: This function is deprecated. - # Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. 
- if not asm_config._iast_redaction_enabled: - return report - - # See if there is a match on either any of the sources or value parts of the report - already_scrubbed = {} - - sources_values_to_scrubbed = {} - vulns_to_text = {vuln: cls._get_vulnerability_text(vuln) for vuln in report.vulnerabilities} - vulns_to_tokens = cls._extract_sensitive_tokens(vulns_to_text) - - for source in report.sources: - # Join them so we only run the regexps once for each source - # joined_fields = "%s%s" % (source.name, source.value) - if _has_to_scrub(source.name) or _has_to_scrub(source.value): # type: ignore - scrubbed = _scrub(source.value, has_range=True) # type: ignore - already_scrubbed[source.value] = scrubbed - source.redacted = True - sources_values_to_scrubbed[source.value] = scrubbed - source.pattern = scrubbed - source.value = None - - already_scrubbed_set = set(already_scrubbed.keys()) - for vuln in report.vulnerabilities: - if vuln.evidence.value is not None: - pattern, replaced = cls.replace_tokens(vuln, vulns_to_tokens, hasattr(vuln.evidence.value, "source")) - if replaced: - vuln.evidence.value = None - - if vuln.evidence.valueParts is None: - continue - for part in vuln.evidence.valueParts: - part_value = part.get("value") - if not part_value: - continue - - if part_value in already_scrubbed_set: - part["pattern"] = already_scrubbed[part["value"]] - part["redacted"] = True - del part["value"] - - cls._custom_edit_valueparts(vuln) - return report diff --git a/ddtrace/appsec/_iast/taint_sinks/command_injection.py b/ddtrace/appsec/_iast/taint_sinks/command_injection.py index 8f123a2be4c..c95d99567cd 100644 --- a/ddtrace/appsec/_iast/taint_sinks/command_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/command_injection.py @@ -74,9 +74,6 @@ def _iast_cmdi_subprocess_init(wrapped, instance, args, kwargs): @oce.register class CommandInjection(VulnerabilityBase): vulnerability_type = VULN_CMDI - # TODO: Redaction migrated to 
`ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - redact_report = False def _iast_report_cmdi(shell_args: Union[str, List[str]]) -> None: diff --git a/ddtrace/appsec/_iast/taint_sinks/header_injection.py b/ddtrace/appsec/_iast/taint_sinks/header_injection.py index 1ce8a52d5e4..7fa0b6111dd 100644 --- a/ddtrace/appsec/_iast/taint_sinks/header_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/header_injection.py @@ -1,5 +1,3 @@ -import re - from ddtrace.internal.logger import get_logger from ddtrace.settings.asm import config as asm_config @@ -19,15 +17,6 @@ log = get_logger(__name__) -_HEADERS_NAME_REGEXP = re.compile( - r"(?:p(?:ass)?w(?:or)?d|pass(?:_?phrase)?|secret|(?:api_?|private_?|public_?|access_?|secret_?)key(?:_?id)?|token|consumer_?(?:id|key|secret)|sign(?:ed|ature)?|auth(?:entication|orization)?)", - re.IGNORECASE, -) -_HEADERS_VALUE_REGEXP = re.compile( - r"(?:bearer\\s+[a-z0-9\\._\\-]+|glpat-[\\w\\-]{20}|gh[opsu]_[0-9a-zA-Z]{36}|ey[I-L][\\w=\\-]+\\.ey[I-L][\\w=\\-]+(?:\\.[\\w.+/=\\-]+)?|(?:[\\-]{5}BEGIN[a-z\\s]+PRIVATE\\sKEY[\\-]{5}[^\\-]+[\\-]{5}END[a-z\\s]+PRIVATE\\sKEY[\\-]{5}|ssh-rsa\\s*[a-z0-9/\\.+]{100,}))", - re.IGNORECASE, -) - def get_version(): # type: () -> str @@ -103,9 +92,6 @@ def _iast_h(wrapped, instance, args, kwargs): @oce.register class HeaderInjection(VulnerabilityBase): vulnerability_type = VULN_HEADER_INJECTION - # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. - redact_report = False def _iast_report_header_injection(headers_args) -> None: diff --git a/ddtrace/appsec/_iast/taint_sinks/insecure_cookie.py b/ddtrace/appsec/_iast/taint_sinks/insecure_cookie.py index bb8147760a7..e188e2ecbe4 100644 --- a/ddtrace/appsec/_iast/taint_sinks/insecure_cookie.py +++ b/ddtrace/appsec/_iast/taint_sinks/insecure_cookie.py @@ -4,7 +4,6 @@ from .. 
import oce from .._metrics import _set_metric_iast_executed_sink from .._metrics import increment_iast_span_metric -from ..constants import EVIDENCE_COOKIE from ..constants import VULN_INSECURE_COOKIE from ..constants import VULN_NO_HTTPONLY_COOKIE from ..constants import VULN_NO_SAMESITE_COOKIE @@ -19,7 +18,6 @@ @oce.register class InsecureCookie(VulnerabilityBase): vulnerability_type = VULN_INSECURE_COOKIE - evidence_type = EVIDENCE_COOKIE scrub_evidence = False skip_location = True @@ -27,14 +25,12 @@ class InsecureCookie(VulnerabilityBase): @oce.register class NoHttpOnlyCookie(VulnerabilityBase): vulnerability_type = VULN_NO_HTTPONLY_COOKIE - evidence_type = EVIDENCE_COOKIE skip_location = True @oce.register class NoSameSite(VulnerabilityBase): vulnerability_type = VULN_NO_SAMESITE_COOKIE - evidence_type = EVIDENCE_COOKIE skip_location = True diff --git a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py index 68d5a289c01..f671d92c387 100644 --- a/ddtrace/appsec/_iast/taint_sinks/sql_injection.py +++ b/ddtrace/appsec/_iast/taint_sinks/sql_injection.py @@ -1,172 +1,8 @@ -import re -from typing import TYPE_CHECKING # noqa:F401 - from .. 
import oce -from .._taint_tracking import taint_ranges_as_evidence_info -from .._utils import _has_to_scrub -from .._utils import _is_numeric -from .._utils import _scrub_get_tokens_positions -from ..constants import EVIDENCE_SQL_INJECTION from ..constants import VULN_SQL_INJECTION from ._base import VulnerabilityBase -if TYPE_CHECKING: - from typing import Any # noqa:F401 - from typing import Dict # noqa:F401 - - from .reporter import Vulnerability # noqa:F401 - -from sqlparse import parse -from sqlparse import tokens - - -_TEXT_TOKENS_REGEXP = re.compile(r"\b\w+\b") - - @oce.register class SqlInjection(VulnerabilityBase): vulnerability_type = VULN_SQL_INJECTION - evidence_type = EVIDENCE_SQL_INJECTION - redact_report = True - - @classmethod - def report(cls, evidence_value=None, sources=None): - value_parts = [] - if isinstance(evidence_value, (str, bytes, bytearray)): - value_parts, sources = taint_ranges_as_evidence_info(evidence_value) - super(SqlInjection, cls).report(evidence_value=evidence_value, value_parts=value_parts, sources=sources) - - @classmethod - def _extract_sensitive_tokens(cls, vulns_to_text): - # type: (Dict[Vulnerability, str]) -> Dict[int, Dict[str, Any]] - ret = {} # type: Dict[int, Dict[str, Any]] - for vuln, text in vulns_to_text.items(): - vuln_hash = hash(vuln) - ret[vuln_hash] = { - "tokens": set(_TEXT_TOKENS_REGEXP.findall(text)), - } - ret[vuln_hash]["token_positions"] = _scrub_get_tokens_positions(text, ret[vuln_hash]["tokens"]) - - return ret - - @classmethod - def _custom_edit_valueparts(cls, vuln): - def _maybe_with_source(source, value): - if source is not None: - return {"value": value, "source": source} - return {"value": value} - - new_valueparts = [] - - in_singleline_comment = False - - for part in vuln.evidence.valueParts: - source = part.get("source") - value = part.get("value") - - if not value or part.get("redacted"): - new_valueparts.append(part) - continue - - parsed = parse(value)[0].flatten() - out = [] - - for item 
in parsed: - if item.ttype == tokens.Whitespace.Newline: - in_singleline_comment = False - - elif in_singleline_comment: - # Skip all tokens after a -- comment until newline - continue - - if item.ttype in { - tokens.Literal.String.Single, - tokens.Literal.String.Double, - tokens.Literal.String.Symbol, - tokens.Literal.Number.Integer, - tokens.Literal.Number.Float, - tokens.Literal.Number.Hexadecimal, - tokens.Comment.Single, - tokens.Comment.Multiline, - tokens.Name, - }: - redact_fully = False - add_later = None - sitem = str(item) - - if _is_numeric(sitem): - redact_fully = True - elif item.ttype == tokens.Literal.String.Single or ( - item.ttype == tokens.Literal.String.Symbol and "'" in str(item) - ): - out.append("'") - add_later = "'" - str_item = sitem.replace("'", "") - if _is_numeric(str_item): - redact_fully = True - elif item.ttype == tokens.Literal.String.Double or ( - item.ttype == tokens.Literal.String.Symbol and '"' in str(item) - ): - out.append('"') - add_later = '"' - str_item = sitem.replace('"', "") - if _is_numeric(str_item): - redact_fully = True - elif item.ttype == tokens.Comment.Single: - out.append("--") - add_later = "" - redact_fully = True - in_singleline_comment = True - elif item.ttype == tokens.Comment.Multiline: - out.append("/*") - add_later = "*/" - redact_fully = True - elif item.ttype in (tokens.Number.Integer, tokens.Number.Float, tokens.Number.Hexadecimal): - redact_fully = True - else: - out.append(sitem) - continue - - if len(out): - new_valueparts.append(_maybe_with_source(source, "".join(out))) - - if redact_fully: - # Comments are totally redacted - new_valueparts.append({"redacted": True}) - else: - new_valueparts.append(_maybe_with_source(source, str_item)) - - if add_later: - out = [add_later] - else: - out = [] - else: - out.append(str(item)) - - if len(out): - new_valueparts.append(_maybe_with_source(source, "".join(out))) - - # Scrub as needed - idx = 0 - len_parts = len(new_valueparts) - while idx < len_parts: - 
value = new_valueparts[idx].get("value") - if value and _has_to_scrub(value) and idx < (len_parts - 1) and "redacted" not in new_valueparts[idx + 1]: - # Scrub the value, which is the next one, except when the previous was a LIKE or an assignment - # in which case this is the value to scrub - prev_valuepart = new_valueparts[idx - 1].get("value", "").strip().lower() - if len(prev_valuepart) and (" like " in prev_valuepart or prev_valuepart[-1] == "="): - new_valueparts[idx] = {"redacted": True} - else: # scrub the next non empty quote value - for part in new_valueparts[idx + 1 :]: - idx += 1 - next_valuepart = part.get("value", "").strip() - if not len(next_valuepart) or next_valuepart in ("'", '"'): - continue - - new_valueparts[idx] = {"redacted": True} - break - idx += 1 - - vuln.evidence.valueParts = new_valueparts diff --git a/ddtrace/appsec/_iast/taint_sinks/ssrf.py b/ddtrace/appsec/_iast/taint_sinks/ssrf.py index b6d1300de7f..40b0f2fdb5e 100644 --- a/ddtrace/appsec/_iast/taint_sinks/ssrf.py +++ b/ddtrace/appsec/_iast/taint_sinks/ssrf.py @@ -16,9 +16,6 @@ @oce.register class SSRF(VulnerabilityBase): vulnerability_type = VULN_SSRF - # TODO: Redaction migrated to `ddtrace.appsec._iast._evidence_redaction._sensitive_handler` but we need to migrate - # all vulnerabilities to use it first. 
- redact_report = False def _iast_report_ssrf(func: Callable, *args, **kwargs): diff --git a/ddtrace/appsec/_iast/taint_sinks/weak_cipher.py b/ddtrace/appsec/_iast/taint_sinks/weak_cipher.py index 3199528ef03..21a494edf3b 100644 --- a/ddtrace/appsec/_iast/taint_sinks/weak_cipher.py +++ b/ddtrace/appsec/_iast/taint_sinks/weak_cipher.py @@ -15,7 +15,6 @@ from ..constants import BLOWFISH_DEF from ..constants import DEFAULT_WEAK_CIPHER_ALGORITHMS from ..constants import DES_DEF -from ..constants import EVIDENCE_ALGORITHM_TYPE from ..constants import RC2_DEF from ..constants import RC4_DEF from ..constants import VULN_WEAK_CIPHER_TYPE @@ -44,7 +43,6 @@ def get_weak_cipher_algorithms(): @oce.register class WeakCipher(VulnerabilityBase): vulnerability_type = VULN_WEAK_CIPHER_TYPE - evidence_type = EVIDENCE_ALGORITHM_TYPE def unpatch_iast(): diff --git a/ddtrace/appsec/_iast/taint_sinks/weak_hash.py b/ddtrace/appsec/_iast/taint_sinks/weak_hash.py index 9bebaf805be..0932cc9fb05 100644 --- a/ddtrace/appsec/_iast/taint_sinks/weak_hash.py +++ b/ddtrace/appsec/_iast/taint_sinks/weak_hash.py @@ -14,7 +14,6 @@ from .._patch import try_unwrap from .._patch import try_wrap_function_wrapper from ..constants import DEFAULT_WEAK_HASH_ALGORITHMS -from ..constants import EVIDENCE_ALGORITHM_TYPE from ..constants import MD5_DEF from ..constants import SHA1_DEF from ..constants import VULN_INSECURE_HASHING_TYPE @@ -42,7 +41,6 @@ def get_weak_hash_algorithms(): @oce.register class WeakHash(VulnerabilityBase): vulnerability_type = VULN_INSECURE_HASHING_TYPE - evidence_type = EVIDENCE_ALGORITHM_TYPE def unpatch_iast(): diff --git a/ddtrace/appsec/_iast/taint_sinks/weak_randomness.py b/ddtrace/appsec/_iast/taint_sinks/weak_randomness.py index bd7fc6e5051..a9fc130b39f 100644 --- a/ddtrace/appsec/_iast/taint_sinks/weak_randomness.py +++ b/ddtrace/appsec/_iast/taint_sinks/weak_randomness.py @@ -1,5 +1,4 @@ from .. 
import oce -from ..constants import EVIDENCE_WEAK_RANDOMNESS from ..constants import VULN_WEAK_RANDOMNESS from ._base import VulnerabilityBase @@ -7,7 +6,6 @@ @oce.register class WeakRandomness(VulnerabilityBase): vulnerability_type = VULN_WEAK_RANDOMNESS - evidence_type = EVIDENCE_WEAK_RANDOMNESS @classmethod def report(cls, evidence_value=None, sources=None): diff --git a/ddtrace/contrib/dbapi/__init__.py b/ddtrace/contrib/dbapi/__init__.py index 3fb5d6cbe1c..b0e4d2aec81 100644 --- a/ddtrace/contrib/dbapi/__init__.py +++ b/ddtrace/contrib/dbapi/__init__.py @@ -105,13 +105,13 @@ def _trace_method(self, method, name, resource, extra_tags, dbm_propagator, *arg if _is_iast_enabled(): try: from ddtrace.appsec._iast._metrics import _set_metric_iast_executed_sink - from ddtrace.appsec._iast._taint_utils import check_tainted_args + from ddtrace.appsec._iast._taint_utils import check_tainted_dbapi_args from ddtrace.appsec._iast.taint_sinks.sql_injection import SqlInjection increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SqlInjection.vulnerability_type) _set_metric_iast_executed_sink(SqlInjection.vulnerability_type) - if check_tainted_args(args, kwargs, pin.tracer, self._self_config.integration_name, method): - SqlInjection.report(evidence_value=args[0]) + if check_tainted_dbapi_args(args, kwargs, pin.tracer, self._self_config.integration_name, method): + SqlInjection.report(evidence_value=args[0], dialect=self._self_config.integration_name) except Exception: log.debug("Unexpected exception while reporting vulnerability", exc_info=True) diff --git a/ddtrace/contrib/dbapi_async/__init__.py b/ddtrace/contrib/dbapi_async/__init__.py index c37638fdf67..abbae3b77a3 100644 --- a/ddtrace/contrib/dbapi_async/__init__.py +++ b/ddtrace/contrib/dbapi_async/__init__.py @@ -77,13 +77,13 @@ async def _trace_method(self, method, name, resource, extra_tags, dbm_propagator if _is_iast_enabled(): from ddtrace.appsec._iast._metrics import _set_metric_iast_executed_sink - 
from ddtrace.appsec._iast._taint_utils import check_tainted_args + from ddtrace.appsec._iast._taint_utils import check_tainted_dbapi_args from ddtrace.appsec._iast.taint_sinks.sql_injection import SqlInjection increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, SqlInjection.vulnerability_type) _set_metric_iast_executed_sink(SqlInjection.vulnerability_type) - if check_tainted_args(args, kwargs, pin.tracer, self._self_config.integration_name, method): - SqlInjection.report(evidence_value=args[0]) + if check_tainted_dbapi_args(args, kwargs, pin.tracer, self._self_config.integration_name, method): + SqlInjection.report(evidence_value=args[0], dialect=self._self_config.integration_name) # set analytics sample rate if enabled but only for non-FetchTracedCursor if not isinstance(self, FetchTracedAsyncCursor): diff --git a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py index db9272e1625..6861d28edbf 100644 --- a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py @@ -1,14 +1,17 @@ +from mock.mock import ANY import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import OriginType from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_HEADER_INJECTION from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from 
ddtrace.appsec._iast.taint_sinks.header_injection import HeaderInjection from ddtrace.internal import core @@ -24,20 +27,25 @@ ], ) def test_header_injection_redact_excluded(header_name, header_value): - ev = Evidence( - valueParts=[ - {"value": header_name + ": "}, - {"value": header_value, "source": 0}, - ] - ) + header_value_tainted = taint_pyobject(pyobject=header_value, source_name="SomeName", source_value=header_value) + ev = Evidence(value=add_aspect(header_name, add_aspect(": ", header_value_tainted))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value=header_value) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) report.add_ranges_to_evidence_and_extract_sources(v) - redacted_report = HeaderInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [{"value": header_name + ": "}, {"source": 0, "value": header_value}] + result = report.build_and_scrub_value_parts() + + assert result == { + "sources": [{"name": "SomeName", "origin": OriginType.PARAMETER, "value": header_value}], + "vulnerabilities": [ + { + "evidence": {"valueParts": [{"value": header_name + ": "}, {"source": 0, "value": header_value}]}, + "hash": ANY, + "location": {"line": ANY, "path": "foobar.py", "spanId": ANY}, + "type": VULN_HEADER_INJECTION, + } + ], + } @pytest.mark.parametrize( @@ -46,7 +54,7 @@ def test_header_injection_redact_excluded(header_name, header_value): ( "WWW-Authenticate", 'Basic realm="api"', - [{"value": "WWW-Authenticate: "}, {"source": 0, "value": 'Basic realm="api"'}], + [{"value": "WWW-Authenticate: "}, {"pattern": "abcdefghijklmnopq", "redacted": True, "source": 0}], ), ( "Authorization", @@ -63,20 +71,25 @@ def test_header_injection_redact_excluded(header_name, header_value): ], ) def test_common_django_header_injection_redact(header_name, 
header_value, value_part): - ev = Evidence( - valueParts=[ - {"value": header_name + ": "}, - {"value": header_value, "source": 0}, - ] - ) + header_value_tainted = taint_pyobject(pyobject=header_value, source_name="SomeName", source_value=header_value) + ev = Evidence(value=add_aspect(header_name, add_aspect(": ", header_value_tainted))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_HEADER_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value=header_value) - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) report.add_ranges_to_evidence_and_extract_sources(v) - redacted_report = HeaderInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == value_part + result = report.build_and_scrub_value_parts() + + assert result == { + "sources": [{"name": "SomeName", "origin": OriginType.PARAMETER, "pattern": ANY, "redacted": True}], + "vulnerabilities": [ + { + "evidence": {"valueParts": value_part}, + "hash": ANY, + "location": {"line": ANY, "path": "foobar.py", "spanId": ANY}, + "type": VULN_HEADER_INJECTION, + } + ], + } @pytest.mark.parametrize( diff --git a/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py b/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py index ccd88c0ce11..aacaae0a156 100644 --- a/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py @@ -1,14 +1,15 @@ import os +from mock.mock import ANY import pytest +from ddtrace.appsec._iast._taint_tracking import OriginType +from ddtrace.appsec._iast._taint_tracking import taint_pyobject from ddtrace.appsec._iast.constants import VULN_PATH_TRAVERSAL from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import 
Source from ddtrace.appsec._iast.reporter import Vulnerability -from ddtrace.appsec._iast.taint_sinks.path_traversal import PathTraversal ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -29,19 +30,25 @@ ], ) def test_path_traversal_redact_exclude(file_path): - ev = Evidence( - valueParts=[ - {"value": file_path, "source": 0}, - ] - ) + file_path = taint_pyobject(pyobject=file_path, source_name="path_traversal", source_value=file_path) + ev = Evidence(value=file_path) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_PATH_TRAVERSAL, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = PathTraversal._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [{"source": 0, "value": file_path}] + assert result == { + "sources": [{"name": "path_traversal", "origin": OriginType.PARAMETER, "value": file_path}], + "vulnerabilities": [ + { + "evidence": {"valueParts": [{"source": 0, "value": file_path}]}, + "hash": ANY, + "location": {"line": ANY, "path": "foobar.py", "spanId": ANY}, + "type": VULN_PATH_TRAVERSAL, + } + ], + } @pytest.mark.parametrize( @@ -75,33 +82,45 @@ def test_path_traversal_redact_exclude(file_path): ], ) def test_path_traversal_redact_rel_paths(file_path): - ev = Evidence( - valueParts=[ - {"value": file_path, "source": 0}, - ] - ) + file_path = taint_pyobject(pyobject=file_path, source_name="path_traversal", source_value=file_path) + ev = Evidence(value=file_path) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_PATH_TRAVERSAL, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = 
IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = PathTraversal._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [{"source": 0, "value": file_path}] + assert result == { + "sources": [{"name": "path_traversal", "origin": OriginType.PARAMETER, "value": file_path}], + "vulnerabilities": [ + { + "evidence": {"valueParts": [{"source": 0, "value": file_path}]}, + "hash": ANY, + "location": {"line": ANY, "path": "foobar.py", "spanId": ANY}, + "type": VULN_PATH_TRAVERSAL, + } + ], + } def test_path_traversal_redact_abs_paths(): file_path = os.path.join(ROOT_DIR, "../fixtures", "taint_sinks", "path_traversal_test_file.txt") - ev = Evidence( - valueParts=[ - {"value": file_path, "source": 0}, - ] - ) + file_path = taint_pyobject(pyobject=file_path, source_name="path_traversal", source_value=file_path) + ev = Evidence(value=file_path) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_PATH_TRAVERSAL, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = PathTraversal._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [{"source": 0, "value": file_path}] + assert result == { + "sources": [{"name": "path_traversal", "origin": OriginType.PARAMETER, "value": file_path}], + "vulnerabilities": [ + { + "evidence": {"valueParts": [{"source": 0, "value": file_path}]}, + "hash": ANY, + "location": {"line": ANY, "path": "foobar.py", "spanId": ANY}, + "type": VULN_PATH_TRAVERSAL, + } + ], + } diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection.py 
b/tests/appsec/iast/taint_sinks/test_sql_injection.py index 54efea82ffe..85f0e8e123e 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection.py @@ -41,26 +41,25 @@ def test_sql_injection(fixture_path, fixture_module, iast_span_defaults): mod.sqli_simple(table) span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - - assert len(span_report.vulnerabilities) == 1 - vulnerability = list(span_report.vulnerabilities)[0] - source = span_report.sources[0] - assert vulnerability.type == VULN_SQL_INJECTION - assert vulnerability.evidence.valueParts == [ + data = span_report.build_and_scrub_value_parts() + vulnerability = data["vulnerabilities"][0] + source = data["sources"][0] + assert vulnerability["type"] == VULN_SQL_INJECTION + assert vulnerability["evidence"]["valueParts"] == [ {"value": "SELECT "}, {"redacted": True}, {"value": " FROM "}, {"value": "students", "source": 0}, ] - assert vulnerability.evidence.value is None - assert source.name == "test_ossystem" - assert source.origin == OriginType.PARAMETER - assert source.value == "students" + assert "value" not in vulnerability["evidence"].keys() + assert source["name"] == "test_ossystem" + assert source["origin"] == OriginType.PARAMETER + assert source["value"] == "students" line, hash_value = get_line_and_hash("test_sql_injection", VULN_SQL_INJECTION, filename=fixture_path) - assert vulnerability.location.line == line - assert vulnerability.location.path == fixture_path - assert vulnerability.hash == hash_value + assert vulnerability["location"]["path"] == fixture_path + assert vulnerability["location"]["line"] == line + assert vulnerability["hash"] == hash_value @pytest.mark.parametrize("fixture_path,fixture_module", DDBBS) @@ -80,6 +79,6 @@ def test_sql_injection_deduplication(fixture_path, fixture_module, iast_span_ded span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_deduplication_enabled) assert 
span_report - - assert len(span_report.vulnerabilities) == 1 + data = span_report.build_and_scrub_value_parts() + assert len(data["vulnerabilities"]) == 1 VulnerabilityBase._prepare_report._reset_cache() diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py index 4122b53d402..a4d1da049f8 100644 --- a/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py @@ -1,13 +1,16 @@ import pytest from ddtrace.appsec._constants import IAST +from ddtrace.appsec._iast._taint_tracking import OriginType from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted +from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking import str_to_origin +from ddtrace.appsec._iast._taint_tracking import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect from ddtrace.appsec._iast.constants import VULN_SQL_INJECTION from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import IastSpanReporter from ddtrace.appsec._iast.reporter import Location -from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability from ddtrace.appsec._iast.taint_sinks.sql_injection import SqlInjection from ddtrace.internal import core @@ -18,33 +21,7 @@ # FIXME: ideally all these should pass, through the key is that we don't leak any potential PII -_ignore_list = { - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, # unsupported weird strings - 23, - 28, - 31, - 33, - 34, # difference in numerics parsing (e.g. 
sign in the previous valuepart) - 40, - 41, - 42, - 43, - 44, # overlapping ":string", not supported by sqlparser, - 45, - 46, - 47, - 49, - 50, - 51, - 52, # slight differences in sqlparser parsing -} +_ignore_list = {46, 47} @pytest.mark.parametrize( @@ -74,164 +51,212 @@ def test_sqli_redaction_suite(evidence_input, sources_expected, vulnerabilities_ span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults) assert span_report - vulnerability = list(span_report.vulnerabilities)[0] + span_report.build_and_scrub_value_parts() + result = span_report._to_dict() + vulnerability = list(result["vulnerabilities"])[0] + source = list(result["sources"])[0] + source["origin"] = origin_to_str(source["origin"]) - assert vulnerability.type == VULN_SQL_INJECTION - assert vulnerability.evidence.valueParts == vulnerabilities_expected["evidence"]["valueParts"] + assert vulnerability["type"] == VULN_SQL_INJECTION + assert source == sources_expected -@pytest.mark.skip(reason="TODO: Currently replacing too eagerly here") def test_redacted_report_no_match(): - ev = Evidence(value="SomeEvidenceValue") - orig_ev = ev.value + string_evicence = taint_pyobject( + pyobject="SomeEvidenceValue", source_name="source_name", source_value="SomeEvidenceValue" + ) + ev = Evidence(value=string_evicence) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == {"valueParts": [{"source": 0, "value": "SomeEvidenceValue"}]} - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert not v.evidence.redacted - assert 
v.evidence.value == orig_ev + for v in result["sources"]: + assert v == {"name": "source_name", "origin": OriginType.PARAMETER, "value": "SomeEvidenceValue"} def test_redacted_report_source_name_match(): - ev = Evidence(value="'SomeEvidenceValue'") + string_evicence = taint_pyobject(pyobject="'SomeEvidenceValue'", source_name="secret", source_value="SomeValue") + ev = Evidence(value=string_evicence) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="secret", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert not v.evidence.value + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == {"valueParts": [{"pattern": "*******************", "redacted": True, "source": 0}]} + + for v in result["sources"]: + assert v == {"name": "secret", "origin": OriginType.PARAMETER, "pattern": "abcdefghi", "redacted": True} def test_redacted_report_source_value_match(): - ev = Evidence(value="'SomeEvidenceValue'") + string_evicence = taint_pyobject( + pyobject="'SomeEvidenceValue'", source_name="SomeName", source_value="somepassword" + ) + ev = Evidence(value=string_evicence) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="somepassword") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert not 
v.evidence.value + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == {"valueParts": [{"pattern": "*******************", "redacted": True, "source": 0}]} + + for v in result["sources"]: + assert v == {"name": "SomeName", "origin": OriginType.PARAMETER, "pattern": "abcdefghijkl", "redacted": True} def test_redacted_report_evidence_value_match_also_redacts_source_value(): - ev = Evidence(value="'SomeSecretPassword'") + string_evicence = taint_pyobject( + pyobject="'SomeSecretPassword'", source_name="SomeName", source_value="SomeSecretPassword" + ) + ev = Evidence(value=string_evicence) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeSecretPassword") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert not v.evidence.value - for s in redacted_report.sources: - assert s.redacted - assert s.pattern == "abcdefghijklmnopqr" - assert not s.value + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == {"valueParts": [{"pattern": "********************", "redacted": True, "source": 0}]} + + for v in result["sources"]: + assert v == { + "name": "SomeName", + "origin": OriginType.PARAMETER, + "pattern": "abcdefghijklmnopqr", + "redacted": True, + } def test_redacted_report_valueparts(): - ev = Evidence( - valueParts=[ - {"value": "SELECT * FROM users WHERE password = '"}, - {"value": "1234", "source": 0}, - {"value": ":{SHA1}'"}, - ] - ) + string_evicence = taint_pyobject(pyobject="1234", source_name="SomeName", source_value="SomeValue") + + ev = Evidence(value=add_aspect("SELECT * FROM users WHERE password = 
'", add_aspect(string_evicence, ":{SHA1}'"))) loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == { + "valueParts": [ + {"value": "SELECT * FROM users WHERE password = '"}, + {"pattern": "****", "redacted": True, "source": 0}, + {"redacted": True}, + {"value": "'"}, + ] + } - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "SELECT * FROM users WHERE password = '"}, - {"redacted": True}, - {"value": ":{SHA1}'"}, - ] + for v in result["sources"]: + assert v == {"name": "SomeName", "origin": OriginType.PARAMETER, "pattern": "abcdefghi", "redacted": True} def test_redacted_report_valueparts_username_not_tainted(): - ev = Evidence( - valueParts=[ - {"value": "SELECT * FROM users WHERE username = '"}, - {"value": "pepito"}, - {"value": "' AND password = '"}, - {"value": "secret", "source": 0}, - {"value": "'"}, - ] + string_evicence = taint_pyobject(pyobject="secret", source_name="SomeName", source_value="SomeValue") + + string_tainted = add_aspect( + "SELECT * FROM users WHERE username = '", + add_aspect("pepito", add_aspect("' AND password = '", add_aspect(string_evicence, "'"))), ) + ev = Evidence(value=string_tainted, dialect="POSTGRES") loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) - - redacted_report = SqlInjection._redact_report(report) - for v in 
redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "SELECT * FROM users WHERE username = '"}, - {"redacted": True}, - {"value": "'"}, - {"value": " AND password = "}, - {"value": "'"}, - {"redacted": True}, - {"value": "'"}, - ] + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == { + "valueParts": [ + {"value": "SELECT * FROM users WHERE username = '"}, + {"redacted": True}, + {"value": "' AND password = '"}, + {"pattern": "******", "redacted": True, "source": 0}, + {"value": "'"}, + ] + } + + for v in result["sources"]: + assert v == {"name": "SomeName", "origin": OriginType.PARAMETER, "pattern": "abcdefghi", "redacted": True} def test_redacted_report_valueparts_username_tainted(): - ev = Evidence( - valueParts=[ - {"value": "SELECT * FROM users WHERE username = '"}, - {"value": "pepito", "source": 0}, - {"value": "' AND password = '"}, - {"value": "secret", "source": 0}, - {"value": "'"}, - ] + string_evicence = taint_pyobject(pyobject="secret", source_name="SomeName", source_value="SomeValue") + + string_tainted = add_aspect( + "SELECT * FROM users WHERE username = '", + add_aspect(string_evicence, add_aspect("' AND password = '", add_aspect(string_evicence, "'"))), ) + ev = Evidence(value=string_tainted, dialect="POSTGRES") loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) - - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "SELECT * FROM users WHERE username = '"}, - {"redacted": True}, - {"value": "'"}, - {"value": " AND password = "}, - {"value": "'"}, - 
{"redacted": True}, - {"value": "'"}, - ] + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == { + "valueParts": [ + {"value": "SELECT * FROM users WHERE username = '"}, + {"pattern": "******", "redacted": True, "source": 0}, + {"value": "' AND password = '"}, + {"pattern": "******", "redacted": True, "source": 0}, + {"value": "'"}, + ] + } + + for v in result["sources"]: + assert v == {"name": "SomeName", "origin": OriginType.PARAMETER, "pattern": "abcdefghi", "redacted": True} def test_regression_ci_failure(): - ev = Evidence( - valueParts=[ - {"value": "SELECT tbl_name FROM sqlite_"}, - {"value": "master", "source": 0}, - {"value": "WHERE tbl_name LIKE 'password'"}, - ] + string_evicence = taint_pyobject(pyobject="master", source_name="SomeName", source_value="master") + + string_tainted = add_aspect( + "SELECT tbl_name FROM sqlite_", add_aspect(string_evicence, "WHERE tbl_name LIKE 'password'") ) + ev = Evidence(value=string_tainted, dialect="POSTGRES") loc = Location(path="foobar.py", line=35, spanId=123) v = Vulnerability(type=VULN_SQL_INJECTION, evidence=ev, location=loc) - s = Source(origin="SomeOrigin", name="SomeName", value="SomeValue") - report = IastSpanReporter([s], {v}) - - redacted_report = SqlInjection._redact_report(report) - for v in redacted_report.vulnerabilities: - assert v.evidence.valueParts == [ - {"value": "SELECT tbl_name FROM sqlite_"}, - {"source": 0, "value": "master"}, - {"value": "WHERE tbl_name LIKE '"}, - {"redacted": True}, - {"value": "'"}, - ] + report = IastSpanReporter(vulnerabilities={v}) + report.add_ranges_to_evidence_and_extract_sources(v) + result = report.build_and_scrub_value_parts() + + assert result["vulnerabilities"] + + for v in result["vulnerabilities"]: + assert v["evidence"] == { + "valueParts": [ + {"value": 
"SELECT tbl_name FROM sqlite_"}, + {"source": 0, "value": "master"}, + {"value": "WHERE tbl_name LIKE '"}, + {"redacted": True}, + {"value": "'"}, + ] + } + + for v in result["sources"]: + assert v == {"name": "SomeName", "origin": OriginType.PARAMETER, "value": "master"} diff --git a/tests/appsec/iast/test_taint_utils.py b/tests/appsec/iast/test_taint_utils.py index 9ccf6df4507..a60cc2c547a 100644 --- a/tests/appsec/iast/test_taint_utils.py +++ b/tests/appsec/iast/test_taint_utils.py @@ -9,7 +9,7 @@ from ddtrace.appsec._iast._taint_tracking import taint_pyobject from ddtrace.appsec._iast._taint_utils import LazyTaintDict from ddtrace.appsec._iast._taint_utils import LazyTaintList -from ddtrace.appsec._iast._taint_utils import check_tainted_args +from ddtrace.appsec._iast._taint_utils import check_tainted_dbapi_args def setup(): @@ -192,17 +192,17 @@ def test_checked_tainted_args(): untainted_arg = "gallahad the pure" # Returns False: Untainted first argument - assert not check_tainted_args( + assert not check_tainted_dbapi_args( args=(untainted_arg,), kwargs=None, tracer=None, integration_name="sqlite", method=cursor.execute ) # Returns False: Untainted first argument - assert not check_tainted_args( + assert not check_tainted_dbapi_args( args=(untainted_arg, tainted_arg), kwargs=None, tracer=None, integration_name="sqlite", method=cursor.execute ) # Returns False: Integration name not in list - assert not check_tainted_args( + assert not check_tainted_dbapi_args( args=(tainted_arg,), kwargs=None, tracer=None, @@ -211,7 +211,7 @@ def test_checked_tainted_args(): ) # Returns False: Wrong function name - assert not check_tainted_args( + assert not check_tainted_dbapi_args( args=(tainted_arg,), kwargs=None, tracer=None, @@ -220,17 +220,17 @@ def test_checked_tainted_args(): ) # Returns True: - assert check_tainted_args( + assert check_tainted_dbapi_args( args=(tainted_arg, untainted_arg), kwargs=None, tracer=None, integration_name="sqlite", method=cursor.execute ) # 
Returns True: - assert check_tainted_args( + assert check_tainted_dbapi_args( args=(tainted_arg, untainted_arg), kwargs=None, tracer=None, integration_name="mysql", method=cursor.execute ) # Returns True: - assert check_tainted_args( + assert check_tainted_dbapi_args( args=(tainted_arg, untainted_arg), kwargs=None, tracer=None, integration_name="psycopg", method=cursor.execute ) diff --git a/tests/contrib/dbapi/test_dbapi_appsec.py b/tests/contrib/dbapi/test_dbapi_appsec.py index 819f969ede6..3ba165da8cf 100644 --- a/tests/contrib/dbapi/test_dbapi_appsec.py +++ b/tests/contrib/dbapi/test_dbapi_appsec.py @@ -36,7 +36,7 @@ def test_tainted_query(self): traced_cursor.execute(query) cursor.execute.assert_called_once_with(query) - mock_sql_injection_report.assert_called_once_with(evidence_value=query) + mock_sql_injection_report.assert_called_once_with(evidence_value=query, dialect="sqlite") @pytest.mark.skipif(not _is_python_version_supported(), reason="IAST compatible versions") def test_tainted_query_args(self): diff --git a/tests/contrib/django/django_app/appsec_urls.py b/tests/contrib/django/django_app/appsec_urls.py index c076b238a76..ffdbd413936 100644 --- a/tests/contrib/django/django_app/appsec_urls.py +++ b/tests/contrib/django/django_app/appsec_urls.py @@ -99,10 +99,10 @@ def sqli_http_request_header_name(request): def sqli_http_request_header_value(request): value = [x for x in request.META.values() if x == "master"][0] - with connection.cursor() as cursor: + query = add_aspect("SELECT 1 FROM sqlite_", value) # label iast_enabled_sqli_http_request_header_value - cursor.execute(add_aspect("SELECT 1 FROM sqlite_", value)) + cursor.execute(query) return HttpResponse(request.META["HTTP_USER_AGENT"], status=200) diff --git a/tests/contrib/django/test_django_appsec_iast.py b/tests/contrib/django/test_django_appsec_iast.py index 7298e06cd22..f3716c2f799 100644 --- a/tests/contrib/django/test_django_appsec_iast.py +++ 
b/tests/contrib/django/test_django_appsec_iast.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import json -import mock import pytest from ddtrace.appsec._constants import IAST @@ -27,9 +26,9 @@ def reset_context(): from ddtrace.appsec._iast._taint_tracking import create_context from ddtrace.appsec._iast._taint_tracking import reset_context - yield - reset_context() - _ = create_context() + yield + reset_context() + _ = create_context() def _aux_appsec_get_root_span( @@ -65,7 +64,7 @@ def _aux_appsec_get_root_span( @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_weak_hash(client, test_spans, tracer): - with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_asm_enabled=True, _iast_enabled=True, _deduplication_enabled=False)): oce.reconfigure() patch_iast({"weak_hash": True}) root_span, _ = _aux_appsec_get_root_span(client, test_spans, tracer, url="/appsec/weak-hash/") @@ -78,10 +77,7 @@ def test_django_weak_hash(client, test_spans, tracer): @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), override_env({"DD_IAST_ENABLED": "True"}): - oce.reconfigure() - tracer._iast_enabled = True - + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -98,7 +94,7 @@ def test_django_tainted_user_agent_iast_enabled(client, test_spans, tracer): @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_disabled(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), override_env({"DD_IAST_ENABLED": "True"}): + with 
override_global_config(dict(_iast_enabled=False, _deduplication_enabled=False)): oce.reconfigure() root_span, response = _aux_appsec_get_root_span( @@ -120,9 +116,7 @@ def test_django_tainted_user_agent_iast_disabled(client, test_spans, tracer): @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled_sqli_http_request_parameter(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -143,14 +137,20 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_request_parameter(clie line, hash_value = get_line_and_hash("iast_enabled_sqli_http_request_parameter", vuln_type, filename=TEST_FILE) assert loaded["sources"] == [ - {"origin": "http.request.parameter", "name": "q", "value": "SELECT 1 FROM sqlite_master"} + { + "name": "q", + "origin": "http.request.parameter", + "pattern": "abcdefghijklmnopqrstuvwxyzA", + "redacted": True, + } ] + assert loaded["vulnerabilities"][0]["type"] == vuln_type assert loaded["vulnerabilities"][0]["evidence"] == { "valueParts": [ - {"value": "SELECT ", "source": 0}, - {"redacted": True}, - {"value": " FROM sqlite_master", "source": 0}, + {"source": 0, "value": "SELECT "}, + {"pattern": "h", "redacted": True, "source": 0}, + {"source": 0, "value": " FROM sqlite_master"}, ] } assert loaded["vulnerabilities"][0]["location"]["path"] == TEST_FILE @@ -161,9 +161,7 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_request_parameter(clie @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def 
test_django_tainted_user_agent_iast_enabled_sqli_http_request_header_value(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -174,37 +172,34 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_request_header_value(c headers={"HTTP_USER_AGENT": "master"}, ) - vuln_type = "SQL_INJECTION" - loaded = json.loads(root_span.get_tag(IAST.JSON)) + assert response.status_code == 200 + assert response.content == b"master" - line, hash_value = get_line_and_hash( - "iast_enabled_sqli_http_request_header_value", vuln_type, filename=TEST_FILE - ) + loaded = json.loads(root_span.get_tag(IAST.JSON)) assert loaded["sources"] == [{"origin": "http.request.header", "name": "HTTP_USER_AGENT", "value": "master"}] - assert loaded["vulnerabilities"][0]["type"] == vuln_type - assert loaded["vulnerabilities"][0]["hash"] == hash_value + assert loaded["vulnerabilities"][0]["type"] == VULN_SQL_INJECTION assert loaded["vulnerabilities"][0]["evidence"] == { "valueParts": [ {"value": "SELECT "}, {"redacted": True}, {"value": " FROM sqlite_"}, - {"value": "master", "source": 0}, + {"source": 0, "value": "master"}, ] } + + line, hash_value = get_line_and_hash( + "iast_enabled_sqli_http_request_header_value", VULN_SQL_INJECTION, filename=TEST_FILE + ) assert loaded["vulnerabilities"][0]["location"]["path"] == TEST_FILE assert loaded["vulnerabilities"][0]["location"]["line"] == line - - assert response.status_code == 200 - assert response.content == b"master" + assert loaded["vulnerabilities"][0]["hash"] == hash_value @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def 
test_django_tainted_user_agent_iast_disabled_sqli_http_request_header_value(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -224,9 +219,7 @@ def test_django_tainted_user_agent_iast_disabled_sqli_http_request_header_value( @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled_sqli_http_request_header_name(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -237,17 +230,13 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_request_header_name(cl headers={"master": "test/1.2.3"}, ) - vuln_type = "SQL_INJECTION" + assert response.status_code == 200 + assert response.content == b"test/1.2.3" loaded = json.loads(root_span.get_tag(IAST.JSON)) - line, hash_value = get_line_and_hash( - "iast_enabled_sqli_http_request_header_name", vuln_type, filename=TEST_FILE - ) - assert loaded["sources"] == [{"origin": "http.request.header.name", "name": "master", "value": "master"}] - assert loaded["vulnerabilities"][0]["type"] == vuln_type - assert loaded["vulnerabilities"][0]["hash"] == hash_value + assert loaded["vulnerabilities"][0]["type"] == VULN_SQL_INJECTION assert loaded["vulnerabilities"][0]["evidence"] == { "valueParts": [ {"value": "SELECT "}, @@ -256,19 +245,19 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_request_header_name(cl {"value": 
"master", "source": 0}, ] } + + line, hash_value = get_line_and_hash( + "iast_enabled_sqli_http_request_header_name", VULN_SQL_INJECTION, filename=TEST_FILE + ) assert loaded["vulnerabilities"][0]["location"]["path"] == TEST_FILE assert loaded["vulnerabilities"][0]["location"]["line"] == line - - assert response.status_code == 200 - assert response.content == b"test/1.2.3" + assert loaded["vulnerabilities"][0]["hash"] == hash_value @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_disabled_sqli_http_request_header_name(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -288,9 +277,7 @@ def test_django_tainted_user_agent_iast_disabled_sqli_http_request_header_name(c @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_iast_enabled_full_sqli_http_path_parameter(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -298,19 +285,15 @@ def test_django_iast_enabled_full_sqli_http_path_parameter(client, test_spans, t url="/appsec/sqli_http_path_parameter/sqlite_master/", headers={"HTTP_USER_AGENT": "test/1.2.3"}, ) - vuln_type = "SQL_INJECTION" + assert response.status_code == 200 + assert response.content == b"test/1.2.3" loaded = json.loads(root_span.get_tag(IAST.JSON)) - line, hash_value = get_line_and_hash( - 
"iast_enabled_full_sqli_http_path_parameter", vuln_type, filename=TEST_FILE - ) - assert loaded["sources"] == [ {"origin": "http.request.path.parameter", "name": "q_http_path_parameter", "value": "sqlite_master"} ] - assert loaded["vulnerabilities"][0]["type"] == vuln_type - assert loaded["vulnerabilities"][0]["hash"] == hash_value + assert loaded["vulnerabilities"][0]["type"] == VULN_SQL_INJECTION assert loaded["vulnerabilities"][0]["evidence"] == { "valueParts": [ {"value": "SELECT "}, @@ -319,19 +302,18 @@ def test_django_iast_enabled_full_sqli_http_path_parameter(client, test_spans, t {"value": "sqlite_master", "source": 0}, ] } + line, hash_value = get_line_and_hash( + "iast_enabled_full_sqli_http_path_parameter", VULN_SQL_INJECTION, filename=TEST_FILE + ) assert loaded["vulnerabilities"][0]["location"]["path"] == TEST_FILE assert loaded["vulnerabilities"][0]["location"]["line"] == line - - assert response.status_code == 200 - assert response.content == b"test/1.2.3" + assert loaded["vulnerabilities"][0]["hash"] == hash_value @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_iast_disabled_full_sqli_http_path_parameter(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -349,9 +331,7 @@ def test_django_iast_disabled_full_sqli_http_path_parameter(client, test_spans, @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_name(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - 
"ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -359,13 +339,11 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_name(client, t url="/appsec/sqli_http_request_cookie_name/", cookies={"master": "test/1.2.3"}, ) + assert response.status_code == 200 + assert response.content == b"test/1.2.3" loaded = json.loads(root_span.get_tag(IAST.JSON)) - line, hash_value = get_line_and_hash( - "iast_enabled_sqli_http_cookies_name", VULN_SQL_INJECTION, filename=TEST_FILE - ) - vulnerability = False for vuln in loaded["vulnerabilities"]: if vuln["type"] == VULN_SQL_INJECTION: @@ -374,7 +352,6 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_name(client, t assert vulnerability, "No {} reported".format(VULN_SQL_INJECTION) assert loaded["sources"] == [{"origin": "http.request.cookie.name", "name": "master", "value": "master"}] - assert vulnerability["hash"] == hash_value assert vulnerability["evidence"] == { "valueParts": [ {"value": "SELECT "}, @@ -383,19 +360,18 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_name(client, t {"value": "master", "source": 0}, ] } + line, hash_value = get_line_and_hash( + "iast_enabled_sqli_http_cookies_name", VULN_SQL_INJECTION, filename=TEST_FILE + ) assert vulnerability["location"]["path"] == TEST_FILE assert vulnerability["location"]["line"] == line - - assert response.status_code == 200 - assert response.content == b"test/1.2.3" + assert vulnerability["hash"] == hash_value @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_iast_disabled_sqli_http_cookies_name(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - 
"ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -413,9 +389,7 @@ def test_django_tainted_iast_disabled_sqli_http_cookies_name(client, test_spans, @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_value(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=True - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -423,10 +397,10 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_value(client, url="/appsec/sqli_http_request_cookie_value/", cookies={"master": "master"}, ) - vuln_type = "SQL_INJECTION" - loaded = json.loads(root_span.get_tag(IAST.JSON)) + assert response.status_code == 200 + assert response.content == b"master" - line, hash_value = get_line_and_hash("iast_enabled_sqli_http_cookies_value", vuln_type, filename=TEST_FILE) + loaded = json.loads(root_span.get_tag(IAST.JSON)) vulnerability = False for vuln in loaded["vulnerabilities"]: @@ -436,7 +410,7 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_value(client, assert vulnerability, "No {} reported".format(VULN_SQL_INJECTION) assert loaded["sources"] == [{"origin": "http.request.cookie.value", "name": "master", "value": "master"}] assert vulnerability["type"] == "SQL_INJECTION" - assert vulnerability["hash"] == hash_value + assert vulnerability["evidence"] == { "valueParts": [ {"value": "SELECT "}, @@ -445,19 +419,19 @@ def 
test_django_tainted_user_agent_iast_enabled_sqli_http_cookies_value(client, {"value": "master", "source": 0}, ] } + + line, hash_value = get_line_and_hash( + "iast_enabled_sqli_http_cookies_value", VULN_SQL_INJECTION, filename=TEST_FILE + ) assert vulnerability["location"]["line"] == line assert vulnerability["location"]["path"] == TEST_FILE - - assert response.status_code == 200 - assert response.content == b"master" + assert vulnerability["hash"] == hash_value @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_iast_disabled_sqli_http_cookies_value(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -482,9 +456,7 @@ def test_django_tainted_iast_disabled_sqli_http_cookies_value(client, test_spans @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_user_agent_iast_enabled_sqli_http_body(client, test_spans, tracer, payload, content_type): - with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)), override_env( - dict(DD_IAST_ENABLED="True") - ), mock.patch("ddtrace.contrib.dbapi._is_iast_enabled", return_value=True): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -518,9 +490,7 @@ def test_django_tainted_user_agent_iast_enabled_sqli_http_body(client, test_span @pytest.mark.django_db() @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_tainted_iast_disabled_sqli_http_body(client, test_spans, 
tracer): - with override_global_config(dict(_iast_enabled=False)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=False)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -538,9 +508,7 @@ def test_django_tainted_iast_disabled_sqli_http_body(client, test_spans, tracer) @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_querydict_django_with_iast(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True)), mock.patch( - "ddtrace.contrib.dbapi._is_iast_enabled", return_value=False - ), override_env({"DD_IAST_ENABLED": "True"}): + with override_global_config(dict(_iast_enabled=True)): root_span, response = _aux_appsec_get_root_span( client, test_spans, @@ -558,9 +526,7 @@ def test_querydict_django_with_iast(client, test_spans, tracer): @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_command_injection(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)), override_env( - dict(DD_IAST_ENABLED="True") - ): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): oce.reconfigure() patch_iast({"command_injection": True}) root_span, _ = _aux_appsec_get_root_span( @@ -590,9 +556,7 @@ def test_django_command_injection(client, test_spans, tracer): @pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") def test_django_header_injection(client, test_spans, tracer): - with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)), override_env( - dict(DD_IAST_ENABLED="True") - ): + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)): oce.reconfigure() patch_iast({"header_injection": True}) root_span, _ = 
_aux_appsec_get_root_span( diff --git a/tests/contrib/flask/test_flask_appsec_iast.py b/tests/contrib/flask/test_flask_appsec_iast.py index d3b7f603ab0..702375dcd8c 100644 --- a/tests/contrib/flask/test_flask_appsec_iast.py +++ b/tests/contrib/flask/test_flask_appsec_iast.py @@ -44,7 +44,6 @@ def setUp(self): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, _deduplication_enabled=False, ) ), override_env(IAST_ENV): @@ -77,7 +76,7 @@ def sqli_1(param_str): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, + _deduplication_enabled=False, ) ): resp = self.client.post("/sqli/sqlite_master/", data={"name": "test"}) @@ -128,7 +127,7 @@ def sqli_2(param_str): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, + _deduplication_enabled=False, ) ): resp = self.client.post( @@ -377,7 +376,6 @@ def sqli_7(): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, _deduplication_enabled=False, ) ), override_env(IAST_ENV): @@ -444,7 +442,7 @@ def sqli_8(): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, + _deduplication_enabled=False, ) ): if tuple(map(int, werkzeug_version.split("."))) >= (2, 3): @@ -505,6 +503,7 @@ def sqli_9(): with override_global_config( dict( _iast_enabled=True, + _deduplication_enabled=False, ) ): resp = self.client.get("/sqli/parameter/?table=sqlite_master") @@ -610,7 +609,7 @@ def header_injection(): with override_global_config( dict( _iast_enabled=True, - _asm_enabled=True, + _deduplication_enabled=False, ) ): resp = self.client.post("/header_injection/", data={"name": "test"})