From 79262a670e5579f010b9b394d388fc96c66f8106 Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Sat, 31 Aug 2024 07:40:56 -0700 Subject: [PATCH 1/4] ci: move two test suites to gitlab from circleci (#10466) This change moves the `ddtracerun` and `integration-snapshot` test suites from circleci to gitlab for billing purposes. It was necessary to adjust one of the ddtracerun tests to look for an optional environment variable specifying the location of the redis server, which had previously been `localhost` in both local and CI runs. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Brett Langdon --- .circleci/config.templ.yml | 19 ------------------- .gitlab/tests/core.yml | 15 +++++++++++++++ tests/commands/ddtrace_run_integration.py | 4 ++-- tests/contrib/config.py | 1 + 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/.circleci/config.templ.yml b/.circleci/config.templ.yml index 95edbead77b..356f0d68433 100644 --- a/.circleci/config.templ.yml +++ b/.circleci/config.templ.yml @@ -657,14 +657,6 @@ jobs: name: "Store core dumps" path: /tmp/core_dumps - integration_testagent: - <<: *machine_executor - steps: - - run_test: - snapshot: true - pattern: 'integration-snapshot*' - trace_agent_url: "" - vendor: <<: *contrib_job_small docker: @@ -682,17 +674,6 @@ jobs: snapshot: true docker_services: "localstack" - ddtracerun: - <<: *contrib_job - parallelism: 8 - docker: - - image: *ddtrace_dev_image - - image: *redis_image - steps: - - run_test: - pattern: 'ddtracerun' - trace_agent_url: "" - test_logging: <<: *contrib_job docker: diff --git a/.gitlab/tests/core.yml b/.gitlab/tests/core.yml index 54856fe4a26..51e727f3bc2 100644 --- a/.gitlab/tests/core.yml +++ b/.gitlab/tests/core.yml @@ -8,3 +8,18 @@ telemetry: parallel: 4 variables: SUITE_NAME: "telemetry" + +integration-testagent: + extends: .test_base_riot_snapshot + variables: + SUITE_NAME: "integration-snapshot*" + +ddtracerun: + extends: .test_base_riot + services: + - !reference [.test_base_riot, services] + - name: registry.ddbuild.io/redis:7.0.7 + alias: redis + variables: + SUITE_NAME: "ddtracerun" + TEST_REDIS_HOST: "redis" diff --git a/tests/commands/ddtrace_run_integration.py b/tests/commands/ddtrace_run_integration.py index 8a489afd26e..e52a0c9b8b0 100644 --- a/tests/commands/ddtrace_run_integration.py +++ b/tests/commands/ddtrace_run_integration.py @@ -11,7 +11,7 @@ if __name__ == "__main__": - r = redis.Redis(port=REDIS_CONFIG["port"]) + r = redis.Redis(host=REDIS_CONFIG["host"], port=REDIS_CONFIG["port"]) pin = Pin.get_from(r) assert pin @@ -36,7 +36,7 @@ assert span.error == 0 assert span.get_metric("network.destination.port") == REDIS_CONFIG["port"] assert span.get_metric("out.redis_db") == 0 - assert span.get_tag("out.host") == "localhost" + assert span.get_tag("out.host") == REDIS_CONFIG["host"] assert span.get_tag("redis.raw_command").startswith("mget 0 1 2 3") assert span.get_tag("redis.raw_command").endswith("...") diff --git a/tests/contrib/config.py b/tests/contrib/config.py index 7e75358f1f5..e99d2fba573 100644 --- a/tests/contrib/config.py +++ b/tests/contrib/config.py @@ -53,6 +53,7 @@ } REDIS_CONFIG = { + "host": os.getenv("TEST_REDIS_HOST", "localhost"), "port": int(os.getenv("TEST_REDIS_PORT", 6379)), } From 225dbdea156b4754cf85c8cb160e1554dd34f32e Mon Sep 17 00:00:00 2001 From: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Date: Sat, 31 Aug 2024 08:21:24 -0700 Subject: [PATCH 2/4] ci: move two test suites to gitlab (#10467) This change moves the `integration-agent` and `vendor` test suites from circleci to gitlab for billing purposes, while removing some unused logic from the `integration-agent` job. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Brett Langdon --- .circleci/config.templ.yml | 34 ---------------------------------- .gitlab/tests/core.yml | 10 ++++++++++ 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/.circleci/config.templ.yml b/.circleci/config.templ.yml index 356f0d68433..58b0a9c92ed 100644 --- a/.circleci/config.templ.yml +++ b/.circleci/config.templ.yml @@ -631,40 +631,6 @@ jobs: pattern: 'opentelemetry' snapshot: true - integration_agent: - <<: *machine_executor - parallelism: 2 - steps: - - attach_workspace: - at: . - - checkout - - setup_riot - - start_docker_services: - services: ddagent - - run: - environment: - RIOT_RUN_RECOMPILE_REQS: "<< pipeline.parameters.riot_run_latest >>" - command: | - ulimit -c unlimited - ./scripts/run-test-suite 'integration-latest*' <> 1 - - run: - command: | - mkdir -p /tmp/core_dumps - cp core.* /tmp/core_dumps || true - ./scripts/bt - when: on_fail - - store_artifacts: - name: "Store core dumps" - path: /tmp/core_dumps - - vendor: - <<: *contrib_job_small - docker: - - image: *ddtrace_dev_image - steps: - - run_test: - pattern: 'vendor' - botocore: <<: *machine_executor parallelism: 6 diff --git a/.gitlab/tests/core.yml b/.gitlab/tests/core.yml index 51e727f3bc2..dc5db8185b8 100644 --- a/.gitlab/tests/core.yml +++ b/.gitlab/tests/core.yml @@ -14,6 +14,16 @@ integration-testagent: variables: SUITE_NAME: "integration-snapshot*" +integration-agent: + extends: .test_base_riot + variables: + SUITE_NAME: "integration-latest*" + +vendor: + extends: .test_base_riot + variables: + SUITE_NAME: "vendor" + ddtracerun: extends: .test_base_riot services: From 666f6ecd39a1937f18643bf05ab44c4a10003899 Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Mon, 2 Sep 2024 10:54:07 +0200 Subject: [PATCH 3/4] chore(iast): testing fastapi and sqli (#10474) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .../fastapi/test_fastapi_appsec_iast.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/contrib/fastapi/test_fastapi_appsec_iast.py b/tests/contrib/fastapi/test_fastapi_appsec_iast.py index b732cebd1cb..405c5c1b89a 100644 --- a/tests/contrib/fastapi/test_fastapi_appsec_iast.py +++ b/tests/contrib/fastapi/test_fastapi_appsec_iast.py @@ -9,21 +9,30 @@ from fastapi.responses import JSONResponse import pytest +from ddtrace.appsec._constants import IAST from ddtrace.appsec._handlers import _on_asgi_request_parse_body from ddtrace.appsec._iast import oce from ddtrace.appsec._iast._patch import _on_iast_fastapi_patch +from ddtrace.appsec._iast.constants import VULN_SQL_INJECTION +from ddtrace.appsec._iast.taint_sinks.header_injection import patch as patch_header_injection +from ddtrace.contrib.sqlite3.patch import patch as patch_sqlite_sqli from ddtrace.internal import core +from tests.appsec.iast.iast_utils import get_line_and_hash from tests.utils import override_env from tests.utils import override_global_config IAST_ENV = {"DD_IAST_REQUEST_SAMPLING": "100"} +TEST_FILE_PATH = "tests/contrib/fastapi/test_fastapi_appsec_iast.py" + fastapi_version = tuple([int(v) for v in _fastapi_version.split(".")]) def _aux_appsec_prepare_tracer(tracer): _on_iast_fastapi_patch() + patch_sqlite_sqli() + patch_header_injection() oce.reconfigure() tracer._iast_enabled = True @@ -276,3 +285,56 @@ async def test_route(item_id): assert result["ranges_start"] == 0 assert result["ranges_length"] == 8 assert result["ranges_origin"] == "http.request.path.parameter" + + +def test_fastapi_sqli_path_param(fastapi_application, client, tracer, test_spans): + @fastapi_application.get("/index.html/{param_str}") + async def test_route(param_str): + import sqlite3 + + from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted + from ddtrace.appsec._iast._taint_tracking.aspects import add_aspect + + assert is_pyobject_tainted(param_str) + + con = sqlite3.connect(":memory:") + cur = con.cursor() + # label test_fastapi_sqli_path_parameter + cur.execute(add_aspect("SELECT 1 FROM ", param_str)) + + # test if asgi middleware is ok without any callback registered + core.reset_listeners(event_id="asgi.request.parse.body") + + with override_global_config(dict(_iast_enabled=True, _deduplication_enabled=False)), override_env(IAST_ENV): + # disable callback + _aux_appsec_prepare_tracer(tracer) + resp = client.get( + "/index.html/sqlite_master/", + ) + assert resp.status_code == 200 + + span = test_spans.pop_traces()[1][0] + assert span.get_metric(IAST.ENABLED) == 1.0 + + loaded = json.loads(span.get_tag(IAST.JSON)) + + assert loaded["sources"] == [ + {"origin": "http.request.path.parameter", "name": "param_str", "value": "sqlite_master"} + ] + + line, hash_value = get_line_and_hash( + "test_fastapi_sqli_path_parameter", VULN_SQL_INJECTION, filename=TEST_FILE_PATH + ) + vulnerability = loaded["vulnerabilities"][0] + assert vulnerability["type"] == VULN_SQL_INJECTION + assert vulnerability["evidence"] == { + "valueParts": [ + {"value": "SELECT "}, + {"redacted": True}, + {"value": " FROM "}, + {"value": "sqlite_master", "source": 0}, + ] + } + assert vulnerability["location"]["line"] == line + assert vulnerability["location"]["path"] == TEST_FILE_PATH + assert vulnerability["hash"] == hash_value From e36b8eafbe7d6817e89eb278db22066487818b68 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Martinez Date: Mon, 2 Sep 2024 15:46:03 +0200 Subject: [PATCH 4/4] chore(iast): more native utils and split aspects moved to fully native (#10456) ## Description - Add more native utility functions like `process_flag_added_args` that will make migrating the remaining aspects a lot easier. - Migrate `split`, `rsplit` and `splitlines`. Average 62% performance increase. ## Checklist - [X] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez --- .../_taint_tracking/Aspects/AspectSplit.cpp | 225 +++++++++++------- .../_taint_tracking/Aspects/AspectSplit.h | 16 +- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 3 +- .../_iast/_taint_tracking/Aspects/Helpers.h | 33 ++- .../TaintTracking/TaintRange.cpp | 3 +- .../TaintTracking/TaintRange.h | 22 ++ .../appsec/_iast/_taint_tracking/aspects.py | 65 +---- .../appsec/iast/aspects/test_split_aspect.py | 31 ++- 8 files changed, 223 insertions(+), 175 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp index 5036ead1407..ec2d324f741 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp @@ -1,114 +1,165 @@ #include "AspectSplit.h" #include "Initializer/Initializer.h" +#include "TaintedOps/TaintedOps.h" -template -py::list -api_split_text(const StrType& text, const optional& separator, const optional maxsplit) +static std::optional +handle_potential_re_split(const py::tuple& args, + const py::tuple& sliced_args, + const py::kwargs& kwargs, + const TaintRangeMapTypePtr& tx_map) { - const auto split = text.attr("split"); - const auto split_result = split(separator, maxsplit); + const py::module re = py::module::import("re"); + const py::object re_pattern_type = re.attr("Pattern"); + const py::object types_module = py::module::import("types"); - const auto tx_map = Initializer::get_tainting_map(); - if (not tx_map or tx_map->empty()) { - return split_result; - } + // re.split aspect, either with pattern as first arg or with re module + if (const py::object module_type = types_module.attr("ModuleType"); + isinstance(args[0], re_pattern_type) || + (isinstance(args[0], module_type) && std::string(py::str(args[0].attr("__name__"))) == "re" && + (std::string(py::str(args[0].attr("__package__"))).empty() || + std::string(py::str(args[0].attr("__package__"))) == "re"))) { - if (auto ranges = api_get_ranges(text); not ranges.empty()) { - set_ranges_on_splitted(text, ranges, split_result, tx_map, false); + const py::object split_func = args[0].attr("split"); + // Create a py::slice object to slice the args from index 1 to the end + py::list result = split_func(*sliced_args, **kwargs); + + if (const int offset = isinstance(args[0], re_pattern_type) ? -1 : 0; + args.size() >= (static_cast(3) + offset) && is_tainted(args[2 + offset].ptr(), tx_map)) { + for (auto& i : result) { + if (!i.is_none() and len(i) > 0) { + copy_and_shift_ranges_from_strings(args[2 + offset], i, 0, len(i), tx_map); + } + } + } + return result; } - return split_result; + return std::nullopt; } -template -py::list -api_rsplit_text(const StrType& text, const optional& separator, const optional maxsplit) +static py::object +split_text_common(const py::object& orig_function, + const int flag_added_args, + const py::args& args, + const py::kwargs& kwargs, + const std::string& split_func) { - const auto rsplit = text.attr("rsplit"); - const auto split_result = rsplit(separator, maxsplit); - const auto tx_map = Initializer::get_tainting_map(); - if (not tx_map or tx_map->empty()) { - return split_result; + PyObject* result_or_args = process_flag_added_args(orig_function.ptr(), flag_added_args, args.ptr(), kwargs.ptr()); + py::tuple args_tuple; + if (PyTuple_Check(result_or_args)) { + args_tuple = py::reinterpret_borrow(result_or_args); + } else { + return py::reinterpret_borrow(result_or_args); } - if (auto ranges = api_get_ranges(text); not ranges.empty()) { - set_ranges_on_splitted(text, ranges, split_result, tx_map, false); + const auto& text = args_tuple[0]; + + const py::tuple sliced_args = len(args) > 1 ? args[py::slice(1, len(args), 1)] : py::tuple(); // (,) + auto result_o = text.attr(split_func.c_str())(*sliced_args, **kwargs); // returns['', ''] WHY? + + const auto tx_map = Initializer::get_tainting_map(); + if (!tx_map || tx_map->empty()) { + return result_o; } - return split_result; + + TRY_CATCH_ASPECT("split_aspect", , { + if (split_func == "split") { + if (auto re_split_result = handle_potential_re_split(args_tuple, sliced_args, kwargs, tx_map); + re_split_result.has_value()) { + return *re_split_result; + } + } + + auto [ranges, ranges_error] = get_ranges(text.ptr(), tx_map); + if (!ranges_error and !ranges.empty()) { + set_ranges_on_splitted(text, ranges, result_o, tx_map, false); + } + }); + return result_o; } -template -py::list -api_splitlines_text(const StrType& text, bool keepends) +py::object +api_splitlines_text(const py::object& orig_function, + const int flag_added_args, + const py::args& args, + const py::kwargs& kwargs) { - const auto splitlines = text.attr("splitlines"); - const auto split_result = splitlines(keepends); - const auto tx_map = Initializer::get_tainting_map(); - if (not tx_map or tx_map->empty()) { - return split_result; + const auto result_or_args = py::reinterpret_borrow( + process_flag_added_args(orig_function.ptr(), flag_added_args, args.ptr(), kwargs.ptr())); + + py::tuple args_tuple; + if (py::isinstance(result_or_args)) { + args_tuple = result_or_args.cast(); + } else { + return result_or_args.cast(); } - if (auto ranges = api_get_ranges(text); not ranges.empty()) { - set_ranges_on_splitted(text, ranges, split_result, tx_map, keepends); + const auto& text = args_tuple[0]; + const py::tuple sliced_args = len(args) > 1 ? args[py::slice(1, len(args), 1)] : py::tuple(); + py::object result_o = text.attr("splitlines")(*sliced_args, **kwargs); + + const auto tx_map = Initializer::get_tainting_map(); + if (!tx_map || tx_map->empty()) { + return result_o; } - return split_result; + + TRY_CATCH_ASPECT("split_aspect", , { + auto [ranges, ranges_error] = get_ranges(text.ptr(), tx_map); + if (ranges_error || ranges.empty()) { + return result_o; + } + + // Retrieve keepends and check that is a boolean. If not, return the original value + // because it could be a method of a different type. + bool keepends_is_other_type = false; + bool keepends = false; + if (kwargs.contains("keepends")) { + if (py::isinstance(kwargs["keepends"])) { + keepends = kwargs["keepends"].cast(); + } else { + keepends_is_other_type = true; + } + } else { + if (args.size() > 1) { + if (py::isinstance(args[1])) { + keepends = args[1].cast(); + } else { + keepends_is_other_type = true; + } + } + } + + if (!keepends_is_other_type) { + set_ranges_on_splitted(text, ranges, result_o, tx_map, keepends); + } + }); + return result_o; } void pyexport_aspect_split(py::module& m) { - m.def("_aspect_split", - &api_split_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - m.def("_aspect_split", - &api_split_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - m.def("_aspect_split", - &api_split_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - m.def("_aspect_rsplit", - &api_rsplit_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - m.def("_aspect_rsplit", - &api_rsplit_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - m.def("_aspect_rsplit", - &api_rsplit_text, - "text"_a, - "separator"_a = py::none(), - "maxsplit"_a = -1, - py::return_value_policy::move); - // cppcheck-suppress assignBoolToPointer - m.def("_aspect_splitlines", - &api_splitlines_text, - "text"_a, - "keepends"_a = false, - py::return_value_policy::move); - // cppcheck-suppress assignBoolToPointer - m.def("_aspect_splitlines", - &api_splitlines_text, - "text"_a, - "keepends"_a = false, - py::return_value_policy::move); - // cppcheck-suppress assignBoolToPointer + m.def( + "_aspect_split", + [](const py::object& orig_function, const int flag_added_args, const py::args& args, const py::kwargs& kwargs) { + return split_text_common(orig_function, flag_added_args, args, kwargs, "split"); + }, + "orig_function"_a = py::none(), + "flag_added_args"_a = 0, + py::return_value_policy::move); + + m.def( + "_aspect_rsplit", + [](const py::object& orig_function, const int flag_added_args, const py::args& args, const py::kwargs& kwargs) { + return split_text_common(orig_function, flag_added_args, args, kwargs, "rsplit"); + }, + "orig_function"_a = py::none(), + "flag_added_args"_a = 0, + py::return_value_policy::move); + m.def("_aspect_splitlines", - &api_splitlines_text, - "text"_a, - "keepends"_a = false, + &api_splitlines_text, + "orig_function"_a = py::none(), + "flag_added_args"_a = 0, py::return_value_policy::move); -} \ No newline at end of file +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h index cdaf88a69b8..04be7ca5445 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h @@ -2,17 +2,11 @@ #include "Helpers.h" -template -py::list -api_split_text(const StrType& text, const optional& separator, optional maxsplit); - -template -py::list -api_rsplit_text(const StrType& text, const optional& separator, optional maxsplit); - -template -py::list -api_splitlines_text(const StrType& text, bool keepends); +py::object +api_splitlines_text(const py::object& orig_function, + int flag_added_args, + const py::args& args, + const py::kwargs& kwargs); void pyexport_aspect_split(py::module& m); \ No newline at end of file diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 430ff98f3cc..cc06a53bfd3 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -228,9 +228,8 @@ convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, TaintRange * @param tx_map: The taint map to apply the ranges. * @param include_separator: If the separator should be included in the splitted parts. */ -template bool -set_ranges_on_splitted(const StrType& source_str, +set_ranges_on_splitted(const py::object& source_str, const TaintRangeRefs& source_ranges, const py::list& split_result, const TaintRangeMapTypePtr& tx_map, diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index a4c15f9ebe2..513d3ad969b 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -51,9 +51,8 @@ template std::tuple convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, TaintRangeRefs ranges_orig); -template bool -set_ranges_on_splitted(const StrType& source_str, +set_ranges_on_splitted(const py::object& source_str, const TaintRangeRefs& source_ranges, const py::list& split_result, const TaintRangeMapTypePtr& tx_map, @@ -176,6 +175,36 @@ as_formatted_evidence(StrType& text, return StrType(EVIDENCE_MARKS::BLANK).attr("join")(res_vector); } +inline PyObject* +process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyObject* args, PyObject* kwargs) +{ + // If orig_function is not None and not the built-in str, bytes, or bytearray, slice args + auto orig_function_type = Py_TYPE(orig_function); + + if (orig_function != Py_None && orig_function_type != &PyUnicode_Type && orig_function_type != &PyByteArray_Type && + orig_function_type != &PyBytes_Type) { + + if (flag_added_args > 0) { + Py_ssize_t num_args = PyTuple_Size(args); + PyObject* sliced_args = PyTuple_New(num_args - flag_added_args); + for (Py_ssize_t i = 0; i < num_args - flag_added_args; ++i) { + PyTuple_SET_ITEM(sliced_args, i, PyTuple_GetItem(args, i + flag_added_args)); + Py_INCREF(PyTuple_GetItem(args, i + flag_added_args)); + } + // Call the original function with the sliced args and return its result + PyObject* result = PyObject_Call(orig_function, sliced_args, kwargs); + Py_DECREF(sliced_args); + return result; + } + // Else: call the original function with all args if no slicing is needed + return PyObject_Call(orig_function, args, kwargs); + } + + // If orig_function is None or one of the built-in types, just return args for further processing + Py_INCREF(args); // Increment reference count before returning + return args; +} + void pyexport_aspect_helpers(py::module& m); diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp index ae6e84b4b5c..bf1b4e0f44c 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp @@ -150,8 +150,9 @@ std::pair get_ranges(PyObject* string_input, const TaintRangeMapTypePtr& tx_map) { TaintRangeRefs result; - if (not is_tainteable(string_input)) + if (not is_tainteable(string_input)) { return std::make_pair(result, true); + } if (tx_map->empty()) { return std::make_pair(result, false); diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h index 8ba003e6c9b..6d6e1a9a279 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include @@ -145,5 +146,26 @@ get_internal_hash(PyObject* obj); void set_tainted_object(PyObject* str, TaintedObjectPtr tainted_object, const TaintRangeMapTypePtr& tx_map); +inline void +copy_and_shift_ranges_from_strings(const py::handle& str_1, + const py::handle& str_2, + const int offset, + const int new_length, + const TaintRangeMapTypePtr& tx_map) +{ + if (!tx_map) + return; + + auto [ranges, ranges_error] = get_ranges(str_1.ptr(), tx_map); + if (ranges_error) { + py::set_error(PyExc_TypeError, MSG_ERROR_TAINT_MAP); + return; + } + if (const bool result = set_ranges(str_2.ptr(), shift_taint_ranges(ranges, offset, new_length), tx_map); + not result) { + py::set_error(PyExc_TypeError, MSG_ERROR_SET_RANGES); + } +} + void pyexport_taintrange(py::module& m); diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index 2b5583f1229..f242ad29246 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -59,6 +59,9 @@ index_aspect = aspects.index_aspect _join_aspect = aspects.join_aspect slice_aspect = aspects.slice_aspect +split_aspect = _aspect_split +rsplit_aspect = _aspect_rsplit +splitlines_aspect = _aspect_splitlines ospathjoin_aspect = _aspect_ospathjoin ospathbasename_aspect = _aspect_ospathbasename ospathdirname_aspect = _aspect_ospathdirname @@ -79,9 +82,12 @@ "re_sub_aspect", "ospathjoin_aspect", "_aspect_split", + "split_aspect", "_aspect_rsplit", + "rsplit_aspect", "modulo_aspect", "_aspect_splitlines", + "splitlines_aspect", "ospathbasename_aspect", "ospathdirname_aspect", "ospathnormcase_aspect", @@ -92,65 +98,6 @@ ] -# TODO: Factorize the "flags_added_args" copypasta into a decorator - - -def split_aspect( - orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any -) -> Union[List[TEXT_TYPES], TEXT_TYPES]: - if orig_function is not None: - if orig_function != builtin_str: - if flag_added_args > 0: - args = args[flag_added_args:] - return orig_function(*args, **kwargs) - try: - # re.split aspect, either with pattern as first arg or with re module - if isinstance(args[0], Pattern) or ( - isinstance(args[0], ModuleType) and args[0].__name__ == "re" and args[0].__package__ in ("", "re") - ): - result = args[0].split(*args[1:], **kwargs) - offset = 0 - if isinstance(args[0], Pattern): - offset = -1 - - if len(args) >= (3 + offset) and is_pyobject_tainted(args[2 + offset]): - for i in result: - if len(i): - copy_and_shift_ranges_from_strings(args[2 + offset], i, 0, len(i)) - return result - - return _aspect_split(*args, **kwargs) - except Exception as e: - iast_taint_log_error("IAST propagation error. split_aspect. {}".format(e)) - return args[0].split(*args[1:], **kwargs) - - -def rsplit_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str: - if orig_function is not None: - if orig_function != builtin_str: - if flag_added_args > 0: - args = args[flag_added_args:] - return orig_function(*args, **kwargs) - try: - return _aspect_rsplit(*args, **kwargs) - except Exception as e: - iast_taint_log_error("IAST propagation error. rsplit_aspect. {}".format(e)) - return args[0].rsplit(*args[1:], **kwargs) - - -def splitlines_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str: - if orig_function: - if orig_function != builtin_str: - if flag_added_args > 0: - args = args[flag_added_args:] - return orig_function(*args, **kwargs) - try: - return _aspect_splitlines(*args, **kwargs) - except Exception as e: - iast_taint_log_error("IAST propagation error. splitlines_aspect. {}".format(e)) - return args[0].splitlines(*args[1:], **kwargs) - - def str_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str: if orig_function is not None: if orig_function != builtin_str: diff --git a/tests/appsec/iast/aspects/test_split_aspect.py b/tests/appsec/iast/aspects/test_split_aspect.py index 923df06eed4..4506aec87f6 100644 --- a/tests/appsec/iast/aspects/test_split_aspect.py +++ b/tests/appsec/iast/aspects/test_split_aspect.py @@ -19,6 +19,11 @@ from tests.utils import override_env +def wrap_somesplit(func, *args, **kwargs): + # Remove the orig_function and flag_added_args arguments + return func(None, 0, *args, **kwargs) + + # These tests are simple ones testing the calls and replacements since most of the # actual testing is in test_aspect_helpers' test for set_ranges_on_splitted which these # functions call internally. @@ -29,7 +34,7 @@ def test_aspect_split_simple(): set_ranges(s, (range1, range2)) ranges = get_ranges(s) assert ranges - res = _aspect_split(s) + res = wrap_somesplit(_aspect_split, s) assert res == ["abc", "def"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] @@ -42,7 +47,7 @@ def test_aspect_rsplit_simple(): set_ranges(s, (range1, range2)) ranges = get_ranges(s) assert ranges - res = _aspect_rsplit(s) + res = wrap_somesplit(_aspect_rsplit, s) assert res == ["abc", "def"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] @@ -55,7 +60,7 @@ def test_aspect_split_with_separator(): set_ranges(s, (range1, range2)) ranges = get_ranges(s) assert ranges - res = _aspect_split(s, ":") + res = wrap_somesplit(_aspect_split, s, ":") assert res == ["abc", "def"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))] @@ -68,7 +73,7 @@ def test_aspect_rsplit_with_separator(): set_ranges(s, (range1, range2)) ranges = get_ranges(s) assert ranges - res = _aspect_rsplit(s, ":") + res = wrap_somesplit(_aspect_rsplit, s, ":") assert res == ["abc", "def"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))] @@ -82,7 +87,7 @@ def test_aspect_split_with_maxsplit(): set_ranges(s, (range1, range2, range3)) ranges = get_ranges(s) assert ranges - res = _aspect_split(s, maxsplit=1) + res = wrap_somesplit(_aspect_split, s, maxsplit=1) assert res == ["abc", "def ghi"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [ @@ -90,13 +95,13 @@ def test_aspect_split_with_maxsplit(): TaintRange(3, 4, Source(" ghi", "sample_value", OriginType.PARAMETER)), ] - res = _aspect_split(s, maxsplit=2) + res = wrap_somesplit(_aspect_split, s, maxsplit=2) assert res == ["abc", "def", "ghi"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] assert get_ranges(res[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] - res = _aspect_split(s, maxsplit=0) + res = wrap_somesplit(_aspect_split, s, maxsplit=0) assert res == ["abc def ghi"] assert get_ranges(res[0]) == [range1, range2, range3] @@ -109,20 +114,20 @@ def test_aspect_rsplit_with_maxsplit(): set_ranges(s, (range1, range2, range3)) ranges = get_ranges(s) assert ranges - res = _aspect_rsplit(s, maxsplit=1) + res = wrap_somesplit(_aspect_rsplit, s, maxsplit=1) assert res == ["abc def", "ghi"] assert get_ranges(res[0]) == [ range1, TaintRange(3, 4, Source(" def", "sample_value", OriginType.PARAMETER)), ] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] - res = _aspect_rsplit(s, maxsplit=2) + res = wrap_somesplit(_aspect_rsplit, s, maxsplit=2) assert res == ["abc", "def", "ghi"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] assert get_ranges(res[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] - res = _aspect_rsplit(s, maxsplit=0) + res = wrap_somesplit(_aspect_rsplit, s, maxsplit=0) assert res == ["abc def ghi"] assert get_ranges(res[0]) == [range1, range2, range3] @@ -134,7 +139,7 @@ def test_aspect_splitlines_simple(): set_ranges(s, (range1, range2)) ranges = get_ranges(s) assert ranges - res = _aspect_splitlines(s) + res = wrap_somesplit(_aspect_splitlines, s) assert res == ["abc", "def"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] @@ -148,7 +153,7 @@ def test_aspect_splitlines_keepend_true(): set_ranges(s, (range1, range2, range3)) ranges = get_ranges(s) assert ranges - res = _aspect_splitlines(s, True) + res = wrap_somesplit(_aspect_splitlines, s, keepends=True) assert res == ["abc\n", "def\n", "hij\n"] assert get_ranges(res[0]) == [range1] assert get_ranges(res[1]) == [TaintRange(0, 4, Source("def\n", "sample_value", OriginType.PARAMETER))] @@ -171,7 +176,7 @@ def test_propagate_ranges_with_no_context(caplog): reset_context() with override_env({IAST.ENV_DEBUG: "true"}), caplog.at_level(logging.DEBUG): - result = _aspect_split(string_input, "|") + result = wrap_somesplit(_aspect_split, string_input, "|") assert result == ["abc", "def"] log_messages = [record.getMessage() for record in caplog.get_records("call")] assert not any("[IAST] " in message for message in log_messages), log_messages