Skip to content

Commit

Permalink
Merge branch 'main' into taegyunkim/findpython3
Browse files Browse the repository at this point in the history
  • Loading branch information
taegyunkim authored Oct 7, 2024
2 parents 6c9aab2 + 6ae8477 commit 7cffa3d
Show file tree
Hide file tree
Showing 17 changed files with 168 additions and 91 deletions.
13 changes: 12 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,21 @@ ddtrace/_version.py
artifacts/

# IAST cpp
ddtrace/appsec/_iast/_taint_tracking/cmake_install.cmake
ddtrace/appsec/_iast/_taint_tracking/CMakeCache.txt
ddtrace/appsec/_iast/_taint_tracking/Makefile
ddtrace/appsec/_iast/_taint_tracking/cmake-build-debug/*
ddtrace/appsec/_iast/_taint_tracking/_deps/*
ddtrace/appsec/_iast/_taint_tracking/CMakeFiles/*

ddtrace/appsec/_iast/_taint_tracking/tests/CMakeFiles/*
ddtrace/appsec/_iast/_taint_tracking/tests/cmake_install.cmake
ddtrace/appsec/_iast/_taint_tracking/tests/CMakeCache.txt
ddtrace/appsec/_iast/_taint_tracking/tests/Makefile
ddtrace/appsec/_iast/_taint_tracking/tests/CTestTestfile.cmake
ddtrace/appsec/_iast/_taint_tracking/tests/native_tests*
ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/Makefile
ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/CMakeFiles/*
ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/cmake_install.cmake
# CircleCI generated config
.circleci/config.gen.yml

Expand Down
13 changes: 7 additions & 6 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ aspect_join_str(PyObject* sep,

PyObject* new_result{ new_pyobject_id(result) };
set_tainted_object(new_result, result_to, tx_taint_map);
Py_DecRef(result);
Py_DECREF(result);
return new_result;
}

Expand Down Expand Up @@ -134,7 +134,7 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, const

PyObject* new_result{ new_pyobject_id(result) };
set_tainted_object(new_result, result_to, tx_taint_map);
Py_DecRef(result);
Py_DECREF(result);
return new_result;
}

Expand All @@ -158,9 +158,10 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)
PyObject* list_aux = PyList_New(0);
while ((item = PyIter_Next(iterator))) {
PyList_Append(list_aux, item);
Py_DECREF(item);
}
arg0 = list_aux;
Py_DecRef(iterator);
Py_DECREF(iterator);
decref_arg0 = true;
}
}
Expand All @@ -181,7 +182,7 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)

if (has_pyerr()) {
if (decref_arg0) {
Py_DecRef(arg0);
Py_DECREF(arg0);
}
return nullptr;
}
Expand All @@ -190,13 +191,13 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)
if (not ctx_map or ctx_map->empty() or get_pyobject_size(result) == 0) {
// Empty result cannot have taint ranges
if (decref_arg0) {
Py_DecRef(arg0);
Py_DECREF(arg0);
}
return result;
}
auto res = aspect_join(sep, result, arg0, ctx_map);
if (decref_arg0) {
Py_DecRef(arg0);
Py_DECREF(arg0);
}
return res;
});
Expand Down
36 changes: 23 additions & 13 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,16 @@ add_aspect(PyObject* result_o,
const size_t len_candidate_text{ get_pyobject_size(candidate_text) };
const size_t len_text_to_add{ get_pyobject_size(text_to_add) };

if (len_text_to_add == 0 and len_candidate_text > 0) {
return candidate_text;
}
if (len_text_to_add > 0 and len_candidate_text == 0 and text_to_add == result_o) {
return text_to_add;
// Appended from or with nothing, ranges didn't change
if ((len_text_to_add == 0 and len_candidate_text > 0) ||
(len_text_to_add > 0 and len_candidate_text == 0 and text_to_add == result_o)) {
return result_o;
}

const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map);
if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) {
const auto& res_new_id = new_pyobject_id(result_o);
Py_DecRef(result_o);
Py_DECREF(result_o);
// If left side is already at the maximum taint ranges, we just reuse its
// ranges, we don't need to look at left side.
set_tainted_object(res_new_id, to_candidate_text, tx_taint_map);
Expand All @@ -44,12 +43,20 @@ add_aspect(PyObject* result_o,
}
if (!to_text_to_add) {
const auto& res_new_id = new_pyobject_id(result_o);
Py_DecRef(result_o);
Py_DECREF(result_o);
set_tainted_object(res_new_id, to_candidate_text, tx_taint_map);
return res_new_id;
}

auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text);
if (!to_candidate_text) {
const auto tainted = initializer->allocate_tainted_object();
tainted->add_ranges_shifted(to_text_to_add, static_cast<RANGE_START>(len_candidate_text));
set_tainted_object(result_o, tainted, tx_taint_map);
}

// At this point we have both to_candidate_text and to_text_to_add to we add the
// ranges from both to result_o
const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text);
tainted->add_ranges_shifted(to_text_to_add, static_cast<RANGE_START>(len_candidate_text));
set_tainted_object(result_o, tainted, tx_taint_map);

Expand Down Expand Up @@ -84,6 +91,9 @@ api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)

// PyNumber_Add actually works for any type!
result_o = PyNumber_Add(candidate_text, text_to_add);
if (result_o == nullptr) {
return nullptr;
}

TRY_CATCH_ASPECT("add_aspect", return result_o, , {
const auto tx_map = Initializer::get_tainting_map();
Expand Down Expand Up @@ -116,16 +126,17 @@ api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
PyObject*
api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
{
PyObject* result_o = nullptr;

if (nargs != 2) {
py::set_error(PyExc_ValueError, MSG_ERROR_N_PARAMS);
return nullptr;
}
PyObject* candidate_text = args[0];
PyObject* text_to_add = args[1];

result_o = PyNumber_InPlaceAdd(candidate_text, text_to_add);
PyObject* result_o = PyNumber_InPlaceAdd(candidate_text, text_to_add);
if (result_o == nullptr) {
return nullptr;
}

TRY_CATCH_ASPECT("add_inplace_aspect", return result_o, , {
const auto tx_map = Initializer::get_tainting_map();
Expand All @@ -152,7 +163,6 @@ api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
is_notinterned_notfasttainted_unicode(text_to_add)) {
return result_o;
}
candidate_text = add_aspect(result_o, candidate_text, text_to_add, tx_map);
return candidate_text;
return add_aspect(result_o, candidate_text, text_to_add, tx_map);
});
}
27 changes: 3 additions & 24 deletions ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Initializer::Initializer()
{
// Fill the taintedobjects stack
for (int i = 0; i < TAINTEDOBJECTS_STACK_SIZE; i++) {
available_taintedobjects_stack.push(new TaintedObject());
available_taintedobjects_stack.push(make_shared<TaintedObject>());
}

// Fill the ranges stack
Expand All @@ -41,9 +41,6 @@ Initializer::clear_tainting_map(const TaintRangeMapTypePtr& tx_map)
return;
}
std::lock_guard<std::mutex> lock(active_map_addreses_mutex);
for (const auto& [fst, snd] : *tx_map) {
snd.second->decref();
}
tx_map->clear();
active_map_addreses.erase(tx_map.get());
}
Expand Down Expand Up @@ -114,12 +111,12 @@ TaintedObjectPtr
Initializer::allocate_tainted_object()
{
if (!available_taintedobjects_stack.empty()) {
const auto& toptr = available_taintedobjects_stack.top();
const auto toptr = available_taintedobjects_stack.top();
available_taintedobjects_stack.pop();
return toptr;
}
// Stack is empty, create new object
return new TaintedObject();
return make_shared<TaintedObject>();
}

TaintedObjectPtr
Expand Down Expand Up @@ -147,24 +144,6 @@ Initializer::allocate_tainted_object_copy(const TaintedObjectPtr& from)
return allocate_ranges_into_taint_object_copy(from->ranges_);
}

void
Initializer::release_tainted_object(TaintedObjectPtr tobj)
{
if (!tobj) {
return;
}

tobj->reset();
if (available_taintedobjects_stack.size() < TAINTEDOBJECTS_STACK_SIZE) {
available_taintedobjects_stack.push(tobj);
return;
}

// Stack full, just delete the object (but to a reset before so ranges are
// reused or freed)
delete tobj;
}

TaintRangePtr
Initializer::allocate_taint_range(const RANGE_START start, const RANGE_LENGTH length, const Source& origin)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ class Initializer
*/
TaintedObjectPtr allocate_tainted_object_copy(const TaintedObjectPtr& from);

void release_tainted_object(TaintedObjectPtr tobj);

// FIXME: these should be static functions of TaintRange
// IMPORTANT: if the returned object is not assigned to the map, you have
// responsibility of calling release_taint_range on it or you'll have a leak.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,8 @@ set_ranges(PyObject* str, const TaintRangeRefs& ranges, const TaintRangeMapTypeP
auto new_tainted_object = initializer->allocate_ranges_into_taint_object(ranges);

set_fast_tainted_if_notinterned_unicode(str);
new_tainted_object->incref();
if (it != tx_map->end()) {
it->second.second->decref();
it->second.second.reset();
it->second = std::make_pair(get_internal_hash(str), new_tainted_object);
return true;
}
Expand Down Expand Up @@ -312,7 +311,6 @@ get_tainted_object(PyObject* str, const TaintRangeMapTypePtr& tx_map)
}

if (get_internal_hash(str) != it->second.first) {
it->second.second->decref();
tx_map->erase(it);
return nullptr;
}
Expand Down Expand Up @@ -368,15 +366,13 @@ set_tainted_object(PyObject* str, TaintedObjectPtr tainted_object, const TaintRa
// If the tainted object is different, we need to decref the previous one
// and incref the new one. But if it's the same object, we can avoid both
// operations, since they would be redundant.
it->second.second->decref();
tainted_object->incref();
it->second.second.reset();
it->second = std::make_pair(get_internal_hash(str), tainted_object);
}
// Update the hash, because for bytearrays it could have changed after the extend operation
it->second.first = get_internal_hash(str);
return;
}
tainted_object->incref();
tx_map->insert({ obj_id, std::make_pair(get_internal_hash(str), tainted_object) });
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace py = pybind11;
class TaintedObject;

// Alias
using TaintedObjectPtr = TaintedObject*;
using TaintedObjectPtr = shared_ptr<TaintedObject>;

#ifdef NDEBUG // Decide wether to use abseil

Expand Down Expand Up @@ -134,7 +134,7 @@ api_is_unicode_and_not_fast_tainted(const py::handle& str)
return is_notinterned_notfasttainted_unicode(str.ptr());
}

TaintedObject*
TaintedObjectPtr
get_tainted_object(PyObject* str, const TaintRangeMapTypePtr& tx_taint_map);

Py_hash_t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,34 +140,11 @@ void
TaintedObject::reset()
{
move_ranges_to_stack();
rc_ = 0;
if (initializer) {
ranges_.reserve(RANGES_INITIAL_RESERVE);
}
}

void
TaintedObject::incref()
{
rc_++;
}

void
TaintedObject::decref()
{
if (--rc_ <= 0) {
release();
}
}

void
TaintedObject::release()
{
// If rc_ is negative, there is a bug.
// assert(rc_ == 0);
initializer->release_tainted_object(this);
}

void
pyexport_taintedobject(const py::module& m)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class TaintedObject

private:
TaintRangeRefs ranges_;
size_t rc_{};

public:
constexpr static int TAINT_RANGE_LIMIT = 100;
Expand Down Expand Up @@ -36,7 +35,7 @@ class TaintedObject

[[nodiscard]] TaintRangeRefs get_ranges_copy() const { return ranges_; }

void add_ranges_shifted(TaintedObject* tainted_object,
void add_ranges_shifted(TaintedObjectPtr tainted_object,
RANGE_START offset,
RANGE_LENGTH max_length = -1,
RANGE_START orig_offset = -1);
Expand All @@ -53,12 +52,6 @@ class TaintedObject
void move_ranges_to_stack();

void reset();

void incref();

void decref();

void release();
};

void
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ new_pyobject_id(PyObject* tainted_object)
const auto res = PyObject_CallFunctionObjArgs(bytes_join_ptr.ptr(), val, NULL);
Py_XDECREF(val);
Py_XDECREF(empty_bytes);
if (res == nullptr) {
return tainted_object;
}
return res;
}

Expand All @@ -138,6 +141,9 @@ new_pyobject_id(PyObject* tainted_object)
Py_XDECREF(val);
Py_XDECREF(empty_bytes);
Py_XDECREF(empty_bytearray);
if (res == nullptr) {
return tainted_object;
}
return res;
}
return tainted_object;
Expand Down
14 changes: 12 additions & 2 deletions ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,22 @@ template<typename... Args>
bool
args_are_text_and_same_type(PyObject* first, PyObject* second, Args... args)
{
if (first == nullptr || second == nullptr) {
return false;
}

const auto type_first = PyObject_Type(first);
const auto type_second = PyObject_Type(second);

// Check if both first and second are valid text types and of the same type
if (first == nullptr || second == nullptr || !is_text(first) || !is_text(second) ||
PyObject_Type(first) != PyObject_Type(second)) {
if (!is_text(first) || !is_text(second) || type_first != type_second) {
Py_XDECREF(type_first);
Py_XDECREF(type_second);
return false;
}

Py_XDECREF(type_first);
Py_XDECREF(type_second);
// Recursively check the rest of the arguments
return args_are_text_and_same_type(second, args...);
}
Expand Down
Loading

0 comments on commit 7cffa3d

Please sign in to comment.