From 0f37a569f6adfdf66e82c2f1e32751b1b0ff8c60 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Wed, 25 Sep 2024 10:35:22 +0800 Subject: [PATCH 1/3] helpers: add scope_guard problem: we regularly need to manage resources in the face of exceptions and don't have a scope_guard implementation in either our required boost version or standard library solution: add an open source implementation that's simple and easy to maintain --- src/common/libintern/scope_guard.hpp | 176 +++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 src/common/libintern/scope_guard.hpp diff --git a/src/common/libintern/scope_guard.hpp b/src/common/libintern/scope_guard.hpp new file mode 100644 index 000000000..765356858 --- /dev/null +++ b/src/common/libintern/scope_guard.hpp @@ -0,0 +1,176 @@ +/* + * Created on: 13/02/2018 + * Author: ricab + * + * See README.md for documentation of this header's public interface. + * SPDX-License-Identifier: Unlicense + */ + +#ifndef SCOPE_GUARD_HPP_ +#define SCOPE_GUARD_HPP_ + +#include +#include + +#if __cplusplus >= 201703L +#define SG_NODISCARD [[nodiscard]] +#ifdef SG_REQUIRE_NOEXCEPT_IN_CPP17 +#define SG_REQUIRE_NOEXCEPT +#endif +#else +#define SG_NODISCARD +#endif + +namespace sg { +namespace detail { +/* --- Some custom type traits --- */ + +// Type trait determining whether a type is callable with no arguments +template +struct is_noarg_callable_t : public std::false_type {}; // in general, false + +template +struct is_noarg_callable_t () ())> : public std::true_type { +}; // only true when call expression valid + +// Type trait determining whether a no-argument callable returns void +template +struct returns_void_t : public std::is_same () ())> {}; + +/* Type trait determining whether a no-arg callable is nothrow invocable if +required. This is where SG_REQUIRE_NOEXCEPT logic is encapsulated. */ +template +struct is_nothrow_invocable_if_required_t + : public +#ifdef SG_REQUIRE_NOEXCEPT + std::is_nothrow_invocable /* Note: _r variants not enough to + confirm void return: any return can be + discarded so all returns are + compatible with void */ +#else + std::true_type +#endif +{ +}; + +// logic AND of two or more type traits +template +struct and_t : public and_t> {}; // for more than two arguments + +template +struct and_t : public std::conditional::type {}; // for two arguments + +// Type trait determining whether a type is a proper scope_guard callback. +template +struct is_proper_sg_callback_t : public and_t, + returns_void_t, + is_nothrow_invocable_if_required_t, + std::is_nothrow_destructible> {}; + +/* --- The actual scope_guard template --- */ + +template::value>::type> +class scope_guard; + +/* --- Now the friend maker --- */ + +template +detail::scope_guard make_scope_guard (Callback &&callback) noexcept ( + std::is_nothrow_constructible::value); /* +we need this in the inner namespace due to MSVC bugs preventing +sg::detail::scope_guard from befriending a sg::make_scope_guard +template instance in the parent namespace (see https://is.gd/xFfFhE). */ + +/* --- The template specialization that actually defines the class --- */ + +template +class SG_NODISCARD scope_guard final { + public: + typedef Callback callback_type; + + scope_guard (scope_guard &&other) noexcept ( + std::is_nothrow_constructible::value); + + ~scope_guard () noexcept; // highlight noexcept dtor + + void dismiss () noexcept; + + public: + scope_guard () = delete; + scope_guard (const scope_guard &) = delete; + scope_guard &operator= (const scope_guard &) = delete; + scope_guard &operator= (scope_guard &&) = delete; + + private: + explicit scope_guard (Callback &&callback) noexcept ( + std::is_nothrow_constructible::value); /* + meant for friends only */ + + friend scope_guard make_scope_guard (Callback &&) noexcept ( + std::is_nothrow_constructible::value); /* +only make_scope_guard can create scope_guards from scratch (i.e. non-move) +*/ + + private: + Callback m_callback; + bool m_active; +}; + +} // namespace detail + +/* --- Now the single public maker function --- */ + +using detail::make_scope_guard; // see comment on declaration above + +} // namespace sg + +//////////////////////////////////////////////////////////////////////////////// +template +sg::detail::scope_guard::scope_guard (Callback &&callback) noexcept ( + std::is_nothrow_constructible::value) + : m_callback (std::forward (callback)) /* use () instead of {} because + of DR 1467 (https://is.gd/WHmWuo), which still impacts older compilers + (e.g. GCC 4.x and clang <=3.6, see https://godbolt.org/g/TE9tPJ and + https://is.gd/Tsmh8G) */ + , + m_active{true} +{ +} + +//////////////////////////////////////////////////////////////////////////////// +template +sg::detail::scope_guard::scope_guard::~scope_guard () noexcept /* +need the extra injected-class-name here to make different compilers happy */ +{ + if (m_active) + m_callback (); +} + +//////////////////////////////////////////////////////////////////////////////// +template +sg::detail::scope_guard::scope_guard (scope_guard &&other) noexcept ( + std::is_nothrow_constructible::value) + : m_callback (std::forward (other.m_callback)) // idem + , + m_active{std::move (other.m_active)} +{ + other.m_active = false; +} + +//////////////////////////////////////////////////////////////////////////////// +template +inline void sg::detail::scope_guard::dismiss () noexcept +{ + m_active = false; +} + +//////////////////////////////////////////////////////////////////////////////// +template +inline auto sg::detail::make_scope_guard (Callback &&callback) noexcept ( + std::is_nothrow_constructible::value) -> detail::scope_guard +{ + return detail::scope_guard{std::forward (callback)}; +} + +#endif /* SCOPE_GUARD_HPP_ */ From 03005189585550eccdf09aeff68b25e26e85e591 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Tue, 3 Sep 2024 10:46:06 -0700 Subject: [PATCH 2/3] interner: add finalize/open/close to interner problem: refcounted strings for resource types are turning out to be expensive enough to actually slow us down. We compare them and copy them so often they show up in the traces as 20-30% of runtime even as just shared_ptrs solution: avoid the possible denial of service issue from resource types by adding an interface to the interner type that finalizes it, preventing new strings from being added until it is explicitly "opened" for addition. This way after initialization of the graph/database we finalize the interners for subsystems and resource types, then open them back up for updates in update_resource. The danger is in JobSpec, which can no longer create new resource types. This also effectively makes it a parse error for a user to specify an unknown resource, which will happen as soon as the jobspec hits qmanager. This changes the output of one test of resource_query from emitting "unsatisfied" as info to emitting a descriptive parse error because it was unsatisfied due to unknown resources. That's why one test output file is modified. --- resource/libjobspec/jobspec.cpp | 8 ++- resource/modules/resource_match.cpp | 5 ++ resource/schema/data_std.cpp | 1 + resource/schema/data_std.hpp | 3 +- resource/utilities/command.cpp | 15 ++++-- resource/utilities/resource-query.cpp | 3 ++ src/common/libintern/interner.cpp | 31 +++++++++++ src/common/libintern/interner.hpp | 51 +++++++++++++++---- .../expected/satisfiability/002.R.out | 13 ----- 9 files changed, 101 insertions(+), 29 deletions(-) diff --git a/resource/libjobspec/jobspec.cpp b/resource/libjobspec/jobspec.cpp index feb2d7b99..34cae0faf 100644 --- a/resource/libjobspec/jobspec.cpp +++ b/resource/libjobspec/jobspec.cpp @@ -126,7 +126,13 @@ Resource::Resource (const YAML::Node &resnode) if (!resnode["type"].IsScalar ()) { throw parse_error (resnode["type"], "Value of \"type\" must be a scalar"); } - type = resource_model::resource_type_t{resnode["type"].as ()}; + try { + type = resource_model::resource_type_t{resnode["type"].as ()}; + } catch (std::system_error e) { + // resource_type must be closed or full + throw parse_error (resnode["type"], + "Value of \"type\" must be a resource type known to fluxion"); + } field_count++; if (!resnode["count"]) { diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp index e2c73c1e1..f8554555a 100644 --- a/resource/modules/resource_match.cpp +++ b/resource/modules/resource_match.cpp @@ -1083,6 +1083,7 @@ static int grow_resource_db (std::shared_ptr &ctx, json_t *resou struct idset *grow_set = NULL; json_t *r_lite = NULL; json_t *jgf = NULL; + auto guard = resource_type_t::storage_t::open_for_scope (); if ((rc = unpack_resources (resources, &grow_set, &r_lite, &jgf, duration)) < 0) { flux_log_error (ctx->h, "%s: unpack_resources", __FUNCTION__); @@ -1461,6 +1462,10 @@ static int init_resource_graph (std::shared_ptr &ctx) } } } + + // prevent users from consuming unbounded memory with arbitrary resource types + subsystem_t::storage_t::finalize (); + resource_type_t::storage_t::finalize (); return 0; } diff --git a/resource/schema/data_std.cpp b/resource/schema/data_std.cpp index 88dcc4151..8407ac653 100644 --- a/resource/schema/data_std.cpp +++ b/resource/schema/data_std.cpp @@ -17,6 +17,7 @@ subsystem_t containment_sub{"containment"}; resource_type_t cluster_rt{"cluster"}; resource_type_t core_rt{"core"}; +resource_type_t socket_rt{"socket"}; resource_type_t gpu_rt{"gpu"}; resource_type_t node_rt{"node"}; resource_type_t rack_rt{"rack"}; diff --git a/resource/schema/data_std.hpp b/resource/schema/data_std.hpp index 8dbeabd3f..6bbca0fcf 100644 --- a/resource/schema/data_std.hpp +++ b/resource/schema/data_std.hpp @@ -36,11 +36,12 @@ extern subsystem_t containment_sub; constexpr uint64_t resource_type_id{1}; struct resource_type_tag {}; -using resource_type_t = intern::interned_string>; +using resource_type_t = intern::interned_string>; extern resource_type_t slot_rt; extern resource_type_t cluster_rt; extern resource_type_t rack_rt; extern resource_type_t node_rt; +extern resource_type_t socket_rt; extern resource_type_t gpu_rt; extern resource_type_t core_rt; diff --git a/resource/utilities/command.cpp b/resource/utilities/command.cpp index 15825639f..782d0dd0f 100644 --- a/resource/utilities/command.cpp +++ b/resource/utilities/command.cpp @@ -429,13 +429,18 @@ static int update_run (std::shared_ptr &ctx, } gettimeofday (&st, NULL); - if ((rc = ctx->traverser->run (str, ctx->writers, rd, id, at, d)) != 0) { - std::cerr << "ERROR: traverser run () returned error " << std::endl; - if (ctx->traverser->err_message () != "") { - std::cerr << "ERROR: " << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); + + { + auto guard = resource_type_t::storage_t::open_for_scope (); + if ((rc = ctx->traverser->run (str, ctx->writers, rd, id, at, d)) != 0) { + std::cerr << "ERROR: traverser run () returned error " << std::endl; + if (ctx->traverser->err_message () != "") { + std::cerr << "ERROR: " << ctx->traverser->err_message (); + ctx->traverser->clear_err_message (); + } } } + ctx->writers->emit (o); std::ostream &out = (ctx->params.r_fname != "") ? ctx->params.r_out : std::cout; out << o.str (); diff --git a/resource/utilities/resource-query.cpp b/resource/utilities/resource-query.cpp index f4c5a78ff..6d6b6dff7 100644 --- a/resource/utilities/resource-query.cpp +++ b/resource/utilities/resource-query.cpp @@ -587,6 +587,9 @@ static int init_resource_graph (std::shared_ptr &ctx) return -1; } + // prevent users from consuming unbounded memory with arbitrary resource types + subsystem_t::storage_t::finalize (); + resource_type_t::storage_t::finalize (); return rc; } diff --git a/src/common/libintern/interner.cpp b/src/common/libintern/interner.cpp index 7c0d4b70e..ffae1fe78 100644 --- a/src/common/libintern/interner.cpp +++ b/src/common/libintern/interner.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,10 @@ struct dense_inner_storage { std::unordered_map> ids_by_string; // reader/writer lock to protect entries std::unique_ptr mtx = std::make_unique (); + // if the object is finalized and not opened, reject new strings + bool finalized = false; + // whether this object is allowed to accept new strings after finalization from each thread + std::unordered_map open_map; }; dense_inner_storage &get_dense_inner_storage (size_t unique_id) @@ -56,6 +61,22 @@ dense_inner_storage &get_dense_inner_storage (size_t unique_id) return groups[unique_id]; } +void dense_storage_finalize (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.finalized = true; +} +void dense_storage_open (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.open_map[std::this_thread::get_id ()] = true; +} +void dense_storage_close (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.open_map[std::this_thread::get_id ()] = false; +} + struct sparse_inner_storage; struct sparse_string_node { // This must be carefully managed, it holds onto the sparse_inner_storage that is keeping the @@ -153,6 +174,15 @@ view_and_id get_both (dense_inner_storage &storage, const std::string_view s, ch throw std::system_error (ENOMEM, std::generic_category (), "Too many strings for configured size"); + // if storage is finalized and the thread isn't in the open map, we must not add another + // here, check while under the shared lock + if (storage.finalized && !storage.open_map[std::this_thread::get_id ()]) { + using namespace std::string_literals; + std::string err = + "This interner is finalized and must be open to add strings, found new string: '"s + + std::string (s) + "'"s; + throw std::system_error (ENOMEM, std::generic_category (), err); + } } // release shared lock // writer lock scope @@ -167,6 +197,7 @@ view_and_id get_both (dense_inner_storage &storage, const std::string_view s, ch const std::string *get_by_id (dense_inner_storage &storage, size_t string_id) { + auto sl = std::shared_lock (*storage.mtx); return storage.strings_by_id.at (string_id); } diff --git a/src/common/libintern/interner.hpp b/src/common/libintern/interner.hpp index f990c851a..cc26e20ee 100644 --- a/src/common/libintern/interner.hpp +++ b/src/common/libintern/interner.hpp @@ -20,6 +20,7 @@ #include #include +#include "scope_guard.hpp" #include #include #include @@ -36,12 +37,20 @@ struct dense_inner_storage; view_and_id get_both (dense_inner_storage &ds, std::string_view s, char bytes_supported); const std::string *get_by_id (dense_inner_storage &ds, size_t string_id); +void dense_storage_finalize (dense_inner_storage &storage); +void dense_storage_open (dense_inner_storage &storage); +void dense_storage_close (dense_inner_storage &storage); + dense_inner_storage &get_dense_inner_storage (size_t unique_id); std::size_t hash_combine (std::size_t lhs, std::size_t rhs); }; // namespace detail /// interner storage class providing dense, in-order IDs of configurable size +/// +/// allows addition of strings until finalized, then after that only when explicitly opened for +/// additions, this is not for thread safety, it is to prevent denial of service from addition of +/// invalid types through interfaces that take user input template requires (sizeof (Id) <= sizeof (size_t)) struct dense_storage { @@ -55,6 +64,18 @@ struct dense_storage { dense_storage &operator= (dense_storage const &) = delete; dense_storage &operator= (dense_storage &&) = delete; + private: + static detail::dense_inner_storage &get_storage () + { + // C++ guarantees atomic initialization of a static like this, template expansion makes + // __PRETTY_FUNCTION__ unique for valid programs, using function here to avoid having to + // include heavy headers in this header + static detail::dense_inner_storage &s = + ::intern::detail::get_dense_inner_storage (dense_storage::unique_id ()); + return s; + }; + + public: static detail::view_and_id get_both (std::string_view s); static id_instance_t get_id (std::string_view s); static const std::string *get_by_id (id_storage_t string_id); @@ -65,16 +86,28 @@ struct dense_storage { return uid; } - private: - static detail::dense_inner_storage &get_storage () + /// stop new strings from being added unless the object is explicitly opened + static void finalize () { - // C++ guarantees atomic initialization of a static like this, template expansion makes - // __PRETTY_FUNCTION__ unique for valid programs, using function here to avoid having to - // include heavy headers in this header - static detail::dense_inner_storage &s = - ::intern::detail::get_dense_inner_storage (dense_storage::unique_id ()); - return s; - }; + detail::dense_storage_finalize (get_storage ()); + } + /// open the interner for additions and auto-close on scope exit + [[nodiscard]] static auto open_for_scope () + { + open (); + return sg::make_scope_guard ([] () { close (); }); + } + + /// open the interner for additions + static auto open () + { + detail::dense_storage_open (get_storage ()); + } + /// close the interner for additions + static void close () + { + detail::dense_storage_close (get_storage ()); + } }; template diff --git a/t/data/resource/expected/satisfiability/002.R.out b/t/data/resource/expected/satisfiability/002.R.out index 670ed4ebc..abb40b1b3 100644 --- a/t/data/resource/expected/satisfiability/002.R.out +++ b/t/data/resource/expected/satisfiability/002.R.out @@ -37,16 +37,3 @@ INFO: ============================= INFO: ============================= INFO: Unsatisfiable request INFO: ============================= -INFO: ============================= -INFO: No matching resources found -INFO: Unsatisfiable request -INFO: JOBID=7 -INFO: ============================= -INFO: ============================= -INFO: No matching resources found -INFO: Unsatisfiable request -INFO: JOBID=8 -INFO: ============================= -INFO: ============================= -INFO: Unsatisfiable request -INFO: ============================= From 852543f711df3e8b986d1175293de7c2a448b372 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Wed, 25 Sep 2024 11:32:29 +0800 Subject: [PATCH 3/3] t4001: add test for unknown resource in jobspec problem: we now validate resources used in jobspecs against known resources, new resources in _updates_ should work but new ones from users in jobs or matches should fail, we have no tests for this other than some accidental ones solution: add a jobspec with an invalid resource, and ensure that it fails to match --- .../jobspecs/basics/bad_res_type.yaml | 30 +++++++++++++++++++ t/t4001-match-allocate.t | 5 ++++ 2 files changed, 35 insertions(+) create mode 100644 t/data/resource/jobspecs/basics/bad_res_type.yaml diff --git a/t/data/resource/jobspecs/basics/bad_res_type.yaml b/t/data/resource/jobspecs/basics/bad_res_type.yaml new file mode 100644 index 000000000..286d2ebce --- /dev/null +++ b/t/data/resource/jobspecs/basics/bad_res_type.yaml @@ -0,0 +1,30 @@ +version: 9999 +resources: + - type: cluster + count: 1 + with: + - type: rack + count: 1 + with: + - type: node + count: 1 + with: + - type: slot + count: 1 + label: default + with: + - type: invalid_res_type + count: 1 + with: + - type: core + count: 1 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/t4001-match-allocate.t b/t/t4001-match-allocate.t index bf25d41fb..4e21369f5 100755 --- a/t/t4001-match-allocate.t +++ b/t/t4001-match-allocate.t @@ -56,6 +56,11 @@ test_expect_success 'handling of a malformed jobspec works' ' test_expect_code 2 flux ion-resource match allocate ${malform} ' +test_expect_success 'handling of an invalid resource type works' ' + test_expect_code 1 flux ion-resource match allocate \ + "${SHARNESS_TEST_SRCDIR}/data/resource/jobspecs/basics/bad_res_type.yaml" +' + test_expect_success 'invalid duration is caught' ' test_must_fail flux ion-resource match allocate ${duration_too_large} && test_must_fail flux ion-resource match allocate ${duration_negative}