diff --git a/source/common/json/BUILD b/source/common/json/BUILD index 9fd6cdf8a848..6967d116defc 100644 --- a/source/common/json/BUILD +++ b/source/common/json/BUILD @@ -43,6 +43,7 @@ envoy_cc_library( ":json_internal_lib", "//source/common/common:assert_lib", "//source/common/common:thread_lib", + "@utf8_range//:utf8_validity", ], ) diff --git a/source/common/json/json_internal.cc b/source/common/json/json_internal.cc index f5c8cc5c21d5..a4d57a297cb3 100644 --- a/source/common/json/json_internal.cc +++ b/source/common/json/json_internal.cc @@ -787,7 +787,7 @@ ObjectSharedPtr Factory::loadFromProtobufStruct(const ProtobufWkt::Struct& proto std::string Factory::serialize(absl::string_view str) { nlohmann::json j(str); - return j.dump(); + return j.dump(-1, ' ', false, nlohmann::detail::error_handler_t::replace); } std::vector Factory::jsonToMsgpack(const std::string& json_string) { diff --git a/source/common/json/json_sanitizer.cc b/source/common/json/json_sanitizer.cc index d45a7bdc8013..1e9dd9c5d4f4 100644 --- a/source/common/json/json_sanitizer.cc +++ b/source/common/json/json_sanitizer.cc @@ -5,6 +5,7 @@ #include "source/common/json/json_internal.h" #include "absl/strings/str_format.h" +#include "utf8_validity.h" namespace Envoy { namespace Json { @@ -65,7 +66,7 @@ absl::string_view sanitize(std::string& buffer, absl::string_view str) { if (need_slow == 0) { return str; // Fast path, should be executed most of the time. } - TRY_ASSERT_MAIN_THREAD { + if (utf8_range::IsStructurallyValid(str)) { // The Nlohmann JSON library supports serialization and is not too slow. A // hand-rolled sanitizer can be a little over 2x faster at the cost of added // production complexity. The main drawback is that this code cannot be used @@ -74,9 +75,7 @@ absl::string_view sanitize(std::string& buffer, absl::string_view str) { // adds complexity to the production code base. buffer = Nlohmann::Factory::serialize(str); return stripDoubleQuotes(buffer); - } - END_TRY - catch (std::exception&) { + } else { // If Nlohmann throws an error, emit a hex escape for any character // requiring it. This can occur for invalid utf-8 sequences, and we don't // want to crash the server if such a sequence makes its way into a string diff --git a/test/common/json/json_sanitizer_fuzz_test.cc b/test/common/json/json_sanitizer_fuzz_test.cc index 81372869e25e..ed6d33758ea1 100644 --- a/test/common/json/json_sanitizer_fuzz_test.cc +++ b/test/common/json/json_sanitizer_fuzz_test.cc @@ -27,6 +27,11 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { absl::string_view proto_sanitized = Envoy::Json::stripDoubleQuotes(buffer2); RELEASE_ASSERT(Envoy::Json::TestUtil::utf8Equivalent(sanitized, proto_sanitized, errmsg), errmsg); + } else { + std::string decoded, errmsg; + EXPECT_TRUE(Json::TestUtil::decodeEscapedJson(sanitized, decoded, errmsg)) + << input << ": " << errmsg; + EXPECT_EQ(input, decoded); } } }