From 96fc75edcc6c8e8d2d199960045eac829ae36156 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 9 Oct 2024 10:25:57 -0400 Subject: [PATCH] Rethink and simplify the encoder context class (#849) Signed-off-by: Juan Cruz Viotti --- src/runtime/CMakeLists.txt | 3 +- src/runtime/cache.cc | 66 +++++++ src/runtime/encoder_any.cc | 5 +- src/runtime/encoder_string.cc | 24 +-- .../sourcemeta/jsonbinpack/runtime_encoder.h | 4 +- .../jsonbinpack/runtime_encoder_cache.h | 48 +++++ .../jsonbinpack/runtime_encoder_context.h | 120 ------------ test/runtime/CMakeLists.txt | 2 +- test/runtime/encode_cache_test.cc | 182 +++++++++++++++++ test/runtime/encode_context_test.cc | 183 ------------------ 10 files changed, 315 insertions(+), 322 deletions(-) create mode 100644 src/runtime/cache.cc create mode 100644 src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_cache.h delete mode 100644 src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_context.h create mode 100644 test/runtime/encode_cache_test.cc delete mode 100644 test/runtime/encode_context_test.cc diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 00c3cb2b..81db0a8e 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -5,13 +5,14 @@ noa_library(NAMESPACE sourcemeta PROJECT jsonbinpack NAME runtime encoder.h input_stream.h output_stream.h - encoder_context.h + encoder_cache.h encoding.h SOURCES input_stream.cc output_stream.cc varint.h unreachable.h + cache.cc loader.cc loader_v1_any.h diff --git a/src/runtime/cache.cc b/src/runtime/cache.cc new file mode 100644 index 00000000..0b09cb27 --- /dev/null +++ b/src/runtime/cache.cc @@ -0,0 +1,66 @@ +#include + +namespace sourcemeta::jsonbinpack { + +auto Cache::record(const sourcemeta::jsontoolkit::JSON::String &value, + const std::uint64_t offset, const Type type) -> void { + // Encoding a shared string has some overhead, such as the + // shared string marker + the offset, so its not worth + // doing for strings that are too small. + constexpr auto MINIMUM_STRING_LENGTH{3}; + + // We don't want to allow the context to grow + // forever, otherwise an attacker could force the + // program to exhaust memory given an input + // document that contains a high number of large strings. + constexpr auto MAXIMUM_BYTE_SIZE{20971520}; + + const auto value_size{value.size()}; + if (value_size < MINIMUM_STRING_LENGTH || value_size >= MAXIMUM_BYTE_SIZE) { + return; + } + + // Remove the oldest entries to make space if needed + while (!this->data.empty() && + this->byte_size + value_size >= MAXIMUM_BYTE_SIZE) { + this->remove_oldest(); + } + + auto result{this->data.insert({std::make_pair(value, type), offset})}; + if (result.second) { + this->byte_size += value_size; + this->order.emplace(offset, result.first->first); + } else if (offset > result.first->second) { + this->order.erase(result.first->second); + // If the string already exists, we want to + // bump the offset for locality purposes. + result.first->second = offset; + this->order.emplace(offset, result.first->first); + } + + // Otherwise we are doing something wrong + assert(this->order.size() == this->data.size()); +} + +auto Cache::remove_oldest() -> void { + assert(!this->data.empty()); + // std::map are by definition ordered by key, + // so the begin iterator points to the entry + // with the lowest offset, a.k.a. the oldest. + const auto iterator{this->order.cbegin()}; + this->byte_size -= iterator->second.get().first.size(); + this->data.erase(iterator->second.get()); + this->order.erase(iterator); +} + +auto Cache::find(const sourcemeta::jsontoolkit::JSON::String &value, + const Type type) const -> std::optional { + const auto result{this->data.find(std::make_pair(value, type))}; + if (result == this->data.cend()) { + return std::nullopt; + } + + return result->second; +} + +} // namespace sourcemeta::jsonbinpack diff --git a/src/runtime/encoder_any.cc b/src/runtime/encoder_any.cc index 52b29cb3..909c9214 100644 --- a/src/runtime/encoder_any.cc +++ b/src/runtime/encoder_any.cc @@ -110,7 +110,7 @@ auto Encoder::ANY_PACKED_TYPE_TAG_BYTE_PREFIX( } else if (document.is_string()) { const sourcemeta::jsontoolkit::JSON::String value{document.to_string()}; const auto size{document.byte_size()}; - const auto shared{this->context_.find(value, Context::Type::Standalone)}; + const auto shared{this->cache_.find(value, Cache::Type::Standalone)}; if (size < uint_max<5>) { const std::uint8_t type{shared.has_value() ? TYPE_SHARED_STRING : TYPE_STRING}; @@ -119,8 +119,7 @@ auto Encoder::ANY_PACKED_TYPE_TAG_BYTE_PREFIX( if (shared.has_value()) { this->put_varint(this->position() - shared.value()); } else { - this->context_.record(value, this->position(), - Context::Type::Standalone); + this->cache_.record(value, this->position(), Cache::Type::Standalone); this->put_string_utf8(value, size); } } else if (size >= uint_max<5> && size < uint_max<5> * 2 && diff --git a/src/runtime/encoder_string.cc b/src/runtime/encoder_string.cc index a860db8c..4ff6e798 100644 --- a/src/runtime/encoder_string.cc +++ b/src/runtime/encoder_string.cc @@ -21,7 +21,7 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED( const sourcemeta::jsontoolkit::JSON::String value{document.to_string()}; const auto size{value.size()}; assert(document.size() == size); - const auto shared{this->context_.find(value, Context::Type::Standalone)}; + const auto shared{this->cache_.find(value, Cache::Type::Standalone)}; // (1) Write 0x00 if shared, else do nothing if (shared.has_value()) { @@ -35,7 +35,7 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED( if (shared.has_value()) { this->put_varint(this->position() - shared.value()); } else { - this->context_.record(value, this->position(), Context::Type::Standalone); + this->cache_.record(value, this->position(), Cache::Type::Standalone); this->put_string_utf8(value, size); } } @@ -48,7 +48,7 @@ auto Encoder::ROOF_VARINT_PREFIX_UTF8_STRING_SHARED( const auto size{value.size()}; assert(document.size() == size); assert(size <= options.maximum); - const auto shared{this->context_.find(value, Context::Type::Standalone)}; + const auto shared{this->cache_.find(value, Cache::Type::Standalone)}; // (1) Write 0x00 if shared, else do nothing if (shared.has_value()) { @@ -62,7 +62,7 @@ auto Encoder::ROOF_VARINT_PREFIX_UTF8_STRING_SHARED( if (shared.has_value()) { this->put_varint(this->position() - shared.value()); } else { - this->context_.record(value, this->position(), Context::Type::Standalone); + this->cache_.record(value, this->position(), Cache::Type::Standalone); this->put_string_utf8(value, size); } } @@ -77,7 +77,7 @@ auto Encoder::BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED( assert(options.minimum <= options.maximum); assert(is_byte(options.maximum - options.minimum + 1)); assert(is_within(size, options.minimum, options.maximum)); - const auto shared{this->context_.find(value, Context::Type::Standalone)}; + const auto shared{this->cache_.find(value, Cache::Type::Standalone)}; // (1) Write 0x00 if shared, else do nothing if (shared.has_value()) { @@ -91,7 +91,7 @@ auto Encoder::BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED( if (shared.has_value()) { this->put_varint(this->position() - shared.value()); } else { - this->context_.record(value, this->position(), Context::Type::Standalone); + this->cache_.record(value, this->position(), Cache::Type::Standalone); this->put_string_utf8(value, size); } } @@ -127,22 +127,22 @@ auto Encoder::PREFIX_VARINT_LENGTH_STRING_SHARED( const sourcemeta::jsontoolkit::JSON::String value{document.to_string()}; const auto shared{ - this->context_.find(value, Context::Type::PrefixLengthVarintPlusOne)}; + this->cache_.find(value, Cache::Type::PrefixLengthVarintPlusOne)}; if (shared.has_value()) { const auto new_offset{this->position()}; this->put_byte(0); this->put_varint(this->position() - shared.value()); // Bump the context cache for locality purposes - this->context_.record(value, new_offset, - Context::Type::PrefixLengthVarintPlusOne); + this->cache_.record(value, new_offset, + Cache::Type::PrefixLengthVarintPlusOne); } else { const auto size{value.size()}; assert(document.size() == size); - this->context_.record(value, this->position(), - Context::Type::PrefixLengthVarintPlusOne); + this->cache_.record(value, this->position(), + Cache::Type::PrefixLengthVarintPlusOne); this->put_varint(size + 1); // Also record a standalone variant of it - this->context_.record(value, this->position(), Context::Type::Standalone); + this->cache_.record(value, this->position(), Cache::Type::Standalone); this->put_string_utf8(value, size); } } diff --git a/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder.h b/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder.h index ec1cedf2..b1ecde9d 100644 --- a/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder.h +++ b/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder.h @@ -3,7 +3,7 @@ #include "runtime_export.h" -#include +#include #include #include @@ -66,7 +66,7 @@ class SOURCEMETA_JSONBINPACK_RUNTIME_EXPORT Encoder : private OutputStream { #endif private: - Context context_; + Cache cache_; }; } // namespace sourcemeta::jsonbinpack diff --git a/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_cache.h b/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_cache.h new file mode 100644 index 00000000..7d730bc2 --- /dev/null +++ b/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_cache.h @@ -0,0 +1,48 @@ +#ifndef SOURCEMETA_JSONBINPACK_RUNTIME_ENCODER_CACHE_H_ +#define SOURCEMETA_JSONBINPACK_RUNTIME_ENCODER_CACHE_H_ +#ifndef DOXYGEN + +#include "runtime_export.h" + +#include + +#include // std::reference_wrapper +#include // std::map +#include // std::optional +#include // std::pair + +namespace sourcemeta::jsonbinpack { + +class SOURCEMETA_JSONBINPACK_RUNTIME_EXPORT Cache { +public: + enum class Type { Standalone, PrefixLengthVarintPlusOne }; + auto record(const sourcemeta::jsontoolkit::JSON::String &value, + const std::uint64_t offset, const Type type) -> void; + auto find(const sourcemeta::jsontoolkit::JSON::String &value, + const Type type) const -> std::optional; + +#ifndef DOXYGEN + // This method is considered private. We only expose it for testing purposes + auto remove_oldest() -> void; +#endif + +private: +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + std::uint64_t byte_size{0}; + using Entry = std::pair; + std::map data; + std::map> order; +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif +}; + +} // namespace sourcemeta::jsonbinpack + +#endif +#endif diff --git a/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_context.h b/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_context.h deleted file mode 100644 index 8f17cfb7..00000000 --- a/src/runtime/include/sourcemeta/jsonbinpack/runtime_encoder_context.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef SOURCEMETA_JSONBINPACK_RUNTIME_ENCODER_CONTEXT_H_ -#define SOURCEMETA_JSONBINPACK_RUNTIME_ENCODER_CONTEXT_H_ -#ifndef DOXYGEN - -#include "runtime_export.h" - -#include - -#include // assert -#include // std::cbegin, std::cend -#include // std::map -#include // std::optional, std::nullopt -#include // std::pair, std::make_pair - -// Encoding a shared string has some overhead, such as the -// shared string marker + the offset, so its not worth -// doing for strings that are too small. -static constexpr auto MINIMUM_STRING_LENGTH{3}; - -// We don't want to allow the context to grow -// forever, otherwise an attacker could force the -// program to exhaust memory given an input -// document that contains a high number of large strings. -static constexpr auto MAXIMUM_BYTE_SIZE{20971520}; - -namespace sourcemeta::jsonbinpack { - -class SOURCEMETA_JSONBINPACK_RUNTIME_EXPORT Context { -public: - enum class Type { Standalone, PrefixLengthVarintPlusOne }; - - auto record(const sourcemeta::jsontoolkit::JSON::String &value, - const std::uint64_t offset, const Type type) -> void { - const auto value_size{value.size()}; - if (value_size < MINIMUM_STRING_LENGTH) { - return; - // The value is too big for the context to start with - } else if (value_size >= MAXIMUM_BYTE_SIZE) { - return; - } - - // Remove the oldest entries to make space if needed - while (!this->strings.empty() && - this->byte_size + value_size >= MAXIMUM_BYTE_SIZE) { - this->remove_oldest(); - } - - // If the string already exists, we want to - // bump the offset for locality purposes. - const auto maybe_entry{this->find(value, type)}; - if (maybe_entry.has_value()) { - const auto key{std::make_pair(value, type)}; - const auto previous_offset{this->strings[key]}; - if (offset > previous_offset) { - this->strings[key] = offset; - this->offsets.erase(previous_offset); - this->offsets.insert({offset, std::make_pair(value, type)}); - } - } else { - const auto result{ - this->offsets.insert({offset, std::make_pair(value, type)})}; - // Prevent recording two strings to the same offset - assert(result.second); - if (result.second) { - this->strings.insert({std::make_pair(value, type), offset}); - this->byte_size += value_size; - } - } - } - - auto remove_oldest() -> void { - assert(!this->strings.empty()); - // std::map are by definition ordered by key, - // so the begin iterator points to the entry - // with the lowest offset, a.k.a. the oldest. - const auto iterator{std::cbegin(this->offsets)}; - this->strings.erase( - std::make_pair(iterator->second.first, iterator->second.second)); - this->byte_size -= iterator->second.first.size(); - this->offsets.erase(iterator); - } - - auto find(const sourcemeta::jsontoolkit::JSON::String &value, - const Type type) const -> std::optional { - const auto result{this->strings.find(std::make_pair(value, type))}; - if (result == this->strings.cend()) { - return std::nullopt; - } - - return result->second; - } - -private: -// Exporting symbols that depends on the standard C++ library is considered -// safe. -// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN -#if defined(_MSC_VER) -#pragma warning(disable : 4251 4275) -#endif - // TODO: Keep a reference to the string instead of copying it - std::map, - std::uint64_t> - strings; - // A mirror of the above map to be able to sort by offset. - // While this means we need 2x the amount of memory to keep track - // of strings, it allows us to efficiently put an upper bound - // on the amount of memory being consumed by this class. - std::map> - offsets; - std::uint64_t byte_size = 0; -#if defined(_MSC_VER) -#pragma warning(default : 4251 4275) -#endif -}; - -} // namespace sourcemeta::jsonbinpack - -#endif -#endif diff --git a/test/runtime/CMakeLists.txt b/test/runtime/CMakeLists.txt index 962392f3..26a4aa28 100644 --- a/test/runtime/CMakeLists.txt +++ b/test/runtime/CMakeLists.txt @@ -10,7 +10,7 @@ add_executable(sourcemeta_jsonbinpack_runtime_unit decode_utils.h encode_any_test.cc encode_array_test.cc - encode_context_test.cc + encode_cache_test.cc encode_integer_test.cc encode_number_test.cc encode_object_test.cc diff --git a/test/runtime/encode_cache_test.cc b/test/runtime/encode_cache_test.cc new file mode 100644 index 00000000..ce1e1bb8 --- /dev/null +++ b/test/runtime/encode_cache_test.cc @@ -0,0 +1,182 @@ +#include + +#include +#include +#include + +#include + +TEST(JSONBinPack_Encoder, cache_record_string) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + const auto result_1{cache.find("foo", CacheType::Standalone)}; + EXPECT_FALSE(result_1.has_value()); + cache.record("foo", 2, CacheType::Standalone); + const auto result_2{cache.find("foo", CacheType::Standalone)}; + EXPECT_TRUE(result_2.has_value()); + EXPECT_EQ(result_2.value(), 2); +} + +TEST(JSONBinPack_Encoder, cache_record_string_too_short) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + const auto result_1{cache.find("fo", CacheType::Standalone)}; + EXPECT_FALSE(result_1.has_value()); + cache.record("fo", 2, CacheType::Standalone); + const auto result_2{cache.find("fo", CacheType::Standalone)}; + EXPECT_FALSE(result_2.has_value()); +} + +TEST(JSONBinPack_Encoder, cache_record_string_empty) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + const auto result_1{cache.find("fo", CacheType::Standalone)}; + EXPECT_FALSE(result_1.has_value()); + cache.record("", 2, CacheType::Standalone); + const auto result_2{cache.find("", CacheType::Standalone)}; + EXPECT_FALSE(result_2.has_value()); +} + +TEST(JSONBinPack_Encoder, cache_has_on_unknown_string) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + const auto result{cache.find("foobarbaz", CacheType::Standalone)}; + EXPECT_FALSE(result.has_value()); +} + +TEST(JSONBinPack_Encoder, cache_increase_offset) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record("foo", 2, CacheType::Standalone); + cache.record("foo", 4, CacheType::Standalone); + const auto result{cache.find("foo", CacheType::Standalone)}; + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 4); +} + +TEST(JSONBinPack_Encoder, cache_do_not_decrease_offset) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record("foo", 4, CacheType::Standalone); + cache.record("foo", 2, CacheType::Standalone); + const auto result{cache.find("foo", CacheType::Standalone)}; + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 4); +} + +TEST(JSONBinPack_Encoder, cache_not_record_too_big) { + const auto length{25000000}; + const std::string too_big(length, 'x'); + EXPECT_EQ(too_big.size(), length); + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record(too_big, 1, CacheType::Standalone); + const auto result{cache.find(too_big, CacheType::Standalone)}; + EXPECT_FALSE(result.has_value()); +} + +TEST(JSONBinPack_Encoder, cache_remove_oldest) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record("foo", 10, CacheType::Standalone); + cache.record("bar", 3, CacheType::Standalone); + cache.record("baz", 7, CacheType::PrefixLengthVarintPlusOne); + + EXPECT_TRUE(cache.find("foo", CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find("bar", CacheType::Standalone).has_value()); + EXPECT_TRUE( + cache.find("baz", CacheType::PrefixLengthVarintPlusOne).has_value()); + + cache.remove_oldest(); + + EXPECT_TRUE(cache.find("foo", CacheType::Standalone).has_value()); + EXPECT_FALSE(cache.find("bar", CacheType::Standalone).has_value()); + EXPECT_TRUE( + cache.find("baz", CacheType::PrefixLengthVarintPlusOne).has_value()); + + cache.remove_oldest(); + + EXPECT_TRUE(cache.find("foo", CacheType::Standalone).has_value()); + EXPECT_FALSE(cache.find("bar", CacheType::Standalone).has_value()); + EXPECT_FALSE( + cache.find("baz", CacheType::PrefixLengthVarintPlusOne).has_value()); + + cache.remove_oldest(); + + EXPECT_FALSE(cache.find("foo", CacheType::Standalone).has_value()); + EXPECT_FALSE(cache.find("bar", CacheType::Standalone).has_value()); + EXPECT_FALSE( + cache.find("baz", CacheType::PrefixLengthVarintPlusOne).has_value()); +} + +TEST(JSONBinPack_Encoder, cache_is_a_circular_buffer) { + const auto length{5000000}; + const std::string string_1(length, 'u'); + const std::string string_2(length, 'v'); + const std::string string_3(length, 'w'); + const std::string string_4(length, 'x'); + const std::string string_5(length, 'y'); + const std::string string_6(length, 'z'); + + EXPECT_EQ(string_1.size(), length); + EXPECT_EQ(string_2.size(), length); + EXPECT_EQ(string_3.size(), length); + EXPECT_EQ(string_4.size(), length); + EXPECT_EQ(string_5.size(), length); + EXPECT_EQ(string_6.size(), length); + + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + + cache.record(string_1, length * 0, CacheType::Standalone); + cache.record(string_2, length * 1, CacheType::Standalone); + cache.record(string_3, length * 2, CacheType::Standalone); + cache.record(string_4, length * 3, CacheType::Standalone); + + EXPECT_TRUE(cache.find(string_1, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_2, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_3, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_4, CacheType::Standalone).has_value()); + + cache.record(string_5, length * 4, CacheType::Standalone); + + EXPECT_FALSE(cache.find(string_1, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_2, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_3, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_4, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_5, CacheType::Standalone).has_value()); + + cache.record(string_6, length * 5, CacheType::Standalone); + + EXPECT_FALSE(cache.find(string_1, CacheType::Standalone).has_value()); + EXPECT_FALSE(cache.find(string_2, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_3, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_4, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_5, CacheType::Standalone).has_value()); + EXPECT_TRUE(cache.find(string_6, CacheType::Standalone).has_value()); +} + +TEST(JSONBinPack_Encoder, cache_same_string_different_type) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record("foo", 10, CacheType::Standalone); + cache.record("foo", 20, CacheType::PrefixLengthVarintPlusOne); + + const auto result_1{cache.find("foo", CacheType::Standalone)}; + const auto result_2{cache.find("foo", CacheType::PrefixLengthVarintPlusOne)}; + + EXPECT_TRUE(result_1.has_value()); + EXPECT_TRUE(result_2.has_value()); + + EXPECT_EQ(result_1.value(), 10); + EXPECT_EQ(result_2.value(), 20); +} + +TEST(JSONBinPack_Encoder, cache_no_fallback_type) { + sourcemeta::jsonbinpack::Cache cache; + using CacheType = sourcemeta::jsonbinpack::Cache::Type; + cache.record("foo", 10, CacheType::Standalone); + EXPECT_TRUE(cache.find("foo", CacheType::Standalone).has_value()); + EXPECT_FALSE( + cache.find("foo", CacheType::PrefixLengthVarintPlusOne).has_value()); +} diff --git a/test/runtime/encode_context_test.cc b/test/runtime/encode_context_test.cc deleted file mode 100644 index 0317b9cc..00000000 --- a/test/runtime/encode_context_test.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include - -#include -#include -#include - -#include - -TEST(JSONBinPack_Encoder, context_record_string) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - const auto result_1{context.find("foo", ContextType::Standalone)}; - EXPECT_FALSE(result_1.has_value()); - context.record("foo", 2, ContextType::Standalone); - const auto result_2{context.find("foo", ContextType::Standalone)}; - EXPECT_TRUE(result_2.has_value()); - EXPECT_EQ(result_2.value(), 2); -} - -TEST(JSONBinPack_Encoder, context_record_string_too_short) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - const auto result_1{context.find("fo", ContextType::Standalone)}; - EXPECT_FALSE(result_1.has_value()); - context.record("fo", 2, ContextType::Standalone); - const auto result_2{context.find("fo", ContextType::Standalone)}; - EXPECT_FALSE(result_2.has_value()); -} - -TEST(JSONBinPack_Encoder, context_record_string_empty) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - const auto result_1{context.find("fo", ContextType::Standalone)}; - EXPECT_FALSE(result_1.has_value()); - context.record("", 2, ContextType::Standalone); - const auto result_2{context.find("", ContextType::Standalone)}; - EXPECT_FALSE(result_2.has_value()); -} - -TEST(JSONBinPack_Encoder, context_has_on_unknown_string) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - const auto result{context.find("foobarbaz", ContextType::Standalone)}; - EXPECT_FALSE(result.has_value()); -} - -TEST(JSONBinPack_Encoder, context_increase_offset) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record("foo", 2, ContextType::Standalone); - context.record("foo", 4, ContextType::Standalone); - const auto result{context.find("foo", ContextType::Standalone)}; - EXPECT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 4); -} - -TEST(JSONBinPack_Encoder, context_do_not_decrease_offset) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record("foo", 4, ContextType::Standalone); - context.record("foo", 2, ContextType::Standalone); - const auto result{context.find("foo", ContextType::Standalone)}; - EXPECT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 4); -} - -TEST(JSONBinPack_Encoder, context_not_record_too_big) { - const auto length{25000000}; - const std::string too_big(length, 'x'); - EXPECT_EQ(too_big.size(), length); - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record(too_big, 1, ContextType::Standalone); - const auto result{context.find(too_big, ContextType::Standalone)}; - EXPECT_FALSE(result.has_value()); -} - -TEST(JSONBinPack_Encoder, context_remove_oldest) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record("foo", 10, ContextType::Standalone); - context.record("bar", 3, ContextType::Standalone); - context.record("baz", 7, ContextType::PrefixLengthVarintPlusOne); - - EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find("bar", ContextType::Standalone).has_value()); - EXPECT_TRUE( - context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value()); - - context.remove_oldest(); - - EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value()); - EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value()); - EXPECT_TRUE( - context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value()); - - context.remove_oldest(); - - EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value()); - EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value()); - EXPECT_FALSE( - context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value()); - - context.remove_oldest(); - - EXPECT_FALSE(context.find("foo", ContextType::Standalone).has_value()); - EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value()); - EXPECT_FALSE( - context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value()); -} - -TEST(JSONBinPack_Encoder, context_is_a_circular_buffer) { - const auto length{5000000}; - const std::string string_1(length, 'u'); - const std::string string_2(length, 'v'); - const std::string string_3(length, 'w'); - const std::string string_4(length, 'x'); - const std::string string_5(length, 'y'); - const std::string string_6(length, 'z'); - - EXPECT_EQ(string_1.size(), length); - EXPECT_EQ(string_2.size(), length); - EXPECT_EQ(string_3.size(), length); - EXPECT_EQ(string_4.size(), length); - EXPECT_EQ(string_5.size(), length); - EXPECT_EQ(string_6.size(), length); - - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - - context.record(string_1, length * 0, ContextType::Standalone); - context.record(string_2, length * 1, ContextType::Standalone); - context.record(string_3, length * 2, ContextType::Standalone); - context.record(string_4, length * 3, ContextType::Standalone); - - EXPECT_TRUE(context.find(string_1, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_2, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value()); - - context.record(string_5, length * 4, ContextType::Standalone); - - EXPECT_FALSE(context.find(string_1, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_2, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_5, ContextType::Standalone).has_value()); - - context.record(string_6, length * 5, ContextType::Standalone); - - EXPECT_FALSE(context.find(string_1, ContextType::Standalone).has_value()); - EXPECT_FALSE(context.find(string_2, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_5, ContextType::Standalone).has_value()); - EXPECT_TRUE(context.find(string_6, ContextType::Standalone).has_value()); -} - -TEST(JSONBinPack_Encoder, context_same_string_different_type) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record("foo", 10, ContextType::Standalone); - context.record("foo", 20, ContextType::PrefixLengthVarintPlusOne); - - const auto result_1{context.find("foo", ContextType::Standalone)}; - const auto result_2{ - context.find("foo", ContextType::PrefixLengthVarintPlusOne)}; - - EXPECT_TRUE(result_1.has_value()); - EXPECT_TRUE(result_2.has_value()); - - EXPECT_EQ(result_1.value(), 10); - EXPECT_EQ(result_2.value(), 20); -} - -TEST(JSONBinPack_Encoder, context_no_fallback_type) { - sourcemeta::jsonbinpack::Context context; - using ContextType = sourcemeta::jsonbinpack::Context::Type; - context.record("foo", 10, ContextType::Standalone); - EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value()); - EXPECT_FALSE( - context.find("foo", ContextType::PrefixLengthVarintPlusOne).has_value()); -}