From 901ea9289d85335731f5ea86ed80c2005bd4f192 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 5 Nov 2024 11:50:19 +0800 Subject: [PATCH] Initial ---
Reviewer notes (free-form text placed after the three-dash separator; it is
neither commit message nor hunk content):

  * CMake/resolve_dependency_modules/simdjson.cmake: the simdjson
    FetchContent_Declare() call gains a PATCH_COMMAND that runs
    "git init && git apply" on simdjson/fix-control-char.patch.
  * simdjson/fix-control-char.patch (new file): deletes two statements from
    simdjson -- the "if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; }" line in
    validate_string() (src/fallback.cpp) and the
    "unescaped_chars_error |= block.non_quote_inside_string(unescaped);" line
    in json_structural_indexer::next()
    (src/generic/stage1/json_structural_indexer.h).
  * velox/functions/prestosql/tests/JsonFunctionsTest.cpp: the jsonExtract test
    gains one case whose JSON string value contains "\n" inside the C++
    literal (a raw newline at run time) and expects the value back unchanged.

  NOTE(review): presumably the goal is to make simdjson accept unescaped
  control characters inside JSON strings; confirm this leniency is wanted for
  every simdjson user in the build, not only json_extract.
  NOTE(review): PATCH_COMMAND chains two commands with "&&"; verify this works
  with every CMake generator in use, and that re-running the patch step on an
  already patched source tree does not fail.

 .../resolve_dependency_modules/simdjson.cmake | 5 +++- .../simdjson/fix-control-char.patch | 24 +++++++++++++++++++ .../prestosql/tests/JsonFunctionsTest.cpp | 3 +++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 CMake/resolve_dependency_modules/simdjson/fix-control-char.patch diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake index 69e7f204494b..f072020b9625 100644 --- a/CMake/resolve_dependency_modules/simdjson.cmake +++ b/CMake/resolve_dependency_modules/simdjson.cmake @@ -27,6 +27,9 @@ message(STATUS "Building simdjson from source") FetchContent_Declare( simdjson URL ${VELOX_SIMDJSON_SOURCE_URL} - URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM} + PATCH_COMMAND + git init && git apply + ${CMAKE_CURRENT_LIST_DIR}/simdjson/fix-control-char.patch) FetchContent_MakeAvailable(simdjson) diff --git a/CMake/resolve_dependency_modules/simdjson/fix-control-char.patch b/CMake/resolve_dependency_modules/simdjson/fix-control-char.patch new file mode 100644 index 000000000000..6fc71ae29639 --- /dev/null +++ b/CMake/resolve_dependency_modules/simdjson/fix-control-char.patch @@ -0,0 +1,24 @@ +diff --git a/src/fallback.cpp b/src/fallback.cpp +index f8e87be0..1bbbd67a 100644 +--- a/src/fallback.cpp ++++ b/src/fallback.cpp +@@ -130,7 +130,6 @@ simdjson_inline bool validate_string() { + } else if (simdjson_unlikely(buf[idx] & 0x80)) { + validate_utf8_character(); + } else { +- if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } +diff --git a/src/generic/stage1/json_structural_indexer.h b/src/generic/stage1/json_structural_indexer.h +index cfdedf01..d7c58816 100644 +--- a/src/generic/stage1/json_structural_indexer.h ++++ b/src/generic/stage1/json_structural_indexer.h +@@ -243,7 +243,6 @@ 
simdjson_inline void json_structural_indexer::next(const simd::simd8x64 + #endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); +- unescaped_chars_error |= block.non_quote_inside_string(unescaped); + } + + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { diff --git a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp index 067d374411f3..56b9bdc1bf28 100644 --- a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp @@ -701,6 +701,9 @@ TEST_F(JsonFunctionsTest, jsonExtract) { VELOX_ASSERT_THROW( jsonExtract(kJson, "concat($..category)"), "Invalid JSON path"); VELOX_ASSERT_THROW(jsonExtract(kJson, "$.store.keys()"), "Invalid JSON path"); + + // Test json with control character. + EXPECT_EQ(jsonExtract("{\"c1\":\"ab\ncd\"}", "$.c1"), "\"ab\ncd\""); } } // namespace