diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index ec6a25fca233bd..c79795ee2f2a3d 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -1076,10 +1076,13 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc // prevent from endless loop _next_row = _total_rows + 1; fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "Parse json data for array failed. code: {}, error info: {}", - e.error(), e.what()); + fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), + e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1175,7 +1178,10 @@ Status NewJsonReader::_simdjson_handle_flat_array_complex_json( fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1243,7 +1249,10 @@ Status NewJsonReader::_simdjson_handle_nested_complex_json( fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1492,6 +1501,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) { *size -= 3; } memcpy(&_simdjson_ondemand_padding_buffer.front(), json_str, *size); + _original_doc_size = *size; auto error = _ondemand_json_parser ->iterate(std::string_view(_simdjson_ondemand_padding_buffer.data(), *size), diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h index d2e5353244641d..bbe535bd2193e1 100644 --- a/be/src/vec/exec/format/json/new_json_reader.h +++ b/be/src/vec/exec/format/json/new_json_reader.h @@ -248,6 +248,7 @@ class NewJsonReader : public GenericReader { // simdjson static constexpr size_t _init_buffer_size = 1024 * 1024 * 8; size_t _padded_size = _init_buffer_size + simdjson::SIMDJSON_PADDING; + size_t _original_doc_size = 0; std::string _simdjson_ondemand_padding_buffer; std::string _simdjson_ondemand_unscape_padding_buffer; // char _simdjson_ondemand_padding_buffer[_padded_size];