Skip to content

Commit

Permalink
[chore](json reader) add original data to error message for tracing (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
eldenmoon authored Sep 2, 2023
1 parent 9898c08 commit 6b56896
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
20 changes: 15 additions & 5 deletions be/src/vec/exec/format/json/new_json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1076,10 +1076,13 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc
// prevent from endless loop
_next_row = _total_rows + 1;
fmt::memory_buffer error_msg;
fmt::format_to(error_msg, "Parse json data for array failed. code: {}, error info: {}",
e.error(), e.what());
fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(),
e.what());
RETURN_IF_ERROR(_state->append_error_msg_to_file(
[&]() -> std::string { return ""; },
[&]() -> std::string {
return std::string(_simdjson_ondemand_padding_buffer.data(),
_original_doc_size);
},
[&]() -> std::string { return fmt::to_string(error_msg); }, eof));
_counter->num_rows_filtered++;
// Before continuing to process other rows, we need to first clean the fail parsed row.
Expand Down Expand Up @@ -1175,7 +1178,10 @@ Status NewJsonReader::_simdjson_handle_flat_array_complex_json(
fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(),
e.what());
RETURN_IF_ERROR(_state->append_error_msg_to_file(
[&]() -> std::string { return ""; },
[&]() -> std::string {
return std::string(_simdjson_ondemand_padding_buffer.data(),
_original_doc_size);
},
[&]() -> std::string { return fmt::to_string(error_msg); }, eof));
_counter->num_rows_filtered++;
// Before continuing to process other rows, we need to first clean the fail parsed row.
Expand Down Expand Up @@ -1243,7 +1249,10 @@ Status NewJsonReader::_simdjson_handle_nested_complex_json(
fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(),
e.what());
RETURN_IF_ERROR(_state->append_error_msg_to_file(
[&]() -> std::string { return ""; },
[&]() -> std::string {
return std::string(_simdjson_ondemand_padding_buffer.data(),
_original_doc_size);
},
[&]() -> std::string { return fmt::to_string(error_msg); }, eof));
_counter->num_rows_filtered++;
// Before continuing to process other rows, we need to first clean the fail parsed row.
Expand Down Expand Up @@ -1492,6 +1501,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) {
*size -= 3;
}
memcpy(&_simdjson_ondemand_padding_buffer.front(), json_str, *size);
_original_doc_size = *size;
auto error =
_ondemand_json_parser
->iterate(std::string_view(_simdjson_ondemand_padding_buffer.data(), *size),
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/format/json/new_json_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ class NewJsonReader : public GenericReader {
// simdjson
static constexpr size_t _init_buffer_size = 1024 * 1024 * 8;
size_t _padded_size = _init_buffer_size + simdjson::SIMDJSON_PADDING;
size_t _original_doc_size = 0;
std::string _simdjson_ondemand_padding_buffer;
std::string _simdjson_ondemand_unscape_padding_buffer;
// char _simdjson_ondemand_padding_buffer[_padded_size];
Expand Down

0 comments on commit 6b56896

Please sign in to comment.