From 036a1c3b6dabb8a5997468135639d90c153eb7a5 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 15 Aug 2024 12:01:28 +0800 Subject: [PATCH] [Fix](Row store) fix row store with invalid json string in variant type Previously, PR #37794 allowed invalid JSON text in a variant column and stored it as a string type. However, when encoding the row store we CHECK that the JSON is valid and store it as a jsonb binary field, which fails on such text. This PR adds support for encoding invalid JSON text in the row store: it is written as a raw string and read back as a string. --- .../serde/data_type_object_serde.cpp | 21 +++++++++++++++---- .../data/variant_p0/variant_with_rowstore.out | 3 +++ .../variant_p0/variant_with_rowstore.groovy | 18 ++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index c19a5f185959a9..da9f373081841a 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -98,8 +98,13 @@ void DataTypeObjectSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWr JsonbParser json_parser; // encode as jsonb bool succ = json_parser.parse(value_str.data(), value_str.size()); - // maybe more graceful, it is ok to check here since data could be parsed - CHECK(succ); + if (!succ) { + // not a valid json insert raw text + result.writeStartString(); + result.writeString(value_str.data(), value_str.size()); + result.writeEndString(); + return; + } result.writeStartBinary(); result.writeBinary(json_parser.getWriter().getOutput()->getBuffer(), json_parser.getWriter().getOutput()->getSize()); @@ -109,8 +114,16 @@ void DataTypeObjectSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWr void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { auto& variant = assert_cast(column); Field field; - auto blob = static_cast(arg); - field.assign_jsonb(blob->getBlob(), blob->getBlobLen()); + if (arg->isBinary()) { + const auto* 
blob = static_cast(arg); + field.assign_jsonb(blob->getBlob(), blob->getBlobLen()); + } else if (arg->isString()) { + // not a valid jsonb type, insert as string + const auto* str = static_cast(arg); + field.assign_string(str->getBlob(), str->getBlobLen()); + } else { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb type"); + } variant.insert(field); } diff --git a/regression-test/data/variant_p0/variant_with_rowstore.out b/regression-test/data/variant_p0/variant_with_rowstore.out index 6c34622bec85f2..763825b37a648c 100644 --- a/regression-test/data/variant_p0/variant_with_rowstore.out +++ b/regression-test/data/variant_p0/variant_with_rowstore.out @@ -32,3 +32,6 @@ -- !point_select -- -1 {"a":1123} {"a":1123} +-- !sql -- +1 1|[""] + diff --git a/regression-test/suites/variant_p0/variant_with_rowstore.groovy b/regression-test/suites/variant_p0/variant_with_rowstore.groovy index 771f776b3e77e4..d1946b8123c04c 100644 --- a/regression-test/suites/variant_p0/variant_with_rowstore.groovy +++ b/regression-test/suites/variant_p0/variant_with_rowstore.groovy @@ -108,4 +108,22 @@ suite("regression_test_variant_rowstore", "variant_type"){ // stmt.setInt(1, -3) // qe_point_select stmt } + + sql "DROP TABLE IF EXISTS table_rs_invalid_json" + sql """ + CREATE TABLE table_rs_invalid_json + ( + col0 BIGINT NOT NULL, + coljson VARIANT NOT NULL, INDEX colvariant_idx(coljson) USING INVERTED + ) + UNIQUE KEY(col0) + DISTRIBUTED BY HASH(col0) BUCKETS 4 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "store_row_column"="true", + "replication_num" = "1" + ); + """ + sql """insert into table_rs_invalid_json values (1, '1|[""]')""" + qt_sql "select * from table_rs_invalid_json where col0 = 1" } \ No newline at end of file