From 0e0eb12c37e977cc33cd2f45cf274ff4819ffcab Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Tue, 10 Sep 2024 10:44:40 +0800 Subject: [PATCH] fix json cast --- be/src/vec/data_types/data_type_factory.cpp | 2 +- be/src/vec/functions/function_cast.h | 52 +++++++------------ .../data/variant_p0/predefine/load.out | 4 +- .../mv/variant/variant_mv.groovy | 1 + .../suites/variant_p0/predefine/load.groovy | 1 + 5 files changed, 23 insertions(+), 37 deletions(-) diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 98373684ca97482..67f81b7b3fc1650 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -299,7 +299,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared(); break; case TypeIndex::DateTimeV2: - nested = std::make_shared(); + nested = std::make_shared(scale > 0 ? scale : 0); break; case TypeIndex::DateTime: nested = std::make_shared(); diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index b275c05ec791723..721d31061ded43f 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -837,7 +837,7 @@ struct ConvertNothingToJsonb { } }; -template +template struct ConvertImplFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { @@ -851,16 +851,12 @@ struct ConvertImplFromJsonb { auto& null_map = null_map_col->get_data(); auto col_to = ColumnType::create(); - //IColumn & col_to = *res; - // size_t size = col_from.size(); col_to->reserve(input_rows_count); auto& res = col_to->get_data(); res.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { const auto& val = column_string->get_data_at(i); - // ReadBuffer read_buffer((char*)(val.data), val.size); - // RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); if (val.size == 0) { null_map[i] = 1; @@ -883,6 +879,15 @@ struct ConvertImplFromJsonb { res[i] = 0; continue; } + if (value->isString()) { + // convert by parse + const auto& data = static_cast(value)->getBlob(); + size_t len = static_cast(value)->getBlobLen(); + ReadBuffer rb((char*)(data), len); + bool parsed = try_parse_impl(res[i], rb, context); + null_map[i] = !parsed; + continue; + } if constexpr (type_index == TypeIndex::UInt8) { // cast from json value to boolean type if (value->isTrue()) { @@ -915,17 +920,6 @@ struct ConvertImplFromJsonb { res[i] = 1; } else if (value->isFalse()) { res[i] = 0; - } else if (value->isString()) { - const auto& data = static_cast(value)->getBlob(); - size_t len = static_cast(value)->getBlobLen(); - int128_t val = 0; - ReadBuffer rb((char*)(data), len); - if (!read_int_text_impl(val, rb)) { - return Status::InvalidArgument( - "parse number fail, string: '{}'", - std::string(rb.position(), rb.count()).c_str()); - } - res[i] = static_cast(val); } else { null_map[i] = 1; res[i] = 0; @@ -943,17 +937,6 @@ struct ConvertImplFromJsonb { res[i] = 0; } else if (value->isInt()) { res[i] = ((const JsonbIntVal*)value)->val(); - } else if (value->isString()) { - const auto& data = static_cast(value)->getBlob(); - size_t len = static_cast(value)->getBlobLen(); - double val = 0; - ReadBuffer rb((char*)(data), len); - if (!read_float_text_fast_impl(val, rb)) { - return Status::InvalidArgument( - "parse number fail, string: '{}'", - std::string(rb.position(), rb.count()).c_str()); - } - res[i] = static_cast(val); } else { null_map[i] = 1; res[i] = 0; @@ -1999,19 +1982,20 @@ class FunctionCast final : public IFunctionBase { bool jsonb_string_as_string) const { switch (to_type->get_type_id()) { case TypeIndex::UInt8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int16: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int32: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int128: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Float64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::String: if (!jsonb_string_as_string) { // Conversion from String through parsing. diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out index 5e42ab59a89d777..5334f8e6fe92b83 100644 --- a/regression-test/data/variant_p0/predefine/load.out +++ b/regression-test/data/variant_p0/predefine/load.out @@ -95,8 +95,8 @@ v1.predefine_col4 text Yes false \N NONE 8 {"PREDEFINE_COL4":"2020-01-01 01:00:00"} -- !sql -- -1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} -2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} 3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} 4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} diff --git a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy index 0d0415456f726ff..214c3d89b75967e 100644 --- a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy @@ -22,6 +22,7 @@ suite("variant_mv") { sql "set runtime_filter_mode=OFF"; sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" sql "SET enable_agg_state = true" + sql "SET use_variant_as_complex_variant = false" sql """ drop table if exists github_events1 diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy index f39e51d9316dd44..b512e337f98c8d5 100644 --- a/regression-test/suites/variant_p0/predefine/load.groovy +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -70,6 +70,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql """insert into test_predefine1 values(6, '{"PREDEFINE_COL2" : 1.11111}')""" sql """insert into test_predefine1 values(7, '{"PREDEFINE_COL3" : "11111.00000"}')""" sql """insert into test_predefine1 values(8, '{"PREDEFINE_COL4" : "2020-01-01-01"}')""" + sql """select * from test_predefine1 order by id limit 1""" qt_sql """desc test_predefine1""" qt_sql """select * from test_predefine1 order by id""" sql """set use_variant_as_complex_variant = false"""