Skip to content

Commit

Permalink
fix json cast
Browse files — browse the repository at this point in the history
  • Loading branch information
eldenmoon committed Sep 10, 2024
1 parent 480d8f0 commit 0e0eb12
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 37 deletions.
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool
nested = std::make_shared<vectorized::DataTypeDateV2>();
break;
case TypeIndex::DateTimeV2:
nested = std::make_shared<DataTypeDateTimeV2>();
nested = std::make_shared<DataTypeDateTimeV2>(scale > 0 ? scale : 0);
break;
case TypeIndex::DateTime:
nested = std::make_shared<vectorized::DataTypeDateTime>();
Expand Down
52 changes: 18 additions & 34 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ struct ConvertNothingToJsonb {
}
};

template <TypeIndex type_index, typename ColumnType>
template <TypeIndex type_index, typename ColumnType, typename ToDataType>
struct ConvertImplFromJsonb {
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
const size_t result, size_t input_rows_count) {
Expand All @@ -851,16 +851,12 @@ struct ConvertImplFromJsonb {
auto& null_map = null_map_col->get_data();
auto col_to = ColumnType::create();

//IColumn & col_to = *res;
// size_t size = col_from.size();
col_to->reserve(input_rows_count);
auto& res = col_to->get_data();
res.resize(input_rows_count);

for (size_t i = 0; i < input_rows_count; ++i) {
const auto& val = column_string->get_data_at(i);
// ReadBuffer read_buffer((char*)(val.data), val.size);
// RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to));

if (val.size == 0) {
null_map[i] = 1;
Expand All @@ -883,6 +879,15 @@ struct ConvertImplFromJsonb {
res[i] = 0;
continue;
}
if (value->isString()) {
// convert by parse
const auto& data = static_cast<const JsonbBlobVal*>(value)->getBlob();
size_t len = static_cast<const JsonbBlobVal*>(value)->getBlobLen();
ReadBuffer rb((char*)(data), len);
bool parsed = try_parse_impl<ToDataType>(res[i], rb, context);
null_map[i] = !parsed;
continue;
}
if constexpr (type_index == TypeIndex::UInt8) {
// cast from json value to boolean type
if (value->isTrue()) {
Expand Down Expand Up @@ -915,17 +920,6 @@ struct ConvertImplFromJsonb {
res[i] = 1;
} else if (value->isFalse()) {
res[i] = 0;
} else if (value->isString()) {
const auto& data = static_cast<const JsonbBlobVal*>(value)->getBlob();
size_t len = static_cast<const JsonbBlobVal*>(value)->getBlobLen();
int128_t val = 0;
ReadBuffer rb((char*)(data), len);
if (!read_int_text_impl(val, rb)) {
return Status::InvalidArgument(
"parse number fail, string: '{}'",
std::string(rb.position(), rb.count()).c_str());
}
res[i] = static_cast<ColumnType::value_type>(val);
} else {
null_map[i] = 1;
res[i] = 0;
Expand All @@ -943,17 +937,6 @@ struct ConvertImplFromJsonb {
res[i] = 0;
} else if (value->isInt()) {
res[i] = ((const JsonbIntVal*)value)->val();
} else if (value->isString()) {
const auto& data = static_cast<const JsonbBlobVal*>(value)->getBlob();
size_t len = static_cast<const JsonbBlobVal*>(value)->getBlobLen();
double val = 0;
ReadBuffer rb((char*)(data), len);
if (!read_float_text_fast_impl(val, rb)) {
return Status::InvalidArgument(
"parse number fail, string: '{}'",
std::string(rb.position(), rb.count()).c_str());
}
res[i] = static_cast<ColumnType::value_type>(val);
} else {
null_map[i] = 1;
res[i] = 0;
Expand Down Expand Up @@ -1999,19 +1982,20 @@ class FunctionCast final : public IFunctionBase {
bool jsonb_string_as_string) const {
switch (to_type->get_type_id()) {
case TypeIndex::UInt8:
return &ConvertImplFromJsonb<TypeIndex::UInt8, ColumnUInt8>::execute;
return &ConvertImplFromJsonb<TypeIndex::UInt8, ColumnUInt8, DataTypeUInt8>::execute;
case TypeIndex::Int8:
return &ConvertImplFromJsonb<TypeIndex::Int8, ColumnInt8>::execute;
return &ConvertImplFromJsonb<TypeIndex::Int8, ColumnInt8, DataTypeInt8>::execute;
case TypeIndex::Int16:
return &ConvertImplFromJsonb<TypeIndex::Int16, ColumnInt16>::execute;
return &ConvertImplFromJsonb<TypeIndex::Int16, ColumnInt16, DataTypeInt16>::execute;
case TypeIndex::Int32:
return &ConvertImplFromJsonb<TypeIndex::Int32, ColumnInt32>::execute;
return &ConvertImplFromJsonb<TypeIndex::Int32, ColumnInt32, DataTypeInt32>::execute;
case TypeIndex::Int64:
return &ConvertImplFromJsonb<TypeIndex::Int64, ColumnInt64>::execute;
return &ConvertImplFromJsonb<TypeIndex::Int64, ColumnInt64, DataTypeInt64>::execute;
case TypeIndex::Int128:
return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128>::execute;
return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128, DataTypeInt128>::execute;
case TypeIndex::Float64:
return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64>::execute;
return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64,
DataTypeFloat64>::execute;
case TypeIndex::String:
if (!jsonb_string_as_string) {
// Conversion from String through parsing.
Expand Down
4 changes: 2 additions & 2 deletions regression-test/data/variant_p0/predefine/load.out
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ v1.predefine_col4 text Yes false \N NONE
8 {"PREDEFINE_COL4":"2020-01-01 01:00:00"}

-- !sql --
1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"}
2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"}
1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"}
2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"}
3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"}
4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"}

Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ suite("variant_mv") {
sql "set runtime_filter_mode=OFF";
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
sql "SET enable_agg_state = true"
sql "SET use_variant_as_complex_variant = false"

sql """
drop table if exists github_events1
Expand Down
1 change: 1 addition & 0 deletions regression-test/suites/variant_p0/predefine/load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
sql """insert into test_predefine1 values(6, '{"PREDEFINE_COL2" : 1.11111}')"""
sql """insert into test_predefine1 values(7, '{"PREDEFINE_COL3" : "11111.00000"}')"""
sql """insert into test_predefine1 values(8, '{"PREDEFINE_COL4" : "2020-01-01-01"}')"""
sql """select * from test_predefine1 order by id limit 1"""
qt_sql """desc test_predefine1"""
qt_sql """select * from test_predefine1 order by id"""
sql """set use_variant_as_complex_variant = false"""
Expand Down

0 comments on commit 0e0eb12

Please sign in to comment.