Skip to content

Commit

Permalink
[Fix](Json) fix some cast issue (apache#38683)
Browse files Browse the repository at this point in the history
1. casting to boolean/double type should consider all numeric types,
including int, double and boolean
2. casting to floating point type should consider boolean types and int types
  • Loading branch information
eldenmoon committed Aug 7, 2024
1 parent 843afcc commit 7cac459
Show file tree
Hide file tree
Showing 12 changed files with 354 additions and 507 deletions.
27 changes: 25 additions & 2 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -863,12 +863,19 @@ struct ConvertImplFromJsonb {
res[i] = 0;
continue;
}

if constexpr (type_index == TypeIndex::UInt8) {
// cast from json value to boolean type
if (value->isTrue()) {
res[i] = 1;
} else if (value->isFalse()) {
res[i] = 0;
} else if (value->isInt()) {
res[i] = ((const JsonbIntVal*)value)->val() == 0 ? 0 : 1;
} else if (value->isDouble()) {
res[i] = static_cast<ColumnType::value_type>(
((const JsonbDoubleVal*)value)->val()) == 0
? 0
: 1;
} else {
null_map[i] = 1;
res[i] = 0;
Expand All @@ -878,15 +885,31 @@ struct ConvertImplFromJsonb {
type_index == TypeIndex::Int32 ||
type_index == TypeIndex::Int64 ||
type_index == TypeIndex::Int128) {
// cast from json value to integer types
if (value->isInt()) {
res[i] = ((const JsonbIntVal*)value)->val();
} else if (value->isDouble()) {
res[i] = static_cast<ColumnType::value_type>(
((const JsonbDoubleVal*)value)->val());
} else if (value->isTrue()) {
res[i] = 1;
} else if (value->isFalse()) {
res[i] = 0;
} else {
null_map[i] = 1;
res[i] = 0;
}
} else if constexpr (type_index == TypeIndex::Float64) {
} else if constexpr (type_index == TypeIndex::Float64 ||
type_index == TypeIndex::Float32) {
// cast from json value to floating point types
if (value->isDouble()) {
res[i] = ((const JsonbDoubleVal*)value)->val();
} else if (value->isFloat()) {
res[i] = ((const JsonbFloatVal*)value)->val();
} else if (value->isTrue()) {
res[i] = 1;
} else if (value->isFalse()) {
res[i] = 0;
} else if (value->isInt()) {
res[i] = ((const JsonbIntVal*)value)->val();
} else {
Expand Down
223 changes: 0 additions & 223 deletions be/test/vec/function/function_jsonb_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1230,229 +1230,6 @@ TEST(FunctionJsonbTEST, JsonbExtractDoubleTest) {
static_cast<void>(check_function<DataTypeFloat64, true>(func_name, input_types, data_set));
}

// Exercises the "CAST" function from a Nullable JSONB input to each of the
// target types UInt8 (boolean), Int8, Int16, Int32, Int64, Float64 and String.
//
// Every section follows the same pattern:
//   1. input_types is set to {Nullable JSONB, ConstedNotnull <target type>};
//   2. data_set lists {{json text, target-type argument}, expected result};
//   3. each row is wrapped in its own single-row DataSet and handed to
//      check_function<DataTypeXxx, true>, whose return value is discarded.
//
// The second input column only carries the target type; its value looks like a
// placeholder (e.g. BIGINT(1), DOUBLE(1), STRING("1")) — TODO confirm against
// check_function.
// NOTE(review): rows are checked one at a time rather than as a whole table,
// presumably because the second argument is a constant column — confirm.
TEST(FunctionJsonbTEST, JsonbCastToOtherTest) {
std::string func_name = "CAST";
InputTypeSet input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::UInt8}};

// cast to boolean
// Only the JSON literals true/false are expected to yield a value here; every
// other input (numbers, strings, objects, arrays, null) is expected to be Null.
DataSet data_set = {
{{STRING("null"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()},
{{STRING("true"), static_cast<uint8_t>(TypeIndex::UInt8)}, BOOLEAN(1)},
{{STRING("false"), static_cast<uint8_t>(TypeIndex::UInt8)}, BOOLEAN(0)},
{{STRING("100"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, //int8
{{STRING("10000"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // int16
{{STRING("1000000000"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // int32
{{STRING("1152921504606846976"), static_cast<uint8_t>(TypeIndex::UInt8)},
Null()}, // int64
{{STRING("6.18"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // double
{{STRING(R"("abcd")"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // string
{{STRING("{}"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), static_cast<uint8_t>(TypeIndex::UInt8)},
Null()}, // object
{{STRING("[]"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // empty array
{{STRING("[123, 456]"), static_cast<uint8_t>(TypeIndex::UInt8)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), static_cast<uint8_t>(TypeIndex::UInt8)},
Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"),
static_cast<uint8_t>(TypeIndex::UInt8)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"),
static_cast<uint8_t>(TypeIndex::UInt8)},
Null()}, // complex array
};
// Run each row as an independent single-row data set.
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset));
}
input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::Int8}};
// cast to TINYINT
// Integer JSON values wider than 8 bits are expected to keep only their low
// 8 bits: 10000 % 256 == 16, and 1000000000 and 2^60 are multiples of 256.
// Booleans, doubles and non-scalar values are expected to be Null.
data_set = {
{{STRING("null"), static_cast<int8_t>(TypeIndex::Int8)}, Null()},
{{STRING("true"), static_cast<int8_t>(TypeIndex::Int8)}, Null()},
{{STRING("false"), static_cast<int8_t>(TypeIndex::Int8)}, Null()},
{{STRING("100"), static_cast<int8_t>(TypeIndex::Int8)}, TINYINT(100)}, //int8
{{STRING("10000"), static_cast<int8_t>(TypeIndex::Int8)}, TINYINT(16)}, // int16
{{STRING("1000000000"), static_cast<int8_t>(TypeIndex::Int8)}, TINYINT(0)}, // int32
{{STRING("1152921504606846976"), static_cast<int8_t>(TypeIndex::Int8)},
TINYINT(0)}, // int64
{{STRING("6.18"), static_cast<int8_t>(TypeIndex::Int8)}, Null()}, // double
{{STRING(R"("abcd")"), static_cast<int8_t>(TypeIndex::Int8)}, Null()}, // string
{{STRING("{}"), static_cast<int8_t>(TypeIndex::Int8)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), static_cast<int8_t>(TypeIndex::Int8)},
Null()}, // object
{{STRING("[]"), static_cast<int8_t>(TypeIndex::Int8)}, Null()}, // empty array
{{STRING("[123, 456]"), static_cast<int8_t>(TypeIndex::Int8)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), static_cast<int8_t>(TypeIndex::Int8)},
Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"),
static_cast<int8_t>(TypeIndex::Int8)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"),
static_cast<int8_t>(TypeIndex::Int8)},
Null()}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeInt8, true>(func_name, input_types, const_dataset));
}

input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::Int16}};
// cast to SMALLINT
// Values wider than 16 bits are expected to keep only their low 16 bits:
// 1000000000 % 65536 == 51712, which reads as -13824 when signed; 2^60 gives 0.
data_set = {
{{STRING("null"), static_cast<int16_t>(TypeIndex::Int16)}, Null()},
{{STRING("true"), static_cast<int16_t>(TypeIndex::Int16)}, Null()},
{{STRING("false"), static_cast<int16_t>(TypeIndex::Int16)}, Null()},
{{STRING("100"), static_cast<int16_t>(TypeIndex::Int16)}, SMALLINT(100)}, //int8
{{STRING("10000"), static_cast<int16_t>(TypeIndex::Int16)}, SMALLINT(10000)}, // int16
{{STRING("1000000000"), static_cast<int16_t>(TypeIndex::Int16)},
SMALLINT(-13824)}, // int32
{{STRING("1152921504606846976"), static_cast<int16_t>(TypeIndex::Int16)},
SMALLINT(0)}, // int64
{{STRING("6.18"), static_cast<int16_t>(TypeIndex::Int16)}, Null()}, // double
{{STRING(R"("abcd")"), static_cast<int16_t>(TypeIndex::Int16)}, Null()}, // string
{{STRING("{}"), static_cast<int16_t>(TypeIndex::Int16)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), static_cast<int16_t>(TypeIndex::Int16)},
Null()}, // object
{{STRING("[]"), static_cast<int16_t>(TypeIndex::Int16)}, Null()}, // empty array
{{STRING("[123, 456]"), static_cast<int16_t>(TypeIndex::Int16)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), static_cast<int16_t>(TypeIndex::Int16)},
Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"),
static_cast<int16_t>(TypeIndex::Int16)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"),
static_cast<int16_t>(TypeIndex::Int16)},
Null()}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeInt16, true>(func_name, input_types, const_dataset));
}

input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::Int32}};
// cast to INT
// Only the 64-bit input overflows here; 2^60 has all-zero low 32 bits, so the
// expected result is 0.
data_set = {
{{STRING("null"), static_cast<int32_t>(TypeIndex::Int32)}, Null()},
{{STRING("true"), static_cast<int32_t>(TypeIndex::Int32)}, Null()},
{{STRING("false"), static_cast<int32_t>(TypeIndex::Int32)}, Null()},
{{STRING("100"), static_cast<int32_t>(TypeIndex::Int32)}, INT(100)}, //int8
{{STRING("10000"), static_cast<int32_t>(TypeIndex::Int32)}, INT(10000)}, // int16
{{STRING("1000000000"), static_cast<int32_t>(TypeIndex::Int32)},
INT(1000000000)}, // int32
{{STRING("1152921504606846976"), static_cast<int32_t>(TypeIndex::Int32)},
INT(0)}, // int64
{{STRING("6.18"), static_cast<int32_t>(TypeIndex::Int32)}, Null()}, // double
{{STRING(R"("abcd")"), static_cast<int32_t>(TypeIndex::Int32)}, Null()}, // string
{{STRING("{}"), static_cast<int32_t>(TypeIndex::Int32)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), static_cast<int32_t>(TypeIndex::Int32)},
Null()}, // object
{{STRING("[]"), static_cast<int32_t>(TypeIndex::Int32)}, Null()}, // empty array
{{STRING("[123, 456]"), static_cast<int32_t>(TypeIndex::Int32)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), static_cast<int32_t>(TypeIndex::Int32)},
Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"),
static_cast<int32_t>(TypeIndex::Int32)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"),
static_cast<int32_t>(TypeIndex::Int32)},
Null()}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeInt32, true>(func_name, input_types, const_dataset));
}

input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::Int64}};
// cast to BIGINT
// All four integer inputs fit in 64 bits, so each is expected back unchanged.
data_set = {
{{STRING("null"), BIGINT(1)}, Null()},
{{STRING("true"), BIGINT(1)}, Null()},
{{STRING("false"), BIGINT(1)}, Null()},
{{STRING("100"), BIGINT(1)}, BIGINT(100)}, //int8
{{STRING("10000"), BIGINT(1)}, BIGINT(10000)}, // int16
{{STRING("1000000000"), BIGINT(1)}, BIGINT(1000000000)}, // int32
{{STRING("1152921504606846976"), BIGINT(1)}, BIGINT(1152921504606846976)}, // int64
{{STRING("6.18"), BIGINT(1)}, Null()}, // double
{{STRING(R"("abcd")"), BIGINT(1)}, Null()}, // string
{{STRING("{}"), BIGINT(1)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), BIGINT(1)}, Null()}, // object
{{STRING("[]"), BIGINT(1)}, Null()}, // empty array
{{STRING("[123, 456]"), BIGINT(1)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), BIGINT(1)}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), BIGINT(1)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), BIGINT(1)},
Null()}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeInt64, true>(func_name, input_types, const_dataset));
}

input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::Float64}};
// cast to DOUBLE
// Both integer and double JSON values are expected to convert; booleans,
// strings, objects and arrays are expected to be Null.
data_set = {
{{STRING("null"), DOUBLE(1)}, Null()},
{{STRING("true"), DOUBLE(1)}, Null()},
{{STRING("false"), DOUBLE(1)}, Null()},
{{STRING("100"), DOUBLE(1)}, DOUBLE(100)}, //int8
{{STRING("10000"), DOUBLE(1)}, DOUBLE(10000)}, // int16
{{STRING("1000000000"), DOUBLE(1)}, DOUBLE(1000000000)}, // int32
{{STRING("1152921504606846976"), DOUBLE(1)}, DOUBLE(1152921504606846976)}, // int64
{{STRING("6.18"), DOUBLE(1)}, DOUBLE(6.18)}, // double
{{STRING(R"("abcd")"), DOUBLE(1)}, Null()}, // string
{{STRING("{}"), DOUBLE(1)}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), DOUBLE(1)}, Null()}, // object
{{STRING("[]"), DOUBLE(1)}, Null()}, // empty array
{{STRING("[123, 456]"), DOUBLE(1)}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), DOUBLE(1)}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), DOUBLE(1)},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), DOUBLE(1)},
Null()}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset));
}

input_types = {Nullable {TypeIndex::JSONB}, ConstedNotnull {TypeIndex::String}};
// cast to STRING
// Every input is expected to produce a string, including JSON null, which
// yields the text "null" rather than a Null result. Expected object and array
// text has no whitespace after ',' and ':' even where the input had some, and
// JSON strings keep their surrounding double quotes.
data_set = {
{{STRING("null"), STRING("1")}, STRING("null")},
{{STRING("true"), STRING("1")}, STRING("true")},
{{STRING("false"), STRING("1")}, STRING("false")},
{{STRING("100"), STRING("1")}, STRING("100")}, //int8
{{STRING("10000"), STRING("1")}, STRING("10000")}, // int16
{{STRING("1000000000"), STRING("1")}, STRING("1000000000")}, // int32
{{STRING("1152921504606846976"), STRING("1")}, STRING("1152921504606846976")}, // int64
{{STRING("6.18"), STRING("1")}, STRING("6.18")}, // double
{{STRING(R"("abcd")"), STRING("1")}, STRING(R"("abcd")")}, // string
{{STRING("{}"), STRING("1")}, STRING("{}")}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("1")},
STRING(R"({"k1":"v31","k2":300})")}, // object
{{STRING("[]"), STRING("1")}, STRING("[]")}, // empty array
{{STRING("[123, 456]"), STRING("1")}, STRING("[123,456]")}, // int array
{{STRING(R"(["abc", "def"])"), STRING("1")},
STRING(R"(["abc","def"])")}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("1")},
STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("1")},
STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
};
for (const auto& row : data_set) {
DataSet const_dataset = {row};
static_cast<void>(
check_function<DataTypeString, true>(func_name, input_types, const_dataset));
}
}

TEST(FunctionJsonbTEST, JsonbCastFromOtherTest) {
// CAST Nullable(X) to Nullable(JSONB)
static_cast<void>(check_function<DataTypeJsonb, true>(
Expand Down
37 changes: 35 additions & 2 deletions regression-test/data/datatype_p0/json/json_cast.out
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
true

-- !sql8 --
\N
1000

-- !sql9 --
1000.1111

-- !sql10 --
\N
true

-- !sql11 --
["CXO0N: 1045901740","HMkTa: 1348450505","44 HHD: 915015173","j9WoJ: -1517316688"]
Expand All @@ -53,3 +53,36 @@ true
-- !sql18 --
\N

-- !sql19 --
1

-- !sql19 --
0

-- !sql20 --
1.0

-- !sql20 --
0.0

-- !sql21 --
true

-- !sql22 --
1024.0

-- !sql23 --
1024.0

-- !sql24 --
1024.0

-- !sql24 --
[1,2,3]

-- !sql25 --
[1,2,3]

-- !sql26 --
["2020-01-01"]

Loading

0 comments on commit 7cac459

Please sign in to comment.