Skip to content

Commit

Permalink
[feature](datatype) add BE config to allow zero date (#34961)
Browse files Browse the repository at this point in the history
Co-authored-by: Gabriel <[email protected]>
  • Loading branch information
2 people authored and Doris-Extras committed May 23, 2024
1 parent a52ee6e commit a6f7747
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 13 deletions.
1 change: 1 addition & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,7 @@ DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
// max_write_buffer_number for rocksdb
DEFINE_Int32(rocksdb_max_write_buffer_number, "5");

// When true, date/datetime v2 text parsing accepts the zero date 0000-00-00 and stores it as
// 0000-01-01 instead of rejecting it. Defaults to false.
DEFINE_mBool(allow_zero_date, "false");
DEFINE_Bool(allow_invalid_decimalv2_literal, "false");
DEFINE_mString(kerberos_ccache_path, "");
DEFINE_mString(kerberos_krb5_conf_path, "/etc/krb5.conf");
Expand Down
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,8 @@ DECLARE_mBool(enable_delete_when_cumu_compaction);
// max_write_buffer_number for rocksdb
DECLARE_Int32(rocksdb_max_write_buffer_number);

// If true, the zero date 0000-00-00 is accepted and converted to 0000-01-01 instead of being rejected. It is recommended to keep this set to false.
DECLARE_mBool(allow_zero_date);
// Allow invalid decimalv2 literals for compatibility with old versions. It is strongly recommended to set this to false.
// NOTE(review): declared with the non-mutable macro so the declaration matches the
// DEFINE_Bool definition in config.cpp — confirm the definition reflects the intended mutability.
DECLARE_Bool(allow_invalid_decimalv2_literal);
// Allow to specify kerberos credentials cache path.
Expand Down
10 changes: 6 additions & 4 deletions be/src/vec/io/io_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ template <typename T>
bool read_date_v2_text_impl(T& x, ReadBuffer& buf) {
static_assert(std::is_same_v<UInt32, T>);
auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x);
auto ans = dv.from_date_str(buf.position(), buf.count());
// from_date_str(str, len, int scale = -1, bool convert_zero = false): scale must be passed
// explicitly, otherwise the bool lands in `scale` and convert_zero keeps its default (false).
auto ans = dv.from_date_str(buf.position(), buf.count(), -1, config::allow_zero_date);

// only to match the is_all_read() check to prevent return null
buf.position() = buf.end();
Expand All @@ -355,7 +355,8 @@ template <typename T>
bool read_date_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& local_time_zone) {
static_assert(std::is_same_v<UInt32, T>);
auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x);
auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone);
// The overload is (str, len, time_zone, int scale = -1, bool convert_zero = false); pass the
// default scale explicitly so the flag reaches convert_zero instead of being read as a scale.
auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, -1,
                            config::allow_zero_date);

// only to match the is_all_read() check to prevent return null
buf.position() = buf.end();
Expand All @@ -367,7 +368,7 @@ template <typename T>
bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, UInt32 scale = -1) {
static_assert(std::is_same_v<UInt64, T>);
auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x);
auto ans = dv.from_date_str(buf.position(), buf.count(), scale);
auto ans = dv.from_date_str(buf.position(), buf.count(), scale, config::allow_zero_date);

// only to match the is_all_read() check to prevent return null
buf.position() = buf.end();
Expand All @@ -380,7 +381,8 @@ bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& lo
UInt32 scale = -1) {
static_assert(std::is_same_v<UInt64, T>);
auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x);
auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale);
auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale,
config::allow_zero_date);

// only to match the is_all_read() check to prevent return null
buf.position() = buf.end();
Expand Down
19 changes: 13 additions & 6 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1974,18 +1974,20 @@ void DateV2Value<T>::format_datetime(uint32_t* date_val, bool* carry_bits) const
// YYYY-MM-DD HH-MM-SS.FFFFFF AM in default format
// 0    1  2  3  4  5  6      7
// Parses a date/datetime string without time zone information.
// Forwards to from_date_str_base with a null time zone pointer; `scale` and `convert_zero`
// are passed through unchanged.
template <typename T>
bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/) {
return from_date_str_base(date_str, len, scale, nullptr);
bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/,
bool convert_zero) {
return from_date_str_base(date_str, len, scale, nullptr, convert_zero);
}
// Time-zone-aware overload: forwards to from_date_str_base with the address of
// local_time_zone; `scale` and `convert_zero` are passed through unchanged.
template <typename T>
bool DateV2Value<T>::from_date_str(const char* date_str, int len,
const cctz::time_zone& local_time_zone, int scale /* = -1*/) {
return from_date_str_base(date_str, len, scale, &local_time_zone);
const cctz::time_zone& local_time_zone, int scale /* = -1*/,
bool convert_zero) {
return from_date_str_base(date_str, len, scale, &local_time_zone, convert_zero);
}
template <typename T>
bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale,
const cctz::time_zone* local_time_zone) {
const cctz::time_zone* local_time_zone, bool convert_zero) {
const char* ptr = date_str;
const char* end = date_str + len;
// ONLY 2, 6 can follow by a space
Expand Down Expand Up @@ -2187,7 +2189,12 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale
return false;
}
if (is_invalid(date_val[0], date_val[1], date_val[2], 0, 0, 0, 0)) {
return false;
if (date_val[0] == 0 && date_val[1] == 0 && date_val[2] == 0 && convert_zero) {
date_val[1] = 1;
date_val[2] = 1;
} else {
return false;
}
}

// In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make
Expand Down
6 changes: 3 additions & 3 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -847,9 +847,9 @@ class DateV2Value {
// 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS'
// 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS'
// 'YYYYMMDDTHHMMSS'
// When `convert_zero` is true, an all-zero date (0000-00-00) is accepted and parsed as
// 0000-01-01 instead of being rejected.
// Note that `scale` comes before `convert_zero` in both overloads, so a caller that only
// wants to set `convert_zero` must still pass `scale` explicitly (e.g. -1).
bool from_date_str(const char* str, int len, int scale = -1);
bool from_date_str(const char* str, int len, int scale = -1, bool convert_zero = false);
bool from_date_str(const char* str, int len, const cctz::time_zone& local_time_zone,
int scale = -1);
int scale = -1, bool convert_zero = false);

// Convert this value to string
// this will check type to decide which format to convert
Expand Down Expand Up @@ -1245,7 +1245,7 @@ class DateV2Value {
bool disable_lut = false);

// Common implementation behind both from_date_str overloads. local_time_zone may be null
// (the overload without a time zone passes nullptr).
bool from_date_str_base(const char* date_str, int len, int scale,
const cctz::time_zone* local_time_zone);
const cctz::time_zone* local_time_zone, bool convert_zero);

// Used to construct from int value
int64_t standardize_timevalue(int64_t value);
Expand Down
24 changes: 24 additions & 0 deletions be/test/vec/exprs/vexpr_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,30 @@ TEST(TEST_VEXPR, LITERALTEST) {
EXPECT_EQ(v, dt);
EXPECT_EQ("2021-04-07", literal.value());
}
{
    // Zero-date handling for DateV2: with convert_zero enabled, "00000000" must parse and
    // produce the same value as "00000101"; with the default arguments it must be rejected.
    const char* zero_text = "00000000";
    const char* first_day_text = "00000101";

    DateV2Value<DateV2ValueType> from_zero;
    DateV2Value<DateV2ValueType> from_first_day;

    EXPECT_TRUE(from_zero.from_date_str(zero_text, strlen(zero_text), -1, true));
    EXPECT_TRUE(from_first_day.from_date_str(first_day_text, strlen(first_day_text), -1, true));
    EXPECT_EQ(from_zero.to_int64(), from_first_day.to_int64());

    EXPECT_FALSE(from_zero.from_date_str(zero_text, strlen(zero_text)));
}
{
    // Zero-date handling for DateTimeV2: the zero date with a time part must parse to the
    // same value as 0000-01-01 with that time part when convert_zero is enabled, and must be
    // rejected with the default arguments.
    const char* zero_text = "00000000111111";
    const char* first_day_text = "00000101111111";

    DateV2Value<DateTimeV2ValueType> from_zero;
    DateV2Value<DateTimeV2ValueType> from_first_day;

    EXPECT_TRUE(from_zero.from_date_str(zero_text, strlen(zero_text), -1, true));
    EXPECT_TRUE(from_first_day.from_date_str(first_day_text, strlen(first_day_text), -1, true));
    EXPECT_EQ(from_zero.to_int64(), from_first_day.to_int64());

    EXPECT_FALSE(from_zero.from_date_str(zero_text, strlen(zero_text)));
}
// jsonb
{
std::string j = R"([null,true,false,100,6.18,"abc"])";
Expand Down

0 comments on commit a6f7747

Please sign in to comment.