Skip to content

Commit

Permalink
fix precision loss for newly inserted rows in partial update
Browse files Browse the repository at this point in the history
  • Loading branch information
bobhan1 committed Sep 2, 2024
1 parent 312ab3b commit e68de4d
Show file tree
Hide file tree
Showing 12 changed files with 270 additions and 17 deletions.
4 changes: 4 additions & 0 deletions be/src/exec/tablet_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) {
}
}
_timestamp_ms = pschema.timestamp_ms();
if (pschema.has_nano_seconds()) {
_nano_seconds = pschema.nano_seconds();
}
_timezone = pschema.timezone();

for (const auto& col : pschema.partial_update_input_columns()) {
Expand Down Expand Up @@ -281,6 +284,7 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const {
pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id);
pschema->set_timestamp_ms(_timestamp_ms);
pschema->set_timezone(_timezone);
pschema->set_nano_seconds(_nano_seconds);
pschema->set_sequence_map_col_name(_sequence_map_column);
for (auto col : _partial_update_input_columns) {
*pschema->add_partial_update_input_columns() = col;
Expand Down
3 changes: 3 additions & 0 deletions be/src/exec/tablet_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class OlapTableSchemaParam {
int32_t auto_increment_column_unique_id() const { return _auto_increment_column_unique_id; }
void set_timestamp_ms(int64_t timestamp_ms) { _timestamp_ms = timestamp_ms; }
int64_t timestamp_ms() const { return _timestamp_ms; }
void set_nano_seconds(int32_t nano_seconds) { _nano_seconds = nano_seconds; }
int32_t nano_seconds() const { return _nano_seconds; }
void set_timezone(std::string timezone) { _timezone = timezone; }
std::string timezone() const { return _timezone; }
bool is_strict_mode() const { return _is_strict_mode; }
Expand All @@ -118,6 +120,7 @@ class OlapTableSchemaParam {
std::string _sequence_map_column {};
int32_t _auto_increment_column_unique_id;
int64_t _timestamp_ms = 0;
int32_t _nano_seconds {0};
std::string _timezone;
};

Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/delta_writer_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ void DeltaWriterV2::_build_current_tablet_schema(int64_t index_id,
_partial_update_info->init(*_tablet_schema, table_schema_param->is_partial_update(),
table_schema_param->partial_update_input_columns(),
table_schema_param->is_strict_mode(),
table_schema_param->timestamp_ms(), table_schema_param->timezone(),
table_schema_param->timestamp_ms(),
table_schema_param->nano_seconds(), table_schema_param->timezone(),
table_schema_param->auto_increment_coulumn());
}

Expand Down
28 changes: 19 additions & 9 deletions be/src/olap/partial_update_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ namespace doris {

void PartialUpdateInfo::init(const TabletSchema& tablet_schema, bool partial_update,
const std::set<string>& partial_update_cols, bool is_strict_mode,
int64_t timestamp_ms, const std::string& timezone,
const std::string& auto_increment_column, int64_t cur_max_version) {
int64_t timestamp_ms, int32_t nano_seconds,
const std::string& timezone, const std::string& auto_increment_column,
int64_t cur_max_version) {
is_partial_update = partial_update;
partial_update_input_columns = partial_update_cols;
max_version_in_flush_phase = cur_max_version;
this->timestamp_ms = timestamp_ms;
this->nano_seconds = nano_seconds;
this->timezone = timezone;
missing_cids.clear();
update_cids.clear();
Expand Down Expand Up @@ -79,6 +81,7 @@ void PartialUpdateInfo::to_pb(PartialUpdateInfoPB* partial_update_info_pb) const
can_insert_new_rows_in_partial_update);
partial_update_info_pb->set_is_strict_mode(is_strict_mode);
partial_update_info_pb->set_timestamp_ms(timestamp_ms);
partial_update_info_pb->set_nano_seconds(nano_seconds);
partial_update_info_pb->set_timezone(timezone);
partial_update_info_pb->set_is_input_columns_contains_auto_inc_column(
is_input_columns_contains_auto_inc_column);
Expand Down Expand Up @@ -115,6 +118,9 @@ void PartialUpdateInfo::from_pb(PartialUpdateInfoPB* partial_update_info_pb) {
partial_update_info_pb->is_input_columns_contains_auto_inc_column();
is_schema_contains_auto_inc_column =
partial_update_info_pb->is_schema_contains_auto_inc_column();
if (partial_update_info_pb->has_nano_seconds()) {
nano_seconds = partial_update_info_pb->nano_seconds();
}
default_values.clear();
for (const auto& value : partial_update_info_pb->default_values()) {
default_values.push_back(value);
Expand All @@ -136,16 +142,20 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids(
if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 &&
to_lower(column.default_value()).find(to_lower("CURRENT_TIMESTAMP")) !=
std::string::npos)) {
LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}",
column.default_value());
DateV2Value<DateTimeV2ValueType> dtv;
dtv.from_unixtime(timestamp_ms / 1000, timezone);
default_value = dtv.debug_string();
auto pos = to_lower(column.default_value()).find('(');
if (pos == std::string::npos) {
DateV2Value<DateTimeV2ValueType> dtv;
dtv.from_unixtime(timestamp_ms / 1000, timezone);
default_value = dtv.debug_string();
} else {
int precision = std::stoi(column.default_value().substr(pos + 1));
DateV2Value<DateTimeV2ValueType> dtv;
dtv.from_unixtime(timestamp_ms / 1000, nano_seconds, timezone, precision);
default_value = dtv.debug_string();
}
} else if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATEV2 &&
to_lower(column.default_value()).find(to_lower("CURRENT_DATE")) !=
std::string::npos)) {
LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}",
column.default_value());
DateV2Value<DateV2ValueType> dv;
dv.from_unixtime(timestamp_ms / 1000, timezone);
default_value = dv.debug_string();
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/partial_update_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct RowsetId;
struct PartialUpdateInfo {
void init(const TabletSchema& tablet_schema, bool partial_update,
const std::set<std::string>& partial_update_cols, bool is_strict_mode,
int64_t timestamp_ms, const std::string& timezone,
int64_t timestamp_ms, int32_t nano_seconds, const std::string& timezone,
const std::string& auto_increment_column, int64_t cur_max_version = -1);
void to_pb(PartialUpdateInfoPB* partial_update_info) const;
void from_pb(PartialUpdateInfoPB* partial_update_info);
Expand All @@ -59,6 +59,7 @@ struct PartialUpdateInfo {
bool can_insert_new_rows_in_partial_update {true};
bool is_strict_mode {false};
int64_t timestamp_ms {0};
int32_t nano_seconds {0};
std::string timezone;
bool is_input_columns_contains_auto_inc_column = false;
bool is_schema_contains_auto_inc_column = false;
Expand Down
12 changes: 6 additions & 6 deletions be/src/olap/rowset_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,12 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
}
// set partial update columns info
_partial_update_info = std::make_shared<PartialUpdateInfo>();
_partial_update_info->init(*_tablet_schema, table_schema_param->is_partial_update(),
table_schema_param->partial_update_input_columns(),
table_schema_param->is_strict_mode(),
table_schema_param->timestamp_ms(), table_schema_param->timezone(),
table_schema_param->auto_increment_coulumn(),
_max_version_in_flush_phase);
_partial_update_info->init(
*_tablet_schema, table_schema_param->is_partial_update(),
table_schema_param->partial_update_input_columns(),
table_schema_param->is_strict_mode(), table_schema_param->timestamp_ms(),
table_schema_param->nano_seconds(), table_schema_param->timezone(),
table_schema_param->auto_increment_coulumn(), _max_version_in_flush_phase);
}

} // namespace doris
1 change: 1 addition & 0 deletions be/src/vec/sink/writer/vtablet_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,7 @@ Status VTabletWriter::_init(RuntimeState* state, RuntimeProfile* profile) {
_schema.reset(new OlapTableSchemaParam());
RETURN_IF_ERROR(_schema->init(table_sink.schema));
_schema->set_timestamp_ms(state->timestamp_ms());
_schema->set_nano_seconds(state->nano_seconds());
_schema->set_timezone(state->timezone());
_location = _pool->add(new OlapTableLocationParam(table_sink.location));
_nodes_info = _pool->add(new DorisNodesInfo(table_sink.nodes_info));
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/sink/writer/vtablet_writer_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ Status VTabletWriterV2::_init(RuntimeState* state, RuntimeProfile* profile) {
_schema.reset(new OlapTableSchemaParam());
RETURN_IF_ERROR(_schema->init(table_sink.schema));
_schema->set_timestamp_ms(state->timestamp_ms());
_schema->set_nano_seconds(state->nano_seconds());
_schema->set_timezone(state->timezone());
_location = _pool->add(new OlapTableLocationParam(table_sink.location));
_nodes_info = _pool->add(new DorisNodesInfo(table_sink.nodes_info));
Expand Down
1 change: 1 addition & 0 deletions gensrc/proto/descriptors.proto
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,6 @@ message POlapTableSchemaParam {
optional string timezone = 12;
optional int32 auto_increment_column_unique_id = 13 [default = -1];
optional string sequence_map_col_name = 14;
optional int32 nano_seconds = 15 [default = 0];
};

1 change: 1 addition & 0 deletions gensrc/proto/olap_file.proto
Original file line number Diff line number Diff line change
Expand Up @@ -624,4 +624,5 @@ message PartialUpdateInfoPB {
optional bool is_schema_contains_auto_inc_column = 10 [default = false];
repeated string default_values = 11;
optional int64 max_version_in_flush_phase = 12 [default = -1];
optional int32 nano_seconds = 13 [default = 0];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql1 --
1 1
2 2
3 3
4 4

-- !sql2 --
1 1
2 2
3 3
4 4

-- !sql3 --
1 1 999 999
2 2 999 999
3 3 999 999
4 4 999 999

-- !sql4 --
1 1 \N \N
2 2 \N \N
3 3 \N \N
4 4 \N \N

-- !sql1 --
1 1
2 2
3 3
4 4

-- !sql2 --
1 1
2 2
3 3
4 4

-- !sql3 --
1 1 999 999
2 2 999 999
3 3 999 999
4 4 999 999

-- !sql4 --
1 1 \N \N
2 2 \N \N
3 3 \N \N
4 4 \N \N

-- !sql1 --
1 1
2 2
3 3
4 4

-- !sql2 --
1 1
2 2
3 3
4 4

-- !sql3 --
1 1 999 999
2 2 999 999
3 3 999 999
4 4 999 999

-- !sql4 --
1 1 \N \N
2 2 \N \N
3 3 \N \N
4 4 \N \N

-- !sql1 --
1 1
2 2
3 3
4 4

-- !sql2 --
1 1
2 2
3 3
4 4

-- !sql3 --
1 1 999 999
2 2 999 999
3 3 999 999
4 4 999 999

-- !sql4 --
1 1 \N \N
2 2 \N \N
3 3 \N \N
4 4 \N \N

Loading

0 comments on commit e68de4d

Please sign in to comment.