Skip to content

Commit

Permalink
[Fix](Variant) casting to decimal type may lost precision and scale w…
Browse files Browse the repository at this point in the history
…hen _normalize_predicate

use TypeDescriptor to prevent from info lost
  • Loading branch information
eldenmoon committed Aug 20, 2024
1 parent afa5684 commit 059b7f9
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 27 deletions.
2 changes: 1 addition & 1 deletion be/src/olap/iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class StorageReadOptions {
Version version;
int64_t tablet_id = 0;
// slots that cast may be eliminated in storage layer
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
RowRanges row_ranges;
size_t topn_limit = 0;
};
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/rowset_reader_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct RowsetReaderContext {
const std::set<int32_t>* output_columns = nullptr;
RowsetId rowset_id;
// slots that cast may be eliminated in storage layer
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
int64_t ttl_seconds = 0;
size_t topn_limit = 0;
};
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1884,7 +1884,7 @@ bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) {
// Use variant cast dst type
field_type = TabletColumn::get_field_type_by_type(
_opts.target_cast_type_for_variants[_schema->column(cid)->name()]);
_opts.target_cast_type_for_variants[_schema->column(cid)->name()].type);
}
switch (predicate->type()) {
case PredicateType::EQ:
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/tablet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ TabletColumn TabletReader::materialize_column(const TabletColumn& orig) {
}
TabletColumn column_with_cast_type = orig;
auto cast_type = _reader_context.target_cast_type_for_variants.at(orig.name());
column_with_cast_type.set_type(TabletColumn::get_field_type_by_type(cast_type));
FieldType filed_type = TabletColumn::get_field_type_by_type(cast_type.type);
column_with_cast_type.set_type(filed_type);
return column_with_cast_type;
}

Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/tablet_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class TabletReader {
std::vector<FunctionFilter> function_filters;
std::vector<RowsetMetaSharedPtr> delete_predicates;
// slots that cast may be eliminated in storage layer
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;

std::vector<RowSetSplits> rs_splits;
// For unique key table with merge-on-write
Expand Down
38 changes: 19 additions & 19 deletions be/src/pipeline/exec/scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "pipeline/exec/meta_scan_operator.h"
#include "pipeline/exec/olap_scan_operator.h"
#include "pipeline/exec/operator.h"
#include "runtime/types.h"
#include "util/runtime_profile.h"
#include "vec/exprs/vcast_expr.h"
#include "vec/exprs/vcompound_pred.h"
Expand Down Expand Up @@ -125,14 +126,14 @@ Status ScanLocalState<Derived>::_normalize_conjuncts(RuntimeState* state) {
// The conjuncts is always on output tuple, so use _output_tuple_desc;
std::vector<SlotDescriptor*> slots = p._output_tuple_desc->slots();

auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
switch (type) {
#define M(NAME) \
case TYPE_##NAME: { \
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
slot->type().precision, slot->type().scale); \
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
break; \
auto init_value_range = [&](SlotDescriptor* slot, const TypeDescriptor& type_desc) {
switch (type_desc.type) {
#define M(NAME) \
case TYPE_##NAME: { \
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
type_desc.precision, type_desc.scale); \
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
break; \
}
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
M(TINYINT) \
Expand Down Expand Up @@ -173,7 +174,7 @@ Status ScanLocalState<Derived>::_normalize_conjuncts(RuntimeState* state) {
continue;
}
}
init_value_range(slot, slot->type().type);
init_value_range(slot, slot->type());
}

get_cast_types_for_variants();
Expand Down Expand Up @@ -586,7 +587,7 @@ Status ScanLocalState<Derived>::_normalize_in_and_eq_predicate(vectorized::VExpr
ColumnValueRange<T>& range,
PushDownType* pdt) {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->is_nullable(), slot->type().precision, slot->type().scale);
slot->is_nullable(), range.precision(), range.scale());
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
HybridSetBase::IteratorBase* iter = nullptr;
Expand Down Expand Up @@ -737,7 +738,7 @@ Status ScanLocalState<Derived>::_normalize_not_in_and_not_eq_predicate(
ColumnValueRange<T>& range, PushDownType* pdt) {
bool is_fixed_range = range.is_fixed_value_range();
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(
range.column_name(), slot->is_nullable(), slot->type().precision, slot->type().scale);
range.column_name(), slot->is_nullable(), range.precision(), range.scale());
PushDownType temp_pdt = PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
Expand Down Expand Up @@ -917,14 +918,14 @@ Status ScanLocalState<Derived>::_normalize_is_null_predicate(vectorized::VExpr*
if (reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name ==
"is_null_pred") {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->is_nullable(), slot->type().precision, slot->type().scale);
slot->is_nullable(), range.precision(), range.scale());
temp_range.set_contain_null(true);
range.intersection(temp_range);
*pdt = temp_pdt;
} else if (reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name ==
"is_not_null_pred") {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->is_nullable(), slot->type().precision, slot->type().scale);
slot->is_nullable(), range.precision(), range.scale());
temp_range.set_contain_null(false);
range.intersection(temp_range);
*pdt = temp_pdt;
Expand Down Expand Up @@ -1161,7 +1162,7 @@ Status ScanLocalState<Derived>::_normalize_match_predicate(vectorized::VExpr* ex

// create empty range as temp range, temp range should do intersection on range
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->is_nullable(), slot->type().precision, slot->type().scale);
slot->is_nullable(), range.precision(), range.scale());
// Normalize match conjuncts like 'where col match value'

auto match_checker = [](const std::string& fn_name) { return is_match_condition(fn_name); };
Expand Down Expand Up @@ -1324,7 +1325,7 @@ Status ScanLocalState<Derived>::_get_topn_filters(RuntimeState* state) {
template <typename Derived>
void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
const vectorized::VExpr* expr,
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types) {
std::unordered_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types) {
auto& p = _parent->cast<typename Derived::Parent>();
const auto* cast_expr = dynamic_cast<const vectorized::VCastExpr*>(expr);
if (cast_expr != nullptr) {
Expand All @@ -1337,10 +1338,9 @@ void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
}
std::vector<SlotDescriptor*> slots = output_tuple_desc()->slots();
SlotDescriptor* src_slot_desc = p._slot_id_to_slot_desc[src_slot->slot_id()];
PrimitiveType cast_dst_type =
cast_expr->get_target_type()->get_type_as_type_descriptor().type;
TypeDescriptor type_desc = cast_expr->get_target_type()->get_type_as_type_descriptor();
if (src_slot_desc->type().is_variant_type()) {
colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type);
colname_to_cast_types[src_slot_desc->col_name()].push_back(type_desc);
}
}
for (const auto& child : expr->children()) {
Expand All @@ -1350,7 +1350,7 @@ void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(

template <typename Derived>
void ScanLocalState<Derived>::get_cast_types_for_variants() {
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>> colname_to_cast_types;
std::unordered_map<std::string, std::vector<TypeDescriptor>> colname_to_cast_types;
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
auto& conjunct = *it;
if (conjunct->root()) {
Expand Down
5 changes: 3 additions & 2 deletions be/src/pipeline/exec/scan_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "pipeline/common/runtime_filter_consumer.h"
#include "pipeline/dependency.h"
#include "runtime/descriptors.h"
#include "runtime/types.h"
#include "vec/exec/scan/vscan_node.h"
#include "vec/exprs/vectorized_fn_call.h"
#include "vec/exprs/vin_predicate.h"
Expand Down Expand Up @@ -340,7 +341,7 @@ class ScanLocalState : public ScanLocalStateBase {
void get_cast_types_for_variants();
void _filter_and_collect_cast_type_for_variant(
const vectorized::VExpr* expr,
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types);
std::unordered_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types);

Status _get_topn_filters(RuntimeState* state);

Expand All @@ -357,7 +358,7 @@ class ScanLocalState : public ScanLocalStateBase {
std::vector<FunctionFilter> _push_down_functions;

// colname -> cast dst type
std::map<std::string, PrimitiveType> _cast_types_for_variants;
std::map<std::string, TypeDescriptor> _cast_types_for_variants;

// slot id -> ColumnValueRange
// Parsed from conjuncts
Expand Down
4 changes: 3 additions & 1 deletion regression-test/suites/variant_p0/sql/implicit_cast.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ SELECT v["payload"]["member"]["id"] FROM ghdata where v["payload"]["member"]["id
select k, json_extract(v, '$.repo') from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10;
-- SELECT v["payload"]["member"]["id"], count() FROM ghdata where v["payload"]["member"]["id"] is not null group by v["payload"]["member"]["id"] order by 1, 2 desc LIMIT 10;
select k, v["id"], v["type"], v["repo"]["name"] from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10;
SELECT v["payload"]["pusher_type"] FROM ghdata where v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
SELECT v["payload"]["pusher_type"] FROM ghdata where v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
-- implicit cast to decimal type
SELECT v["id"] FROM ghdata where v["id"] not in (7273, 10.118626, -69352) order by cast(v["id"] as decimal) limit 10;

0 comments on commit 059b7f9

Please sign in to comment.