Skip to content

Commit

Permalink
[opt](inverted index) Determine if in_list can execute fast_execute.
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 authored and yiguolei committed Aug 30, 2024
1 parent e072ce7 commit d5fd4e2
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 3 deletions.
13 changes: 10 additions & 3 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,10 +917,17 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return false;
}

if ((pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) &&
pred->predicate_params()->marked_by_runtime_filter) {
if (pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) {
auto predicate_param = pred->predicate_params();
// in_list or not_in_list predicate produced by runtime filter
return false;
if (predicate_param->marked_by_runtime_filter) {
return false;
}
// the in_list or not_in_list value count cannot be greater than threshold
int32_t threshold = _opts.runtime_state->query_options().in_list_value_count_threshold;
if (pred_in_compound && predicate_param->values.size() > threshold) {
return false;
}
}

// UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors
Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers&
pred_result_sign +=
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";
if (function_name == "in" || function_name == "not_in") {
if (arguments.size() - 1 > _in_list_value_count_threshold) {
return pred_result_sign;
}
// Generating 'result_sign' from 'inlist' requires sorting the values.
std::set<std::string> values;
for (size_t i = 1; i < arguments.size(); i++) {
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exprs/vexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ class VExpr {
uint32_t _index_unique_id = 0;
bool _can_fast_execute = false;
bool _enable_inverted_index_query = true;
uint32_t _in_list_value_count_threshold = 10;
};

} // namespace vectorized
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exprs/vin_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <vector>

#include "common/status.h"
#include "runtime/runtime_state.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
Expand Down Expand Up @@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
VExpr::register_function_context(state, context);
_prepare_finished = true;
_can_fast_execute = can_fast_execute();
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
return Status::OK();
}

Expand Down
10 changes: 10 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index";
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index";

// Name of the session variable that is bound to the inListValueCountThreshold field
// through its @VariableMgr.VarAttr annotation.
public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold";

/**
* If set false, user couldn't submit analyze SQL and FE won't allocate any related resources.
*/
Expand Down Expand Up @@ -2027,6 +2029,13 @@ public void setIgnoreShapePlanNodes(String ignoreShapePlanNodes) {
})
public boolean enableFallbackOnMissingInvertedIndex = true;

/**
 * Value-count threshold for IN-list predicates, exposed as the session variable
 * named by IN_LIST_VALUE_COUNT_THRESHOLD. Per the description below, an IN
 * condition with more values than this threshold does not use fast_execute.
 * The default is 10, and toThrift() copies the value into the TQueryOptions it builds.
 */
@VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
"in条件value数量大于这个threshold后将不会走fast_execute",
"When the number of values in the IN condition exceeds this threshold,"
+ " fast_execute will not be used."
})
public int inListValueCountThreshold = 10;

/**
 * Stores the given flag in this object's enableESParallelScroll field.
 *
 * @param enableESParallelScroll the new value for the field
 */
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
this.enableESParallelScroll = enableESParallelScroll;
}
Expand Down Expand Up @@ -3530,6 +3539,7 @@ public TQueryOptions toThrift() {
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);

tResult.setKeepCarriageReturn(keepCarriageReturn);
tResult.setInListValueCountThreshold(inListValueCountThreshold);
return tResult;
}

Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ struct TQueryOptions {

126: optional i32 runtime_bloom_filter_max_size = 16777216;

127: optional i32 in_list_value_count_threshold = 10;
128: optional bool enable_verbose_profile = false;
129: optional i32 rpc_verbose_profile_max_instance_count = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,9 @@
-- !sql --
2

-- !sql --
852

-- !sql --
852

Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """

sql """ set enable_common_expr_pushdown = true; """

sql """ set in_list_value_count_threshold = 0; """
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
sql """ set in_list_value_count_threshold = 10; """
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """

} finally {
}
} finally {
Expand Down

0 comments on commit d5fd4e2

Please sign in to comment.