diff --git a/be/src/vec/common/hash_table/join_hash_table.h b/be/src/vec/common/hash_table/join_hash_table.h index 10ca6c9b2ddaa5..a869ad419ad530 100644 --- a/be/src/vec/common/hash_table/join_hash_table.h +++ b/be/src/vec/common/hash_table/join_hash_table.h @@ -89,7 +89,7 @@ class JoinHashTable { auto find_batch(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, uint32_t build_idx, int probe_rows, uint32_t* __restrict probe_idxs, bool& probe_visited, - uint32_t* __restrict build_idxs) { + uint32_t* __restrict build_idxs, bool has_mark_join_conjunct = false) { if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) { if (_empty_build_side) { @@ -100,12 +100,25 @@ class JoinHashTable { if constexpr (with_other_conjuncts || (is_mark_join && JoinOpType != TJoinOp::RIGHT_SEMI_JOIN)) { - constexpr bool null_aware_without_other_conjuncts = - (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) && - !with_other_conjuncts; - return _find_batch_conjunct( + if constexpr (!with_other_conjuncts) { + constexpr bool is_null_aware_join = + JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN; + constexpr bool is_left_half_join = JoinOpType == TJoinOp::LEFT_SEMI_JOIN || + JoinOpType == TJoinOp::LEFT_ANTI_JOIN; + + /// For null aware join or left half(semi/anti) join without other conjuncts and without + /// mark join conjunct. + /// If one row on probe side has one match in build side, we should stop searching the + /// hash table for this row. + if (is_null_aware_join || (is_left_half_join && !has_mark_join_conjunct)) { + return _find_batch_conjunct( + keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, + build_idxs); + } + } + + return _find_batch_conjunct( keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, build_idxs); } @@ -314,7 +327,7 @@ class JoinHashTable { return std::tuple {probe_idx, 0U, matched_cnt}; } - template + template auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, uint32_t build_idx, int probe_rows, uint32_t* __restrict probe_idxs, uint32_t* __restrict build_idxs) { @@ -345,7 +358,7 @@ class JoinHashTable { probe_idxs[matched_cnt] = probe_idx; matched_cnt++; - if constexpr (null_aware_without_other_conjuncts) { + if constexpr (only_need_to_match_one) { build_idx = 0; break; } diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index b4212405aeda17..9a15df637aaf60 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -213,6 +213,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash } auto& mcol = mutable_block.mutable_columns(); + const bool has_mark_join_conjunct = !_parent->_mark_join_conjuncts.empty(); int current_offset = 0; if constexpr ((JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || @@ -258,7 +259,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash need_null_map_for_probe && ignore_null > (hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), probe_index, build_index, probe_rows, _probe_indexs.data(), - _probe_visited, _build_indexs.data()); + _probe_visited, _build_indexs.data(), has_mark_join_conjunct); probe_index = new_probe_idx; build_index = new_build_idx; current_offset = new_current_offset;