diff --git a/be/src/vec/sink/vrow_distribution.cpp b/be/src/vec/sink/vrow_distribution.cpp index 2b635872e8ba8f..741148b66f5ef7 100644 --- a/be/src/vec/sink/vrow_distribution.cpp +++ b/be/src/vec/sink/vrow_distribution.cpp @@ -66,8 +66,10 @@ Status VRowDistribution::_save_missing_values( } cur_row_values.push_back(node); } - //For duplicate cur_values, they will be filtered in FE - _partitions_need_create.emplace_back(cur_row_values); + if (!_deduper.contains(cur_row_values)) { + _deduper.insert(cur_row_values); + _partitions_need_create.emplace_back(cur_row_values); + } } // to avoid too large mem use diff --git a/be/src/vec/sink/vrow_distribution.h b/be/src/vec/sink/vrow_distribution.h index 5267b488400b8f..fffe0e3f7f1887 100644 --- a/be/src/vec/sink/vrow_distribution.h +++ b/be/src/vec/sink/vrow_distribution.h @@ -24,7 +24,9 @@ #include #include +#include #include +#include #include #include "common/status.h" @@ -133,6 +135,10 @@ class VRowDistribution { Status automatic_create_partition(); void clear_batching_stats(); + // for auto partition + std::unique_ptr _batching_block; + bool _deal_batched = false; // If true, send batched block before any block's append. + private: std::pair _get_partition_function(); @@ -170,17 +176,29 @@ class VRowDistribution { int64_t rows); void _reset_find_tablets(int64_t rows); + struct NullableStringListHash { + std::size_t _hash(const TNullableStringLiteral& arg) const { + if (arg.is_null) { + return 0; + } + return std::hash()(arg.value); + } + std::size_t operator()(const std::vector& arg) const { + std::size_t result = 0; + for (const auto& v : arg) { + result = (result << 1) ^ _hash(v); + } + return result; + } + }; + RuntimeState* _state = nullptr; int _batch_size = 0; // for auto partitions std::vector> _partitions_need_create; - -public: - std::unique_ptr _batching_block; - bool _deal_batched = false; // If true, send batched block before any block's append. -private: size_t _batching_rows = 0, _batching_bytes = 0; + std::unordered_set, NullableStringListHash> _deduper; OlapTableBlockConvertor* _block_convertor = nullptr; OlapTabletFinder* _tablet_finder = nullptr;