From df5563971bcc9476bcab97501c325027d6729f16 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Thu, 8 Aug 2024 10:48:53 +0800 Subject: [PATCH] [Performance](opt) opt the order by performance in permutation (#38985) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes Before: ``` select l_quantity from lineitem order by l_quantity limit 10000020; +--------------+ | ReturnedRows | +--------------+ | 10000020 | +--------------+ 1 row in set (2 min 24.42 sec) ``` after: ``` mysql [tpch]>select l_quantity from lineitem order by l_quantity limit 10000020; +--------------+ | ReturnedRows | +--------------+ | 10000020 | +--------------+ 1 row in set (28.42 sec) ``` --- be/src/vec/columns/column_decimal.h | 25 +++++++++++++++++-------- be/src/vec/columns/column_string.cpp | 9 ++++----- be/src/vec/columns/column_vector.cpp | 3 ++- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 24982b7504c893..cc1661312a8dc2 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -21,6 +21,7 @@ #pragma once #include +#include #include #include #include @@ -269,14 +270,22 @@ class ColumnDecimal final : public COWHelper> { for (U i = 0; i < s; ++i) res[i] = i; auto sort_end = res.end(); - if (limit && limit < s) sort_end = res.begin() + limit; - - if (reverse) - std::partial_sort(res.begin(), sort_end, res.end(), - [this](size_t a, size_t b) { return data[a] > data[b]; }); - else - std::partial_sort(res.begin(), sort_end, res.end(), - [this](size_t a, size_t b) { return data[a] < data[b]; }); + if (limit && limit < s / 8.0) { + sort_end = res.begin() + limit; + if (reverse) + std::partial_sort(res.begin(), sort_end, res.end(), + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + std::partial_sort(res.begin(), sort_end, res.end(), + [this](size_t a, size_t b) { return data[a] < data[b]; }); 
+ } else { + if (reverse) + pdqsort(res.begin(), res.end(), + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + pdqsort(res.begin(), res.end(), + [this](size_t a, size_t b) { return data[a] < data[b]; }); + } } void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint32_t& hash) const { diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index db0902d15a1bfc..952a1a97915046 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -483,9 +483,8 @@ void ColumnStr::get_permutation(bool reverse, size_t limit, int /*nan_directi res[i] = i; } - if (limit >= s) { - limit = 0; - } + // std::partial_sort need limit << s can get performance benefit + if (limit > (s / 8.0)) limit = 0; if (limit) { if (reverse) { @@ -495,9 +494,9 @@ void ColumnStr::get_permutation(bool reverse, size_t limit, int /*nan_directi } } else { if (reverse) { - std::sort(res.begin(), res.end(), less(*this)); + pdqsort(res.begin(), res.end(), less(*this)); } else { - std::sort(res.begin(), res.end(), less(*this)); + pdqsort(res.begin(), res.end(), less(*this)); } } } diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index ff7ab99d5de89f..f8d05c3d492a6f 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -255,7 +255,8 @@ void ColumnVector::get_permutation(bool reverse, size_t limit, int nan_direct if (s == 0) return; - if (limit >= s) limit = 0; + // std::partial_sort need limit << s can get performance benefit + if (limit > (s / 8.0)) limit = 0; if (limit) { for (size_t i = 0; i < s; ++i) res[i] = i;