diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp new file mode 100644 index 00000000000000..b8ae4c6530d575 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/aggregate_functions/aggregate_function_bitmap_agg.h" + +#include "vec/aggregate_functions/aggregate_function_simple_factory.h" +#include "vec/aggregate_functions/helpers.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" + +namespace doris::vectorized { + +template +AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_types) { /* Strips nullability from the first argument type and dispatches on its integer type index; logs a warning and returns nullptr when no integer type matches. */ + auto type = remove_nullable(argument_types[0]); + WhichDataType which(type); +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) { \ + return std::make_shared>(argument_types); \ + } + FOR_INTEGER_TYPES(DISPATCH) +#undef DISPATCH + LOG(WARNING) << "with unknown type, failed in create_with_int_data_type bitmap_agg" + << " and type is: " << argument_types[0]->get_name(); + return nullptr; +} + +AggregateFunctionPtr create_aggregate_function_bitmap_agg(const std::string& name, + const DataTypes& argument_types, + const bool result_is_nullable) { /* Creator for "bitmap_agg": branches on whether the first argument type is nullable; `name` and `result_is_nullable` are not consulted here. */ + const bool arg_is_nullable = argument_types[0]->is_nullable(); + if (arg_is_nullable) { + return AggregateFunctionPtr(create_with_int_data_type(argument_types)); + } else { + return AggregateFunctionPtr(create_with_int_data_type(argument_types)); + } +} + +void register_aggregate_function_bitmap_agg(AggregateFunctionSimpleFactory& factory) { /* Registers the creator above under the name "bitmap_agg". */ + factory.register_function_both("bitmap_agg", create_aggregate_function_bitmap_agg); +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h new file mode 100644 index 00000000000000..43ebb73249d367 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "util/bitmap_value.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/data_types/data_type_bitmap.h" + +namespace doris { +namespace vectorized { +class Arena; +class BufferReadable; +class BufferWritable; +class IColumn; +} // namespace vectorized +} // namespace doris + +namespace doris::vectorized { + /* Aggregation state: a single BitmapValue. add() inserts one value, reset() clears the bitmap, merge() ORs another state's bitmap into this one. */ +template +struct AggregateFunctionBitmapAggData { + BitmapValue value; + + void add(const T& value_) { value.add(value_); } + + void reset() { value.clear(); } + + void merge(const AggregateFunctionBitmapAggData& other) { value |= other.value; } +}; + /* Aggregate function named "bitmap_agg": collects the values of its first input column into a BitmapValue and returns it as a bitmap; when arg_nullable is set, rows flagged in the null map are skipped. */ +template +class AggregateFunctionBitmapAgg final + : public IAggregateFunctionDataHelper, + AggregateFunctionBitmapAgg> { +public: + using ColVecType = ColumnVector; + using Data = AggregateFunctionBitmapAggData; + + AggregateFunctionBitmapAgg(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper>( + argument_types_) {} + + std::string get_name() const override { return "bitmap_agg"; } + DataTypePtr get_return_type() const override { return std::make_shared(); } + /* Adds row `row_num` of columns[0] to the state at `place`; in the nullable variant a row whose null-map entry is set contributes nothing. */ + void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, + Arena* arena) const override { + DCHECK_LT(row_num, columns[0]->size()); + if constexpr (arg_nullable) { + auto& nullable_col = assert_cast(*columns[0]); + auto& 
nullable_map = nullable_col.get_null_map_data(); + if (!nullable_map[row_num]) { + auto& col = assert_cast(nullable_col.get_nested_column()); + this->data(place).add(col.get_data()[row_num]); + } + } else { + auto& col = assert_cast(*columns[0]); + this->data(place).add(col.get_data()[row_num]); + } + } + + void reset(AggregateDataPtr place) const override { this->data(place).reset(); } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, + Arena* arena) const override { + this->data(place).merge(this->data(rhs)); + } + /* Appends the aggregated bitmap held at `place` to the result column. */ + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + auto& column = assert_cast(to); + column.get_data().push_back(this->data(place).value); + } + /* Buffer-based serialize/deserialize are marked unreachable; this class provides column-based overrides below. NOTE(review): confirm no caller can reach the buffer path. */ + void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { + __builtin_unreachable(); + } + + void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, + Arena*) const override { + __builtin_unreachable(); + } + /* Copies the first num_rows bitmaps of `column` into consecutive Data states starting at `places`. */ + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + size_t num_rows) const override { + auto& col = assert_cast(column); + DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; + auto* src = col.get_data().data(); + auto* data = &(this->data(places)); + for (size_t i = 0; i != num_rows; ++i) { + data[i].value = src[i]; + } + } + /* Writes the bitmap of each state (located at places[i] + offset) into row i of `dst`, resized to num_rows. */ + void serialize_to_column(const std::vector& places, size_t offset, + MutableColumnPtr& dst, const size_t num_rows) const override { + auto& col = assert_cast(*dst); + col.resize(num_rows); + auto* data = col.get_data().data(); + for (size_t i = 0; i != num_rows; ++i) { + data[i] = this->data(places[i] + offset).value; + } + } + /* ORs every bitmap in `column` into the single state at `place`. */ + void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, + Arena* arena) const override { + auto& col = assert_cast(column); + const size_t num_rows = column.size(); + auto* data = col.get_data().data(); + + for 
(size_t i = 0; i != num_rows; ++i) { + this->data(place).value |= data[i]; + } + } + /* ORs rows begin..end of `column` into the state at `place`. NOTE(review): the loop includes index `end`, while the DCHECK permits end == column.size(); confirm callers always pass an inclusive last index. */ + void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, + const IColumn& column, size_t begin, size_t end, + Arena* arena) const override { + DCHECK(end <= column.size() && begin <= end) + << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); + auto& col = assert_cast(column); + auto* data = col.get_data().data(); + for (size_t i = begin; i <= end; ++i) { + this->data(place).value |= data[i]; + } + } + /* ORs row i of `column` into the state at places[i] + offset; the offset must be applied exactly as serialize_to_column does, otherwise the merge lands on the wrong state when offset is non-zero. */ + void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, + AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + const size_t num_rows) const override { + auto& col = assert_cast(*assert_cast(column)); + auto* data = col.get_data().data(); + for (size_t i = 0; i != num_rows; ++i) { + this->data(places[i] + offset).value |= data[i]; + } + } + /* Same as deserialize_and_merge_vec, but rows whose places[i] is null are skipped. */ + void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, + AggregateDataPtr rhs, const ColumnString* column, + Arena* arena, const size_t num_rows) const override { + auto& col = assert_cast(*assert_cast(column)); + auto* data = col.get_data().data(); + for (size_t i = 0; i != num_rows; ++i) { + if (places[i]) { + this->data(places[i] + offset).value |= data[i]; + } + } + } + /* Appends the bitmap of the single state at `place` as one new row of `to`. */ + void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, + IColumn& to) const override { + auto& col = assert_cast(to); + size_t old_size = col.size(); + col.resize(old_size + 1); + col.get_data()[old_size] = this->data(place).value; + } + + [[nodiscard]] MutableColumnPtr create_serialize_column() const override { + return ColumnBitmap::create(); + } + + [[nodiscard]] DataTypePtr get_serialized_type() const override { + return std::make_shared(); + } +}; + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index 9ff47a6f57d0c6..91113b1f52e41f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -60,6 +60,7 @@ void register_aggregate_function_histogram(AggregateFunctionSimpleFactory& facto void register_aggregate_function_count_old(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_sum_old(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_map_agg(AggregateFunctionSimpleFactory& factory); +void register_aggregate_function_bitmap_agg(AggregateFunctionSimpleFactory& factory); AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { static std::once_flag oc; @@ -95,6 +96,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_avg_weighted(instance); register_aggregate_function_histogram(instance); register_aggregate_function_map_agg(instance); + register_aggregate_function_bitmap_agg(instance); register_aggregate_function_stddev_variance_samp(instance); register_aggregate_function_replace_reader_load(instance); diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md new file mode 100644 index 00000000000000..260c7b6b855d6e --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md @@ -0,0 +1,89 @@ +--- +{ + "title": "BITMAP_AGG", + "language": "en" +} +--- + + + +## BITMAP_AGG +### description +#### Syntax + +`BITMAP_AGG(expr)` + + +This aggregating function returns a bitmap that aggregates the values of expr, excluding any null values. +The type of expr needs to be TINYINT, SMALLINT, INT, or BIGINT. 
+ +### example +``` +MySQL > select `n_nationkey`, `n_name`, `n_regionkey` from `nation`; ++-------------+----------------+-------------+ +| n_nationkey | n_name | n_regionkey | ++-------------+----------------+-------------+ +| 0 | ALGERIA | 0 | +| 1 | ARGENTINA | 1 | +| 2 | BRAZIL | 1 | +| 3 | CANADA | 1 | +| 4 | EGYPT | 4 | +| 5 | ETHIOPIA | 0 | +| 6 | FRANCE | 3 | +| 7 | GERMANY | 3 | +| 8 | INDIA | 2 | +| 9 | INDONESIA | 2 | +| 10 | IRAN | 4 | +| 11 | IRAQ | 4 | +| 12 | JAPAN | 2 | +| 13 | JORDAN | 4 | +| 14 | KENYA | 0 | +| 15 | MOROCCO | 0 | +| 16 | MOZAMBIQUE | 0 | +| 17 | PERU | 1 | +| 18 | CHINA | 2 | +| 19 | ROMANIA | 3 | +| 20 | SAUDI ARABIA | 4 | +| 21 | VIETNAM | 2 | +| 22 | RUSSIA | 3 | +| 23 | UNITED KINGDOM | 3 | +| 24 | UNITED STATES | 1 | ++-------------+----------------+-------------+ + +MySQL > select n_regionkey, bitmap_to_string(bitmap_agg(n_nationkey)) from nation group by n_regionkey; ++-------------+---------------------------------------------+ +| n_regionkey | bitmap_to_string(bitmap_agg(`n_nationkey`)) | ++-------------+---------------------------------------------+ +| 4 | 4,10,11,13,20 | +| 2 | 8,9,12,18,21 | +| 1 | 1,2,3,17,24 | +| 0 | 0,5,14,15,16 | +| 3 | 6,7,19,22,23 | ++-------------+---------------------------------------------+ + +MySQL > select bitmap_count(bitmap_agg(n_nationkey)) from nation; ++-----------------------------------------+ +| bitmap_count(bitmap_agg(`n_nationkey`)) | ++-----------------------------------------+ +| 25 | ++-----------------------------------------+ +``` +### keywords +BITMAP_AGG diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md new file mode 100644 index 00000000000000..d27f50e2ec85f1 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_agg.md @@ -0,0 +1,88 @@ +--- +{ + "title": "BITMAP_AGG", + "language": "zh-CN" +} +--- + + + +## BITMAP_AGG +### 
description +#### Syntax + +`BITMAP_AGG(expr)` + +聚合 expr 的值(不包括任何空值)得到 bitmap。 +expr 的类型需要为 TINYINT,SMALLINT,INT 和 BIGINT 类型。 + +### example +``` +MySQL > select `n_nationkey`, `n_name`, `n_regionkey` from `nation`; ++-------------+----------------+-------------+ +| n_nationkey | n_name | n_regionkey | ++-------------+----------------+-------------+ +| 0 | ALGERIA | 0 | +| 1 | ARGENTINA | 1 | +| 2 | BRAZIL | 1 | +| 3 | CANADA | 1 | +| 4 | EGYPT | 4 | +| 5 | ETHIOPIA | 0 | +| 6 | FRANCE | 3 | +| 7 | GERMANY | 3 | +| 8 | INDIA | 2 | +| 9 | INDONESIA | 2 | +| 10 | IRAN | 4 | +| 11 | IRAQ | 4 | +| 12 | JAPAN | 2 | +| 13 | JORDAN | 4 | +| 14 | KENYA | 0 | +| 15 | MOROCCO | 0 | +| 16 | MOZAMBIQUE | 0 | +| 17 | PERU | 1 | +| 18 | CHINA | 2 | +| 19 | ROMANIA | 3 | +| 20 | SAUDI ARABIA | 4 | +| 21 | VIETNAM | 2 | +| 22 | RUSSIA | 3 | +| 23 | UNITED KINGDOM | 3 | +| 24 | UNITED STATES | 1 | ++-------------+----------------+-------------+ + +MySQL > select n_regionkey, bitmap_to_string(bitmap_agg(n_nationkey)) from nation group by n_regionkey; ++-------------+---------------------------------------------+ +| n_regionkey | bitmap_to_string(bitmap_agg(`n_nationkey`)) | ++-------------+---------------------------------------------+ +| 4 | 4,10,11,13,20 | +| 2 | 8,9,12,18,21 | +| 1 | 1,2,3,17,24 | +| 0 | 0,5,14,15,16 | +| 3 | 6,7,19,22,23 | ++-------------+---------------------------------------------+ + +MySQL > select bitmap_count(bitmap_agg(n_nationkey)) from nation; ++-----------------------------------------+ +| bitmap_count(bitmap_agg(`n_nationkey`)) | ++-----------------------------------------+ +| 25 | ++-----------------------------------------+ +``` +### keywords +BITMAP_AGG diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java index da2a6190ef4e5e..b646c7ef982946 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java @@ -54,7 +54,7 @@ public class AggregateFunction extends Function { FunctionSet.INTERSECT_COUNT, FunctionSet.ORTHOGONAL_BITMAP_UNION_COUNT, FunctionSet.COUNT, "approx_count_distinct", "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL, FunctionSet.RETENTION, - FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, FunctionSet.MAP_AGG); + FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, FunctionSet.MAP_AGG, FunctionSet.BITMAP_AGG); public static ImmutableSet ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET = ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", "percentile_approx"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 0d84e68d252404..2391e1ec843112 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -204,6 +204,9 @@ public boolean isNullResultWithOneNullParamFunctions(String funcName) { public static final String HIST = "hist"; public static final String MAP_AGG = "map_agg"; + public static final String BITMAP_AGG = "bitmap_agg"; + public static final String COUNT_BY_ENUM = "count_by_enum"; + private static final Map TOPN_UPDATE_SYMBOL = ImmutableMap.builder() .put(Type.CHAR, @@ -1282,6 +1285,16 @@ private void initAggregateBuiltins() { addBuiltin(AggregateFunction.createBuiltin("group_bit_xor", Lists.newArrayList(t), t, t, "", "", "", "", "", false, true, false, true)); + if (!t.equals(Type.LARGEINT)) { + addBuiltin( + AggregateFunction.createBuiltin("bitmap_agg", Lists.newArrayList(t), Type.BITMAP, Type.BITMAP, + "", + "", + "", + "", + "", + true, false, true, true)); + } } addBuiltin(AggregateFunction.createBuiltin(QUANTILE_UNION, Lists.newArrayList(Type.QUANTILE_STATE), diff --git 
a/regression-test/data/query_p0/aggregate/bitmap_agg.out b/regression-test/data/query_p0/aggregate/bitmap_agg.out new file mode 100644 index 00000000000000..82a80101eb9492 --- /dev/null +++ b/regression-test/data/query_p0/aggregate/bitmap_agg.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +20000 + +-- !sql2 -- +20000 + diff --git a/regression-test/suites/query_p0/aggregate/bitmap_agg.groovy b/regression-test/suites/query_p0/aggregate/bitmap_agg.groovy new file mode 100644 index 00000000000000..edfc246df04277 --- /dev/null +++ b/regression-test/suites/query_p0/aggregate/bitmap_agg.groovy @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("bitmap_agg") { /* Checks bitmap_count(bitmap_agg(id)) over 20000 generated rows, first on a NOT NULL int column and then on a nullable int column. */ + sql "DROP TABLE IF EXISTS `test_bitmap_agg`;" + sql """ + CREATE TABLE `test_bitmap_agg` ( + `id` int(11) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ + insert into `test_bitmap_agg` + select number from numbers("number" = "20000"); + """ + + qt_sql1 """ + select bitmap_count(bitmap_agg(id)) from `test_bitmap_agg`; + """ + + sql "DROP TABLE IF EXISTS `test_bitmap_agg`;" + + sql "DROP TABLE IF EXISTS test_bitmap_agg_nullable;" + sql """ + CREATE TABLE `test_bitmap_agg_nullable` ( + `id` int(11) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + sql """ + insert into `test_bitmap_agg_nullable` + select number from numbers("number" = "20000"); + """ + qt_sql2 """ + select bitmap_count(bitmap_agg(id)) from `test_bitmap_agg_nullable`; + """ + sql "DROP TABLE IF EXISTS `test_bitmap_agg_nullable`;" + /* Cleanup targets the nullable table created above; `test_bitmap_agg` was already dropped after qt_sql1. */ + }