From 5baebdb44149907bee59ba99ff1bfca8adc1c4b0 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Mon, 3 Jul 2023 23:17:50 +0800 Subject: [PATCH] support BE schema change --- be/src/olap/delta_writer.cpp | 6 +- be/src/olap/memtable.cpp | 107 +++++++++++-- be/src/olap/memtable.h | 3 +- be/src/olap/push_handler.cpp | 2 + .../rowset/segment_v2/segment_iterator.cpp | 1 + be/src/olap/schema_change.cpp | 1 + be/src/olap/tablet_meta.cpp | 3 +- be/src/olap/tablet_schema.cpp | 8 + be/src/olap/tablet_schema.h | 6 +- be/src/util/jsonb_parser.h | 2 +- be/src/vec/columns/column_object.cpp | 74 +++++---- be/src/vec/columns/column_object.h | 3 +- be/src/vec/common/schema_util.cpp | 117 ++++++++++++++- be/src/vec/common/schema_util.h | 13 ++ be/src/vec/data_types/get_least_supertype.cpp | 34 +++++ be/src/vec/exec/scan/new_olap_scanner.cpp | 1 + .../doris/alter/SchemaChangeHandler.java | 9 +- .../org/apache/doris/analysis/ColumnDef.java | 3 +- .../apache/doris/analysis/DescribeStmt.java | 1 - .../java/org/apache/doris/catalog/Column.java | 5 +- .../common/proc/IndexSchemaProcNode.java | 66 ++++---- .../doris/service/FrontendServiceImpl.java | 32 ++-- gensrc/thrift/FrontendService.thrift | 5 +- regression-test/data/variant_p0/load.out | 141 ++++++++++++++++++ regression-test/suites/variant_p0/load.groovy | 60 ++++++-- 25 files changed, 580 insertions(+), 123 deletions(-) create mode 100644 regression-test/data/variant_p0/load.out diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 98205f2c6007f7..888e470f8a9267 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -201,7 +201,7 @@ Status DeltaWriter::init() { context.segments_overlap = OVERLAPPING; context.tablet_schema = _tablet_schema; context.newest_write_timestamp = UnixSeconds(); - context.tablet_id = _tablet->table_id(); + context.tablet_id = _tablet->tablet_id(); context.tablet = _tablet; context.write_type = DataWriteType::TYPE_DIRECT; context.mow_context = std::make_shared(_cur_max_version, _req.txn_id, _rowset_ids, @@ -352,7 +352,8 @@ void DeltaWriter::_reset_mem_table() { _delete_bitmap); _mem_table.reset(new MemTable(_tablet, _schema.get(), _tablet_schema.get(), _req.slots, _req.tuple_desc, _rowset_writer.get(), mow_context, - mem_table_insert_tracker, mem_table_flush_tracker)); + mem_table_insert_tracker, mem_table_flush_tracker, + _req.index_id)); COUNTER_UPDATE(_segment_num, 1); _mem_table->set_callback([this](MemTableStat& stat) { @@ -470,6 +471,7 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes, vectorized::schema_util::get_least_common_schema( {_tablet->tablet_schema(), rw_ctx.tablet_schema}, update_schema); _tablet->update_by_least_common_schema(update_schema); + VLOG_DEBUG << "dump updated tablet schema: " << update_schema->dump_structure(); } Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, _tablet, _req.txn_id, diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index a0b24fb86ffc0a..d734f2ae1a7f28 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -57,6 +57,7 @@ #include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/get_least_supertype.h" #include "vec/json/path_in_data.h" #include "vec/jsonb/serialize.h" @@ -67,7 +68,7 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t const std::vector* slot_descs, TupleDescriptor* tuple_desc, RowsetWriter* rowset_writer, std::shared_ptr mow_context, const std::shared_ptr& insert_mem_tracker, - const std::shared_ptr& flush_mem_tracker) + const std::shared_ptr& flush_mem_tracker, int64_t index_id) : _tablet(std::move(tablet)), _keys_type(_tablet->keys_type()), _schema(schema), @@ -81,7 +82,8 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t _offsets_of_aggregate_states(schema->num_columns()), _total_size_of_aggregate_states(0), _mem_usage(0), - _mow_context(mow_context) { + _mow_context(mow_context), + _index_id(index_id) { #ifndef BE_TEST _insert_mem_tracker_use_hook = std::make_unique( fmt::format("MemTableHookInsert:TabletId={}", std::to_string(tablet_id())), @@ -527,7 +529,6 @@ Status MemTable::close() { return flush(); } -// This function could throw exeception, it's not expection safe Status MemTable::unfold_variant_column(vectorized::Block& block, FlushContext* ctx) { if (block.rows() == 0 || _tablet_schema->num_variant_columns() == 0) { return Status::OK(); @@ -562,9 +563,69 @@ Status MemTable::unfold_variant_column(vectorized::Block& block, FlushContext* c TabletSchemaSPtr flush_schema = std::make_shared(*_tablet_schema); vectorized::Block flush_block(std::move(block)); - // Flatten variant column into flat columns, append flatten columns to the back of original Block and TabletSchema + // Get subcolumns name->column map + phmap::flat_hash_map subcolumn_map; + for (size_t i = 0; i < variant_column_pos.size(); ++i) { + size_t variant_pos = variant_column_pos[i]; + const TabletColumn& variant_col = _tablet_schema->columns()[variant_pos]; + std::for_each( + variant_col.get_sub_columns().begin(), variant_col.get_sub_columns().end(), + [&](const TabletColumn& subcolumn) { + subcolumn_map[StringRef(subcolumn.name().data(), subcolumn.name().size())] = + &subcolumn; + }); + } + + // column positions in flush schema + std::vector modifying_columns_pos; + std::vector new_columns_pos; + + // If column already exist in original tablet schema, then we pick common type + // and cast column to common type, and modify tablet column to common type, + // otherwise it's a new column, we should add to frontend + auto column_integrate = [&](const TabletColumn& parent_variant, + auto& column_entry_from_object) { + const std::string& column_name = + parent_variant.name() + "." + column_entry_from_object->path.get_path(); + const vectorized::DataTypePtr& final_data_type_from_object = + column_entry_from_object->data.get_least_common_type(); + auto it = subcolumn_map.find(column_name); + TabletColumn tablet_column; + if (it != subcolumn_map.end()) { + // Already exists column, check type equality, directly modify meta. + // If tablet column type is different from entry's column type, a cast will be performed later + const TabletColumn* original = it->second; + vectorized::DataTypePtr original_type = it->second->get_vec_type(); + vectorized::DataTypePtr common_type; + vectorized::get_least_supertype( + vectorized::DataTypes {original_type, final_data_type_from_object}, + &common_type); + if (!original_type->equals(*common_type)) { + // update to common type + modifying_columns_pos.push_back(flush_schema->num_columns()); + vectorized::schema_util::get_column_by_type(common_type, column_name, + tablet_column); + tablet_column.set_unique_id(original->unique_id()); + } else { + tablet_column = *original; + } + } else { + vectorized::schema_util::get_column_by_type(final_data_type_from_object, column_name, + tablet_column); + // New columns, directly add to meta, new column unique id need to be set + new_columns_pos.push_back(flush_schema->num_columns()); + } + tablet_column.set_parent_unique_id(parent_variant.unique_id()); + tablet_column.set_path_info(column_entry_from_object->path); + flush_schema->append_column(std::move(tablet_column)); + flush_block.insert({column_entry_from_object->data.get_finalized_column_ptr()->get_ptr(), + final_data_type_from_object, column_name}); + }; + + // 1. Flatten variant column into flat columns, append flatten columns to the back of original Block and TabletSchema // those columns are extracted columns, leave none extracted columns remain in original variant column, which is - // JSONB format at present + // JSONB format at present. + // 2. Collect columns that need to be added or modified when data type changes or new columns encountered for (size_t i = 0; i < variant_column_pos.size(); ++i) { size_t variant_pos = variant_column_pos[i]; vectorized::ColumnObject& object_column = assert_cast( @@ -578,21 +639,27 @@ Status MemTable::unfold_variant_column(vectorized::Block& block, FlushContext* c root = entry; continue; } - const std::string& column_name = parent_column.name() + "." + entry->path.get_path(); - TabletColumn tablet_column; - vectorized::schema_util::get_column_by_type(entry->data.get_least_common_type(), - column_name, tablet_column); - tablet_column.set_path_info(entry->path); - tablet_column.set_parent_unique_id(_tablet_schema->columns()[variant_pos].unique_id()); - flush_schema->append_column(std::move(tablet_column)); - flush_block.insert({entry->data.get_finalized_column_ptr()->get_ptr(), - entry->data.get_least_common_type(), column_name}); + column_integrate(parent_column, entry); } // Handle root column flush_block.get_by_position(variant_pos).column = root->data.get_finalized_column_ptr(); flush_block.get_by_position(variant_pos).type = root->data.get_least_common_type(); } + // Check new columns and modified columns and send FrontendService::modifyColumns RPC to + // 1. Get each unique if for sub columns, and record related column in FE meta. + // 2. Notify the Front end meta that some columns type changed. + // Those operation is based on the light weight schema change feature + vectorized::schema_util::UpdateSchemaRequest request; + request.from_schema = flush_schema; + request.new_columns_pos = new_columns_pos; + request.modifying_columns = modifying_columns_pos; + request.tablet_id = _tablet->table_id(); + request.index_id = _index_id; + // For CAS + request.schema_version = _tablet_schema->schema_version(); + RETURN_IF_ERROR(vectorized::schema_util::update_front_end_schema(request)); + { // Update rowset schema, tablet's tablet schema will be updated when build Rowset // Eg. flush schema: A(int), B(float), C(int), D(int) @@ -610,6 +677,18 @@ Status MemTable::unfold_variant_column(vectorized::Block& block, FlushContext* c VLOG_DEBUG << "dump rs schema: " << rw_ctx.tablet_schema->dump_structure(); } + // Cast to expected type + for (size_t i = 0; i < flush_block.columns(); ++i) { + auto expected_type = flush_schema->columns()[i].get_vec_type(); + if (!expected_type->equals(*flush_block.get_by_position(i).type)) { + RETURN_IF_ERROR(vectorized::schema_util::cast_column( + {flush_block.get_by_position(i).column, flush_block.get_by_position(i).type, + ""}, + expected_type, &flush_block.get_by_position(i).column)); + flush_block.get_by_position(i).type = expected_type; + } + } + ctx->flush_schema = flush_schema; block.swap(flush_block); VLOG_DEBUG << "dump block: " << block.dump_data(); diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 912f2f42115797..d96bc3ffb46946 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -177,7 +177,7 @@ class MemTable { const std::vector* slot_descs, TupleDescriptor* tuple_desc, RowsetWriter* rowset_writer, std::shared_ptr mow_context, const std::shared_ptr& insert_mem_tracker, - const std::shared_ptr& flush_mem_tracker); + const std::shared_ptr& flush_mem_tracker, int64_t index_id); ~MemTable(); int64_t tablet_id() const { return _tablet->tablet_id(); } @@ -301,6 +301,7 @@ class MemTable { std::shared_ptr _mow_context; size_t _num_columns; + int64_t _index_id; }; // class MemTable inline std::ostream& operator<<(std::ostream& os, const MemTable& table) { diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 0a3a0e2fee74d9..722fcd91d2b35c 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -137,6 +137,7 @@ Status PushHandler::_do_streaming_ingestion(TabletSharedPtr tablet, const TPushR TabletSchema tablet_schema; tablet_schema.copy_from(*tablet->tablet_schema()); if (!request.columns_desc.empty() && request.columns_desc[0].col_unique_id >= 0) { + // TODO(lhy) handle variant tablet_schema.clear_columns(); for (const auto& column_desc : request.columns_desc) { tablet_schema.append_column(TabletColumn(column_desc)); @@ -163,6 +164,7 @@ Status PushHandler::_do_streaming_ingestion(TabletSharedPtr tablet, const TPushR tablet_schema->copy_from(*tablet->tablet_schema()); if (!request.columns_desc.empty() && request.columns_desc[0].col_unique_id >= 0) { tablet_schema->clear_columns(); + // TODO(lhy) handle variant for (const auto& column_desc : request.columns_desc) { tablet_schema->append_column(TabletColumn(column_desc)); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 53d89e6395b33a..8fa6c795a06e39 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1491,6 +1491,7 @@ void SegmentIterator::_init_current_block( i >= block->columns()) { //todo(wb) maybe we can release it after output block current_columns[cid]->clear(); } else { // non-predicate column + // if (!block->get_by_position(i).type->equals()) current_columns[cid] = std::move(*block->get_by_position(i).column).mutate(); if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATE) { diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 98f7e8e53527ce..3e2404f53a1e9b 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -724,6 +724,7 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& TabletSchemaSPtr base_tablet_schema = std::make_shared(); base_tablet_schema->copy_from(*base_tablet->tablet_schema()); if (!request.columns.empty() && request.columns[0].col_unique_id >= 0) { + // TODO(lhy) handle variant base_tablet_schema->clear_columns(); for (const auto& column : request.columns) { base_tablet_schema->append_column(TabletColumn(column)); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 1b46c21151d069..1cc0e19b7b98dc 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -346,7 +346,8 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco } for (size_t i = 0; i < tcolumn.children_column.size(); i++) { ColumnPB* children_column = column->add_children_columns(); - init_column_from_tcolumn(i, tcolumn.children_column[i], children_column); + init_column_from_tcolumn(tcolumn.children_column[i].col_unique_id, + tcolumn.children_column[i], children_column); } } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index ea1aac72c8ba6b..3adf686dd15832 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -538,6 +538,10 @@ void TabletColumn::set_path_info(const vectorized::PathInData& path) { _column_path = path; } +vectorized::DataTypePtr TabletColumn::get_vec_type() const { + return vectorized::DataTypeFactory::instance().create_data_type(*this); +} + void TabletIndex::init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema) { _index_id = index.index_id; @@ -917,6 +921,10 @@ const std::vector& TabletSchema::columns() const { return _cols; } +std::vector& TabletSchema::mutable_columns() { + return _cols; +} + const TabletColumn& TabletSchema::column(size_t ordinal) const { DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; return _cols[ordinal]; diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index c750d4c942f8a3..65d981bfef7a01 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -42,6 +42,7 @@ namespace doris { namespace vectorized { class Block; class PathInData; +class IDataType; } // namespace vectorized struct OlapTableIndexSchema; @@ -63,7 +64,7 @@ class TabletColumn { int32_t unique_id() const { return _unique_id; } void set_unique_id(int32_t id) { _unique_id = id; } - std::string name() const { return _col_name; } + const std::string& name() const { return _col_name; } void set_name(std::string col_name) { _col_name = col_name; } FieldType type() const { return _type; } void set_type(FieldType type) { _type = type; } @@ -113,6 +114,7 @@ class TabletColumn { uint32_t get_subtype_count() const { return _sub_column_count; } const TabletColumn& get_sub_column(uint32_t i) const { return _sub_columns[i]; } + const std::vector& get_sub_columns() const { return _sub_columns; } friend bool operator==(const TabletColumn& a, const TabletColumn& b); friend bool operator!=(const TabletColumn& a, const TabletColumn& b); @@ -130,6 +132,7 @@ class TabletColumn { bool is_extracted_column() const { return !_column_path.empty(); }; bool parent_unique_d() const { return _parent_col_unique_id; } void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id = col_unique_id; } + std::shared_ptr get_vec_type() const; private: int32_t _unique_id; @@ -234,6 +237,7 @@ class TabletSchema { Status have_column(const std::string& field_name) const; const TabletColumn& column_by_uid(int32_t col_unique_id) const; const std::vector& columns() const; + std::vector& mutable_columns(); size_t num_columns() const { return _num_columns; } size_t num_key_columns() const { return _num_key_columns; } size_t num_null_columns() const { return _num_null_columns; } diff --git a/be/src/util/jsonb_parser.h b/be/src/util/jsonb_parser.h index 8d29b52faf1625..96b3d6f47c3cf8 100644 --- a/be/src/util/jsonb_parser.h +++ b/be/src/util/jsonb_parser.h @@ -83,7 +83,7 @@ class JsonbParserT { explicit JsonbParserT(OS_TYPE& os) : writer_(os), stream_pos_(0), err_(JsonbErrType::E_NONE) {} // parse a UTF-8 JSON string - bool parse(const std::string& str, hDictInsert handler = nullptr) { + bool(parseconst std::string& str, hDictInsert handler = nullptr) { return parse(str.c_str(), (unsigned int)str.size(), handler); } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index e28651eaaecfe4..e3f77e1f96a2d0 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -89,24 +89,24 @@ size_t getNumberOfDimensions(const IDataType& type) { return 0; } -DataTypePtr get_data_type_by_column(const IColumn& column) { - auto idx = column.get_underlying_data_type(); - if (WhichDataType(idx).is_simple()) { - return DataTypeFactory::instance().create_data_type(idx); - } - if (WhichDataType(idx).is_nothing()) { - return std::make_shared(); - } - if (const auto* column_array = check_and_get_column(&column)) { - return std::make_shared(get_data_type_by_column(column_array->get_data())); - } - if (const auto* column_nullable = check_and_get_column(&column)) { - return make_nullable(get_data_type_by_column(column_nullable->get_nested_column())); - } - // TODO add more types - assert(false); - return nullptr; -} +// DataTypePtr get_data_type_by_column(const IColumn& column) { +// auto idx = column.get_underlying_data_type(); +// if (WhichDataType(idx).is_simple()) { +// return DataTypeFactory::instance().create_data_type(idx); +// } +// if (WhichDataType(idx).is_nothing()) { +// return std::make_shared(); +// } +// if (const auto* column_array = check_and_get_column(&column)) { +// return std::make_shared(get_data_type_by_column(column_array->get_data())); +// } +// if (const auto* column_nullable = check_and_get_column(&column)) { +// return make_nullable(get_data_type_by_column(column_nullable->get_nested_column())); +// } +// // TODO add more types +// assert(false); +// return nullptr; +// } /// Recreates column with default scalar values and keeps sizes of arrays. ColumnPtr recreate_column_with_default_value(const ColumnPtr& column, @@ -221,12 +221,14 @@ class FieldVisitorToScalarType : public StaticVisitor { return 0; } size_t operator()(const Int64& x) { - // // Only Int64 | Int32 at present - // field_types.insert(FieldType::Int64); - // type_indexes.insert(TypeIndex::Int64); - // return 0; field_types.insert(FieldType::Int64); - if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) { + if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) { + type_indexes.insert(TypeIndex::Int8); + } else if (x <= std::numeric_limits::max() && + x >= std::numeric_limits::min()) { + type_indexes.insert(TypeIndex::Int16); + } else if (x <= std::numeric_limits::max() && + x >= std::numeric_limits::min()) { type_indexes.insert(TypeIndex::Int32); } else { type_indexes.insert(TypeIndex::Int64); @@ -271,12 +273,12 @@ void get_field_info(const Field& field, FieldInfo* info) { }; } -ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr&& data_, bool is_nullable_, bool is_root_) - : least_common_type(get_data_type_by_column(*data_)), - is_nullable(is_nullable_), - is_root(is_root_) { - data.push_back(std::move(data_)); -} +// ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr&& data_, bool is_nullable_, bool is_root_) +// : least_common_type(get_data_type_by_column(*data_)), +// is_nullable(is_nullable_), +// is_root(is_root_) { +// data.push_back(std::move(data_)); +// } ColumnObject::Subcolumn::Subcolumn(size_t size_, bool is_nullable_, bool is_root_) : least_common_type(std::make_shared()), @@ -316,7 +318,8 @@ void ColumnObject::Subcolumn::insert(Field field) { void ColumnObject::Subcolumn::add_new_column_part(DataTypePtr type) { data.push_back(type->create_column()); - least_common_type = LeastCommonType {std::move(type)}; + least_common_type = LeastCommonType {type}; + data_types.push_back(type); } void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { @@ -453,13 +456,18 @@ void ColumnObject::Subcolumn::finalize() { if (num_of_defaults_in_prefix) { result_column->insert_many_defaults(num_of_defaults_in_prefix); } - for (auto& part : data) { + for (size_t i = 0; i < data.size(); ++i) { + auto& part = data[i]; + auto from_type = data_types[i]; part = part->convert_to_full_column_if_const(); - auto from_type = get_data_type_by_column(*part); size_t part_size = part->size(); if (!from_type->equals(*to_type)) { ColumnPtr ptr; - schema_util::cast_column({part, from_type, ""}, to_type, &ptr); + Status st = schema_util::cast_column({part, from_type, ""}, to_type, &ptr); + if (!st.ok()) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + st.to_string() + ", real_code:{}", st.code()); + } part = ptr; } result_column->insert_range_from(*part, 0, part_size); diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 52897054d566b3..3eb329b7734893 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -88,7 +88,7 @@ class ColumnObject final : public COWHelper { Subcolumn(size_t size_, bool is_nullable_, bool is_root = false); - Subcolumn(MutableColumnPtr&& data_, bool is_nullable_, bool is_root = false); + // Subcolumn(MutableColumnPtr&& data_, bool is_nullable_, bool is_root = false); size_t size() const; @@ -176,6 +176,7 @@ class ColumnObject final : public COWHelper { /// That means that the least common type for i-th prefix is the type of i-th part /// and it's the supertype for all type of column from 0 to i-1. std::vector data; + std::vector data_types; /// Until we insert any non-default field we don't know further /// least common type and we count number of defaults in prefix, /// which will be converted to the default type of final common type. diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 8726814907f186..3cd389a61fdde6 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -37,6 +37,7 @@ #include "common/config.h" #include "common/status.h" #include "olap/olap_common.h" +#include "olap/tablet_schema.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "util/thrift_rpc_helper.h" @@ -184,7 +185,6 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str void update_least_common_schema(const std::vector& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id) { - const TabletColumn& variant = common_schema->column_by_uid(variant_col_unique_id); // Types of subcolumns by path from all tuples. std::unordered_map subcolumns_types; for (const TabletSchemaSPtr& schema : schemas) { @@ -231,10 +231,11 @@ void update_least_common_schema(const std::vector& schemas, // TODO handle ambiguos path and deduce to JSONB type + std::string variant_col_name = common_schema->column_by_uid(variant_col_unique_id).name(); // Append all common type columns of this variant for (int i = 0; i < tuple_paths.size(); ++i) { TabletColumn common_column; - const std::string& column_name = variant.name() + "." + tuple_paths[i].get_path(); + const std::string& column_name = variant_col_name + "." + tuple_paths[i].get_path(); get_column_by_type(tuple_types[i], column_name, common_column); common_column.set_parent_unique_id(variant_col_unique_id); common_column.set_path_info(tuple_paths[i]); @@ -290,4 +291,116 @@ void finalize_variant_columns(Block& block, const std::vector& variant_pos) } } +static TColumnDef get_columndef(const TabletColumn& column) { + TColumnDesc t_column_desc; + t_column_desc.__set_columnName(column.name()); + t_column_desc.__set_columnLength(column.length()); + t_column_desc.__set_colUniqueId(column.unique_id()); + t_column_desc.__set_parentColUniqueId(column.parent_unique_d()); + t_column_desc.__set_columnType( + DataTypeFactory::instance().create_data_type(column)->get_type_as_tprimitive_type()); + t_column_desc.__set_isAllowNull(column.is_nullable()); + + if (!column.get_sub_columns().empty()) { + t_column_desc.__isset.children = true; + } + for (const TabletColumn& subcolumn : column.get_sub_columns()) { + TColumnDef t_child_column_def = get_columndef(subcolumn); + t_column_desc.children.push_back(std::move(t_child_column_def.columnDesc)); + } + TColumnDef t_column_def; + t_column_def.__set_columnDesc(std::move(t_column_desc)); + return t_column_def; +} + +Status update_front_end_schema(UpdateSchemaRequest& request) { + TModifyColumnsRequest modify_request; + TModifyColumnsResult modify_res; + modify_request.__set_index_id(request.index_id); + modify_request.__set_table_id(request.tablet_id); + modify_request.__set_schema_version(request.schema_version); + if (request.new_columns_pos.empty() && request.modifying_columns.empty()) { + return Status::OK(); + } + modify_request.__isset.newColumns = true; + for (int pos : request.new_columns_pos) { + modify_request.newColumns.push_back(get_columndef(request.from_schema->columns()[pos])); + } + modify_request.__isset.modifyColumns = true; + for (int pos : request.modifying_columns) { + CHECK(request.from_schema->columns()[pos].unique_id() > 0); + modify_request.modifyColumns.push_back(get_columndef(request.from_schema->columns()[pos])); + } + auto master_addr = ExecEnv::GetInstance()->master_info()->network_address; + Status rpc_st = ThriftRpcHelper::rpc( + master_addr.hostname, master_addr.port, + [&modify_request, &modify_res](FrontendServiceConnection& client) { + client->modifyColumns(modify_res, modify_request); + }, + config::txn_commit_rpc_timeout_ms); + if (!rpc_st.ok()) { + return Status::InternalError("Failed to do schema change, rpc error {}", + rpc_st.to_string()); + } + if (modify_res.status.status_code != TStatusCode::CONFLICT_SCHEMA_VERSION && + modify_res.status.status_code != TStatusCode::OK) { + LOG(WARNING) << fmt::format("Encouter unkown modifyColumns RPC error, status: {}, msg: {}", + modify_res.status.status_code, modify_res.status.error_msgs[0]); + return Status::InternalError(modify_res.status.error_msgs[0]); + } + CHECK(!modify_res.allColumns.empty()); + + // column_name->column + phmap::flat_hash_map column_name_map; + for (const TColumn& c : modify_res.allColumns) { + column_name_map[c.column_name] = &c; + // flatten it's subcolumns + if (c.column_type.type == TPrimitiveType::VARIANT) { + for (const TColumn& subcol : c.children_column) { + column_name_map[subcol.column_name] = &subcol; + } + } + } + + // Schema version missmatch, CAS failed retry + if (modify_res.status.status_code == TStatusCode::CONFLICT_SCHEMA_VERSION) { + LOG(INFO) << fmt::format( + "Index {} meet conflict schema version, retry. Expected {} but meet {}", + modify_request.index_id, modify_request.schema_version, modify_res.schema_version); + if (--request.max_try <= 0) { + return Status::InternalError("Reach max retry, meet too many conflict schema version"); + } + if (request.need_backoff) { + // TODO + } + // Get least common schema, and refresh new columns, and try again + std::vector refreshed_new_columns_pos; + for (int pos : request.new_columns_pos) { + TabletColumn& new_column = request.from_schema->mutable_columns()[pos]; + auto it = column_name_map.find(new_column.name()); + if (it == column_name_map.end()) { + refreshed_new_columns_pos.push_back(pos); + } + } + std::vector refreshed_modify_columns_pos; + // for (int pos : request.modifying_columns) { + // // TODO update to least common schema + // } + // retry + request.modifying_columns = refreshed_modify_columns_pos; + request.new_columns_pos = refreshed_new_columns_pos; + RETURN_IF_ERROR(update_front_end_schema(request)); + return Status::OK(); + } + // Set unique id for new columns + for (int pos : request.new_columns_pos) { + TabletColumn& new_column = request.from_schema->mutable_columns()[pos]; + auto it = column_name_map.find(new_column.name()); + CHECK(it != column_name_map.end()); + CHECK(it->second->__isset.col_unique_id); + new_column.set_unique_id(it->second->col_unique_id); + } + return Status::OK(); +} + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index dde27606efbf66..a9e5dde3201dfe 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -85,4 +85,17 @@ void get_least_common_schema(const std::vector& schemas, void update_least_common_schema(const std::vector& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id); +// Modify column type or add new columns +struct UpdateSchemaRequest { + TabletSchemaSPtr from_schema; + std::vector new_columns_pos; + std::vector modifying_columns; + int32_t tablet_id; + int index_id; + int schema_version; + bool need_backoff = false; + int max_try = 10; +}; +Status update_front_end_schema(UpdateSchemaRequest& request); + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index a1cfcd8959ff97..a831a41ad7b060 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -36,6 +36,7 @@ #include "vec/common/typeid_cast.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_jsonb.h" @@ -355,6 +356,39 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type) { } } + /// For Arrays + { + bool have_array = false; + bool all_arrays = true; + DataTypes nested_types; + nested_types.reserve(types.size()); + for (const auto& type : types) { + if (const DataTypeArray* type_array = typeid_cast(type.get())) { + have_array = true; + nested_types.emplace_back(type_array->get_nested_type()); + } else { + all_arrays = false; + } + } + if (have_array) { + if (!all_arrays) { + return throw_or_return( + types, ErrorCode::INVALID_ARGUMENT, + "because some of them are Array and some of them are not", type); + } + DataTypePtr nested_type; + get_least_supertype(nested_types, &nested_type); + /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype, + /// nested_type will be nullptr, we should return nullptr in this case. + if (!nested_type) { + *type = nullptr; + return; + } + *type = std::make_shared(nested_type); + return; + } + } + /// Decimals { UInt32 have_decimal32 = type_ids.count(TypeIndex::Decimal32); diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index ca56bc95bd245a..a629b3addb01a1 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -154,6 +154,7 @@ Status NewOlapScanner::init() { // tabletschema is bounded to rowset and tablet's schema maybe outdated, // so we have to use schema from a query plan witch FE puts it in query plans. _tablet_schema->clear_columns(); + // TODO(lhy) handle variant for (const auto& column_desc : olap_scan_node.columns_desc) { _tablet_schema->append_column(TabletColumn(column_desc)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 7f27a36a15b20f..cb352194621181 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -1084,7 +1084,6 @@ private boolean addColumnInternal(OlapTable olapTable, Column newColumn, ColumnP * ADD COLUMN k1 int to rollup1, * ADD COLUMN k1 int to rollup2 * So that k1 will be added to base index 'twice', and we just ignore this repeat adding. - * */ private void checkAndAddColumn(List modIndexSchema, Column newColumn, ColumnPosition columnPos, Set newColNameSet, boolean isBaseIndex, @@ -1127,6 +1126,7 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co posIndex = i; } } + LOG.debug("newColumn.getParentUniqueId {}", newColumn.getParentUniqueId()); // Find target parent column if (newColumn.getParentUniqueId() > 0 && col.getUniqueId() == newColumn.getParentUniqueId()) { parentColumn = col; @@ -1137,12 +1137,11 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co if (hasPos && posIndex == -1) { throw new DdlException("Column[" + columnPos.getLastCol() + "] does not found"); } - + if (hasPos && newColumn.getParentUniqueId() > 0) { throw new DdlException("Column[" + columnPos.getLastCol() + "] pos confilict"); } - // check if add to first if (columnPos != null && columnPos.isFirst()) { posIndex = -1; @@ -1155,7 +1154,7 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co // Not the target index if (newColumn.getParentUniqueId() > 0 && parentColumn == null) { - LOG.debug("Not the target index, parentColUniqueId {}", newColumn.getParentUniqueId()); + LOG.debug("Not the target index, parentColUniqueId {}", newColumn.getParentUniqueId()); return; } @@ -1163,7 +1162,7 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co if (parentColumn != null) { parentColumn.addChildrenColumn(toAddColumn); LOG.debug("newSubColumn setUniqueId({}), modIndexSchema:{}, parentColum:{}", - newColumnUniqueId, modIndexSchema, parentColumn.getName()); + newColumnUniqueId, modIndexSchema, parentColumn.toThrift()); return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index c79a5357847fac..82e3d16c323587 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -539,7 +539,8 @@ public Column toColumn() { visible, defaultValue.defaultValueExprDef, Column.COLUMN_UNIQUE_ID_INIT_VALUE, defaultValue.getValue(), genericAggregationName, typeList, nullableList); if (parentUniqueId > 0) { - col.setParentUniqueId(parentUniqueId); + LOG.debug("set parentUniqueId {}", parentUniqueId); + col.setParentUniqueId(parentUniqueId); } return col; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescribeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescribeStmt.java index 5ef86bf4ea164b..37cdac575d15d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescribeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescribeStmt.java @@ -205,7 +205,6 @@ public void analyze(Analyzer analyzer) throws UserException { extras.add("BLOOM_FILTER"); } String extraStr = StringUtils.join(extras, ","); - List row = Arrays.asList( "", "", diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index c66a69c11cbb07..73205d30a14384 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -548,6 +548,9 @@ private void setChildrenTColumn(Column children, TColumn tColumn) { childrenTColumn.setColumnType(childrenTColumnType); childrenTColumn.setIsAllowNull(children.isAllowNull()); + if (children.getUniqueId() > 0) { + childrenTColumn.setColUniqueId(children.getUniqueId()); + } // TODO: If we don't set the aggregate type for children, the type will be // considered as TAggregationType::SUM after deserializing in BE. // For now, we make children inherit the aggregate type from their parent. @@ -570,7 +573,7 @@ private void toChildrenThrift(Column column, TColumn tColumn) { tColumn.setChildrenColumn(new ArrayList<>()); setChildrenTColumn(k, tColumn); setChildrenTColumn(v, tColumn); - } else if (column.type.isStructType()) { + } else if (column.type.isStructType() || column.type.isVariantType()) { List childrenColumns = column.getChildren(); tColumn.setChildrenColumn(new ArrayList<>()); for (Column children : childrenColumns) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/IndexSchemaProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/IndexSchemaProcNode.java index 7ede055f1da75e..3966e9be2fa935 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/IndexSchemaProcNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/IndexSchemaProcNode.java @@ -48,6 +48,43 @@ public IndexSchemaProcNode(List schema, Set bfColumns) { this.bfColumns = bfColumns; } + private void addOneRow(Column column, BaseProcResult result) { + if (column.getType().isVariantType()) { + // Flatten variant's sub columns + for (Column childColumn : column.getChildren()) { + addOneRow(childColumn, result); + } + } + // Extra string (aggregation and bloom filter) + List extras = Lists.newArrayList(); + if (column.getAggregationType() != null) { + extras.add(column.getAggregationString()); + } + if (bfColumns != null && bfColumns.contains(column.getName())) { + extras.add("BLOOM_FILTER"); + } + if (column.isAutoInc()) { + extras.add("AUTO_INCREMENT"); + } + String extraStr = StringUtils.join(extras, ","); + + List rowList = Arrays.asList(column.getDisplayName(), + column.getOriginType().toString(), + column.isAllowNull() ? "Yes" : "No", + ((Boolean) column.isKey()).toString(), + column.getDefaultValue() == null + ? FeConstants.null_string : column.getDefaultValue(), + extraStr); + + if (column.getOriginType().isDateV2()) { + rowList.set(1, "DATE"); + } + if (column.getOriginType().isDatetimeV2()) { + rowList.set(1, "DATETIME"); + } + result.addRow(rowList); + } + @Override public ProcResult fetchResult() throws AnalysisException { Preconditions.checkNotNull(schema); @@ -56,34 +93,7 @@ public ProcResult fetchResult() throws AnalysisException { result.setNames(TITLE_NAMES); for (Column column : schema) { - // Extra string (aggregation and bloom filter) - List extras = Lists.newArrayList(); - if (column.getAggregationType() != null) { - extras.add(column.getAggregationString()); - } - if (bfColumns != null && bfColumns.contains(column.getName())) { - extras.add("BLOOM_FILTER"); - } - if (column.isAutoInc()) { - extras.add("AUTO_INCREMENT"); - } - String extraStr = StringUtils.join(extras, ","); - - List rowList = Arrays.asList(column.getDisplayName(), - column.getOriginType().toString(), - column.isAllowNull() ? "Yes" : "No", - ((Boolean) column.isKey()).toString(), - column.getDefaultValue() == null - ? FeConstants.null_string : column.getDefaultValue(), - extraStr); - - if (column.getOriginType().isDateV2()) { - rowList.set(1, "DATE"); - } - if (column.getOriginType().isDatetimeV2()) { - rowList.set(1, "DATETIME"); - } - result.addRow(rowList); + addOneRow(column, result); } return result; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 8b92e459bb9826..b5b70881aab76c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -363,23 +363,25 @@ private static ColumnDef initColumnfromThrift(TColumnDesc tColumnDesc, String co TypeDef typeDef = TypeDef.createTypeDef(tColumnDesc); boolean isAllowNull = tColumnDesc.isIsAllowNull(); ColumnDef.DefaultValue defaultVal = ColumnDef.DefaultValue.NOT_SET; - // Dynamic table's Array default value should be '[]' + // Variant Column's Array default value should be '[]' if (typeDef.getType().isArrayType()) { defaultVal = ColumnDef.DefaultValue.ARRAY_EMPTY_DEFAULT_VALUE; } ColumnDef col = new ColumnDef(tColumnDesc.getColumnName(), typeDef, false, null, isAllowNull, false, defaultVal, comment, true); if (tColumnDesc.isSetParentColUniqueId()) { - col.setParentUniqueId(tColumnDesc.getParentColUniqueId()); + col.setParentUniqueId(tColumnDesc.getParentColUniqueId()); } if (tColumnDesc.isSetColUniqueId()) { - col.setParentUniqueId(tColumnDesc.getColUniqueId()); + col.setUniqueId(tColumnDesc.getColUniqueId()); } + LOG.debug("{} initColumnfromThrift", tColumnDesc); return col; } private void lightSchemaChangeAddColumns(OlapTable olapTable, List addColumns, - Map> indexSchemaMap, long indexId) throws UserException, MetaNotFoundException { + Map> indexSchemaMap, + long indexId) throws UserException, MetaNotFoundException { List columnDefs = new ArrayList(); // prepare ColumnDef for new columns for (TColumnDef tColumnDef : addColumns) { @@ -399,7 +401,6 @@ private void lightSchemaChangeAddColumns(OlapTable olapTable, List a // create AddColumnsClause AddColumnsClause addColumnsClause = new AddColumnsClause(columnDefs, null, null); addColumnsClause.analyze(null); - // index id -> index col_unique_id supplier Map colUniqueIdSupplierMap = new HashMap<>(); for (Map.Entry> entry : olapTable.getIndexIdToSchema(true).entrySet()) { @@ -440,24 +441,30 @@ private void lightSchemaChangeModifyColumns(OlapTable olapTable, List colUniqueIdColumnMap = Maps.newHashMap(); for (Column column : indexSchemaMap.get(indexId)) { colUniqueIdColumnMap.put(column.getUniqueId(), column); - } + if (column.getType().isVariantType()) { + // Flatten children columns + for (Column childColumn : column.getChildren()) { + colUniqueIdColumnMap.put(childColumn.getUniqueId(), childColumn); + } + } + } + LOG.debug("colUniqueIdColumnMap {}", colUniqueIdColumnMap); // prepare ColumnDef for modifying columns, only modify it's type for (TColumnDef tColumnDef : modifyColumns) { TColumnDesc tColumnDesc = tColumnDef.getColumnDesc(); ColumnDef columnDef = initColumnfromThrift(tColumnDesc, ""); if (columnDef.getUniqueId() > 0) { Column targetColumn = colUniqueIdColumnMap.get(columnDef.getUniqueId()); - targetColumn.setType(columnDef.getType()); + targetColumn.setType(columnDef.getType()); continue; } if (columnDef.getParentUniqueId() > 0) { Column targetColumn = colUniqueIdColumnMap.get(columnDef.getParentUniqueId()); - targetColumn.setType(columnDef.getType()); + targetColumn.setType(columnDef.getType()); continue; } Preconditions.checkState(false, "Not supported"); } - } @Override @@ -504,6 +511,7 @@ public TModifyColumnsResult modifyColumns(TModifyColumnsRequest request) throws for (Column column : olapTable.getSchemaByIndexId(request.getIndexId())) { allColumns.add(column.toThrift()); } + result.setSchemaVersion(olapTable.getIndexSchemaVersion(request.getIndexId())); result.setAllColumns(allColumns); result.setStatus(status); return result; @@ -517,17 +525,17 @@ public TModifyColumnsResult modifyColumns(TModifyColumnsRequest request) throws olapTable.checkNormalStateForAlter(); // index id -> index schema - Map> indexSchemaMap = new HashMap<>(); + Map> indexSchemaMap = new HashMap<>(); lightSchemaChangeAddColumns(olapTable, addColumns, indexSchemaMap, request.getIndexId()); lightSchemaChangeModifyColumns(olapTable, modifyColumns, indexSchemaMap, request.getIndexId()); - //for light schema change add/modify column optimize, direct modify table meta. + // for light schema change add/modify column optimize, direct modify table meta. List newIndexes = olapTable.getCopiedIndexes(); long jobId = Env.getCurrentEnv().getNextId(); Env.getCurrentEnv().getSchemaChangeHandler().modifyTableLightSchemaChange( db, olapTable, indexSchemaMap, newIndexes, null, false, jobId, false); - //5. build all columns + // build all columns for (Column column : olapTable.getSchemaByIndexId(request.getIndexId())) { allColumns.add(column.toThrift()); } diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index d6d7d5462fe110..987b6de6a7dd27 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -51,9 +51,9 @@ struct TColumnDesc { 6: optional bool isAllowNull 7: optional string columnKey 8: optional list children - 9: optional i32 col_unique_id + 9: optional i32 colUniqueId // If set, then this column will be attached to it's parent - 10: optional i32 parent_col_unique_id + 10: optional i32 parentColUniqueId } // A column definition; used by CREATE TABLE and DESCRIBE statements. A column @@ -62,7 +62,6 @@ struct TColumnDesc { struct TColumnDef { 1: required TColumnDesc columnDesc 2: optional string comment - 3: optional i32 colUniqueId } // Arguments to DescribeTable, which returns a list of column descriptors for a diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out new file mode 100644 index 00000000000000..09d75df099be8e --- /dev/null +++ b/regression-test/data/variant_p0/load.out @@ -0,0 +1,141 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +k BIGINT Yes true \N +v.a JSON Yes false \N NONE +v.b.c TINYINT Yes false \N NONE +v.b.c.a ARRAY No false [] NONE +v.b TINYINT Yes false \N NONE +v.id TEXT Yes false \N NONE +v.type TEXT Yes false \N NONE +v.actor.id INT Yes false \N NONE +v.actor.login TEXT Yes false \N NONE +v.actor.display_login TEXT Yes false \N NONE +v.actor.gravatar_id TEXT Yes false \N NONE +v.actor.url TEXT Yes false \N NONE +v.actor.avatar_url TEXT Yes false \N NONE +v.repo.id INT Yes false \N NONE +v.repo.name TEXT Yes false \N NONE +v.repo.url TEXT Yes false \N NONE +v.payload.forkee.id INT Yes false \N NONE +v.payload.forkee.node_id TEXT Yes false \N NONE +v.payload.forkee.name TEXT Yes false \N NONE +v.payload.forkee.full_name TEXT Yes false \N NONE +v.payload.forkee.private TINYINT Yes false \N NONE +v.payload.forkee.owner.login TEXT Yes false \N NONE +v.payload.forkee.owner.id INT Yes false \N NONE +v.payload.forkee.owner.node_id TEXT Yes false \N NONE +v.payload.forkee.owner.avatar_url TEXT Yes false \N NONE +v.payload.forkee.owner.gravatar_id TEXT Yes false \N NONE +v.payload.forkee.owner.url TEXT Yes false \N NONE +v.payload.forkee.owner.html_url TEXT Yes false \N NONE +v.payload.forkee.owner.followers_url TEXT Yes false \N NONE +v.payload.forkee.owner.following_url TEXT Yes false \N NONE +v.payload.forkee.owner.gists_url TEXT Yes false \N NONE +v.payload.forkee.owner.starred_url TEXT Yes false \N NONE +v.payload.forkee.owner.subscriptions_url TEXT Yes false \N NONE +v.payload.forkee.owner.organizations_url TEXT Yes false \N NONE +v.payload.forkee.owner.repos_url TEXT Yes false \N NONE +v.payload.forkee.owner.events_url TEXT Yes false \N NONE +v.payload.forkee.owner.received_events_url TEXT Yes false \N NONE +v.payload.forkee.owner.type TEXT Yes false \N NONE +v.payload.forkee.owner.site_admin TINYINT Yes false \N NONE +v.payload.forkee.html_url TEXT Yes false \N NONE +v.payload.forkee.fork TINYINT Yes false \N NONE +v.payload.forkee.url TEXT Yes false \N NONE +v.payload.forkee.forks_url TEXT Yes false \N NONE +v.payload.forkee.keys_url TEXT Yes false \N NONE +v.payload.forkee.collaborators_url TEXT Yes false \N NONE +v.payload.forkee.teams_url TEXT Yes false \N NONE +v.payload.forkee.hooks_url TEXT Yes false \N NONE +v.payload.forkee.issue_events_url TEXT Yes false \N NONE +v.payload.forkee.events_url TEXT Yes false \N NONE +v.payload.forkee.assignees_url TEXT Yes false \N NONE +v.payload.forkee.branches_url TEXT Yes false \N NONE +v.payload.forkee.tags_url TEXT Yes false \N NONE +v.payload.forkee.blobs_url TEXT Yes false \N NONE +v.payload.forkee.git_tags_url TEXT Yes false \N NONE +v.payload.forkee.git_refs_url TEXT Yes false \N NONE +v.payload.forkee.trees_url TEXT Yes false \N NONE +v.payload.forkee.statuses_url TEXT Yes false \N NONE +v.payload.forkee.languages_url TEXT Yes false \N NONE +v.payload.forkee.stargazers_url TEXT Yes false \N NONE +v.payload.forkee.contributors_url TEXT Yes false \N NONE +v.payload.forkee.subscribers_url TEXT Yes false \N NONE +v.payload.forkee.subscription_url TEXT Yes false \N NONE +v.payload.forkee.commits_url TEXT Yes false \N NONE +v.payload.forkee.git_commits_url TEXT Yes false \N NONE +v.payload.forkee.comments_url TEXT Yes false \N NONE +v.payload.forkee.issue_comment_url TEXT Yes false \N NONE +v.payload.forkee.contents_url TEXT Yes false \N NONE +v.payload.forkee.compare_url TEXT Yes false \N NONE +v.payload.forkee.merges_url TEXT Yes false \N NONE +v.payload.forkee.archive_url TEXT Yes false \N NONE +v.payload.forkee.downloads_url TEXT Yes false \N NONE +v.payload.forkee.issues_url TEXT Yes false \N NONE +v.payload.forkee.pulls_url TEXT Yes false \N NONE +v.payload.forkee.milestones_url TEXT Yes false \N NONE +v.payload.forkee.notifications_url TEXT Yes false \N NONE +v.payload.forkee.labels_url TEXT Yes false \N NONE +v.payload.forkee.releases_url TEXT Yes false \N NONE +v.payload.forkee.deployments_url TEXT Yes false \N NONE +v.payload.forkee.created_at TEXT Yes false \N NONE +v.payload.forkee.updated_at TEXT Yes false \N NONE +v.payload.forkee.pushed_at TEXT Yes false \N NONE +v.payload.forkee.git_url TEXT Yes false \N NONE +v.payload.forkee.ssh_url TEXT Yes false \N NONE +v.payload.forkee.clone_url TEXT Yes false \N NONE +v.payload.forkee.svn_url TEXT Yes false \N NONE +v.payload.forkee.size SMALLINT Yes false \N NONE +v.payload.forkee.stargazers_count TINYINT Yes false \N NONE +v.payload.forkee.watchers_count TINYINT Yes false \N NONE +v.payload.forkee.has_issues TINYINT Yes false \N NONE +v.payload.forkee.has_projects TINYINT Yes false \N NONE +v.payload.forkee.has_downloads TINYINT Yes false \N NONE +v.payload.forkee.has_wiki TINYINT Yes false \N NONE +v.payload.forkee.has_pages TINYINT Yes false \N NONE +v.payload.forkee.forks_count TINYINT Yes false \N NONE +v.payload.forkee.archived TINYINT Yes false \N NONE +v.payload.forkee.disabled TINYINT Yes false \N NONE +v.payload.forkee.open_issues_count TINYINT Yes false \N NONE +v.payload.forkee.allow_forking TINYINT Yes false \N NONE +v.payload.forkee.is_template TINYINT Yes false \N NONE +v.payload.forkee.web_commit_signoff_required TINYINT Yes false \N NONE +v.payload.forkee.visibility TEXT Yes false \N NONE +v.payload.forkee.forks TINYINT Yes false \N NONE +v.payload.forkee.open_issues TINYINT Yes false \N NONE +v.payload.forkee.watchers TINYINT Yes false \N NONE +v.payload.forkee.default_branch TEXT Yes false \N NONE +v.payload.forkee.public TINYINT Yes false \N NONE +v.public TINYINT Yes false \N NONE +v.created_at TEXT Yes false \N NONE +v.payload.push_id BIGINT Yes false \N NONE +v.payload.size TINYINT Yes false \N NONE +v.payload.distinct_size TINYINT Yes false \N NONE +v.payload.ref TEXT Yes false \N NONE +v.payload.head TEXT Yes false \N NONE +v.payload.before TEXT Yes false \N NONE +v.payload.commits.url ARRAY No false [] NONE +v.payload.commits.sha ARRAY No false [] NONE +v.payload.commits.author.email ARRAY No false [] NONE +v.payload.commits.distinct ARRAY No false [] NONE +v.payload.commits.author.name ARRAY No false [] NONE +v.payload.commits.message ARRAY No false [] NONE +v VARIANT Yes false \N NONE + +-- !sql -- +29 + +-- !sql -- +k BIGINT Yes true \N +v.c JSON Yes false \N NONE +v.cc ARRAY No false [] NONE +v.ccc INT Yes false \N NONE +v.cccc DOUBLE Yes false \N NONE +v.ccccc ARRAY No false [] NONE +v.b JSON Yes false \N NONE +v.bb BIGINT Yes false \N NONE +v VARIANT Yes false \N NONE + +-- !sql -- +14 + diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 63a699da2fb4c0..7ac53fa3f3aef7 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -17,28 +17,56 @@ suite("regression_test_variant", "variant_type"){ // prepare test table - def table_name = "simple_variant_type" - sql "DROP TABLE IF EXISTS ${table_name}" - sql """ + def create_table = { table_name -> + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ CREATE TABLE IF NOT EXISTS ${table_name} ( k bigint, v variant ) DUPLICATE KEY(`k`) DISTRIBUTED BY RANDOM BUCKETS 5 - properties("replication_num" = "1", "disable_auto_compaction" = "true"); + properties("replication_num" = "1", "disable_auto_compaction" = "false"); """ + } + def verify = { table_name -> + sql "sync" + qt_sql """desc ${table_name}""" + qt_sql """select count() from ${table_name}""" + } + + def table_name = "simple_variant" + create_table table_name sql """insert into ${table_name} values (1, '[1]'),(1, '{"a" : 1}');""" - sql """insert into ${table_name} values (1, '[2]'),(1, '{"a" : [[[1]]]}');""" - sql """insert into ${table_name} values (1, '3'),(1, '{"a" : 1}'), (1, '{"a" : [1]}');""" - sql """insert into ${table_name} values (1, '"4"'),(1, '{"a" : "1223"}');""" - sql """insert into ${table_name} values (1, '5.0'),(1, '{"a" : [1]}');""" - sql """insert into ${table_name} values (1, '"[6]"'),(1, '{"a" : ["1", 2, 1.1]}');""" - sql """insert into ${table_name} values (1, '7'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" - sql """insert into ${table_name} values (1, '8.11111'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (1, '"9999"'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (1, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (1, '[123.0]'),(1, '{"a" : 1, "b" : {"c" : 1}}'),(1, '{"a" : 1, "b" : 10}');""" - sql """insert into ${table_name} values (1, '[123.2]'),(1, '{"a" : 1, "b" : 10}'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" - qt_sql "select count() from ${table_name}" + sql """insert into ${table_name} values (2, '[2]'),(1, '{"a" : [[[1]]]}');""" + sql """insert into ${table_name} values (3, '3'),(1, '{"a" : 1}'), (1, '{"a" : [1]}');""" + sql """insert into ${table_name} values (4, '"4"'),(1, '{"a" : "1223"}');""" + sql """insert into ${table_name} values (5, '5.0'),(1, '{"a" : [1]}');""" + sql """insert into ${table_name} values (6, '"[6]"'),(1, '{"a" : ["1", 2, 1.1]}');""" + sql """insert into ${table_name} values (7, '7'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" + sql """insert into ${table_name} values (8, '8.11111'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (9, '"9999"'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (10, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + sql """insert into ${table_name} values (11, '[123.0]'),(1, '{"a" : 1, "b" : {"c" : 1}}'),(1, '{"a" : 1, "b" : 10}');""" + sql """insert into ${table_name} values (12, '[123.2]'),(1, '{"a" : 1, "b" : 10}'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" + sql """insert into ${table_name} values (13, '{"id":"25060169353","type":"PushEvent","actor":{"id":66998102,"login":"yessine09","display_login":"yessine09","gravatar_id":"","url":"https://api.github.com/users/yessine09","avatar_url":"https://avatars.githubusercontent.com/u/66998102?"},"repo":{"id":558113479,"name":"yessine09/tpAchat","url":"https://api.github.com/repos/yessine09/tpAchat"},"payload":{"push_id":11571720453,"size":1,"distinct_size":1,"ref":"refs/heads/yessine","head":"c6e9675024be85f488a40569ca2d7d5a41d632d4","before":"21d50347d2d11e43e43446584a5d2c14561bd0a2","commits":[{"sha":"c6e9675024be85f488a40569ca2d7d5a41d632d4","author":{"email":"yessine.akaichi@esprit.tn","name":"yessine09"},"message":"test commit","distinct":true,"url":"https://api.github.com/repos/yessine09/tpAchat/commits/c6e9675024be85f488a40569ca2d7d5a41d632d4"}]},"public":true,"created_at":"2022-11-07T00:00:00Z"}'), (2, '{"id":"25060169349","type":"ForkEvent","actor":{"id":104698059,"login":"irina-marzioni-tdf","display_login":"irina-marzioni-tdf","gravatar_id":"","url":"https://api.github.com/users/irina-marzioni-tdf","avatar_url":"https://avatars.githubusercontent.com/u/104698059?"},"repo":{"id":557482773,"name":"dddario-TDF-EDU/ejercicios-2022-10-19-Trabajo-en-grupo","url":"https://api.github.com/repos/dddario-TDF-EDU/ejercicios-2022-10-19-Trabajo-en-grupo"},"payload":{"forkee":{"id":562642534,"node_id":"R_kgDOIYk-Zg","name":"ejercicios-2022-10-19-Trabajo-en-grupo","full_name":"irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo","private":false,"owner":{"login":"irina-marzioni-tdf","id":104698059,"node_id":"U_kgDOBj2Qyw","avatar_url":"https://avatars.githubusercontent.com/u/104698059?v=4","gravatar_id":"","url":"https://api.github.com/users/irina-marzioni-tdf","html_url":"https://github.com/irina-marzioni-tdf","followers_url":"https://api.github.com/users/irina-marzioni-tdf/followers","following_url":"https://api.github.com/users/irina-marzioni-tdf/following{/other_user}","gists_url":"https://api.github.com/users/irina-marzioni-tdf/gists{/gist_id}","starred_url":"https://api.github.com/users/irina-marzioni-tdf/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/irina-marzioni-tdf/subscriptions","organizations_url":"https://api.github.com/users/irina-marzioni-tdf/orgs","repos_url":"https://api.github.com/users/irina-marzioni-tdf/repos","events_url":"https://api.github.com/users/irina-marzioni-tdf/events{/privacy}","received_events_url":"https://api.github.com/users/irina-marzioni-tdf/received_events","type":"User","site_admin":false},"html_url":"https://github.com/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo","description":null,"fork":true,"url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo","forks_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/forks","keys_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/keys{/key_id}","collaborators_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/teams","hooks_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/hooks","issue_events_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/issues/events{/number}","events_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/events","assignees_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/assignees{/user}","branches_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/branches{/branch}","tags_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/tags","blobs_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/git/refs{/sha}","trees_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/git/trees{/sha}","statuses_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/statuses/{sha}","languages_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/languages","stargazers_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/stargazers","contributors_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/contributors","subscribers_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/subscribers","subscription_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/subscription","commits_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/commits{/sha}","git_commits_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/git/commits{/sha}","comments_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/comments{/number}","issue_comment_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/issues/comments{/number}","contents_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/contents/{+path}","compare_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/compare/{base}...{head}","merges_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/merges","archive_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/downloads","issues_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/issues{/number}","pulls_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/pulls{/number}","milestones_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/milestones{/number}","notifications_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/labels{/name}","releases_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/releases{/id}","deployments_url":"https://api.github.com/repos/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo/deployments","created_at":"2022-11-06T23:59:59Z","updated_at":"2022-10-25T19:15:10Z","pushed_at":"2022-10-25T19:35:33Z","git_url":"git://github.com/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo.git","ssh_url":"git@github.com:irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo.git","clone_url":"https://github.com/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo.git","svn_url":"https://github.com/irina-marzioni-tdf/ejercicios-2022-10-19-Trabajo-en-grupo","homepage":null,"size":153,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":0,"watchers":0,"default_branch":"main","public":true}},"public":true,"created_at":"2022-11-07T00:00:00Z"}')""" + verify table_name + + table_name = "type_conflict_resolution" + create_table table_name + sql """insert into ${table_name} values (1, '{"c" : "123"}');""" + sql """insert into ${table_name} values (2, '{"c" : 123}');""" + sql """insert into ${table_name} values (3, '{"cc" : [123]}');""" + sql """insert into ${table_name} values (4, '{"cc" : [123.1]}');""" + sql """insert into ${table_name} values (5, '{"ccc" : 123}');""" + sql """insert into ${table_name} values (6, '{"ccc" : 123321}');""" + sql """insert into ${table_name} values (7, '{"cccc" : 123}');""" + sql """insert into ${table_name} values (8, '{"cccc" : 123.11}');""" + sql """insert into ${table_name} values (9, '{"ccccc" : [123]}');""" + sql """insert into ${table_name} values (10, '{"ccccc" : [123456789]}');""" + sql """insert into ${table_name} values (11, '{"b" : 1111111111111111}');""" + sql """insert into ${table_name} values (12, '{"b" : 1.222222}');""" + sql """insert into ${table_name} values (13, '{"bb" : 1}');""" + sql """insert into ${table_name} values (14, '{"bb" : 214748364711}');""" + verify table_name }