diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp index 57f1f54b7b91f5..2fb0afea82ae8a 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp @@ -66,7 +66,9 @@ ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s src_physical_type = tparquet::Type::INT32; src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT); } - if (is_consistent() && _logical_converter->is_consistent()) { + + if (!_convert_params->is_type_compatibility && is_consistent() && + _logical_converter->is_consistent()) { if (_cached_src_physical_type == nullptr) { _cached_src_physical_type = DataTypeFactory::instance().create_data_type( src_logical_type, dst_logical_type->is_nullable()); @@ -246,7 +248,19 @@ std::unique_ptr PhysicalToLogicalConverter::get_conv } PrimitiveType src_logical_primitive = src_logical_type.type; - if (is_parquet_native_type(src_logical_primitive)) { + if (field_schema->is_type_compatibility) { + if (src_logical_type == TYPE_SMALLINT) { + physical_converter.reset(new UnsignedIntegerConverter()); + } else if (src_logical_type == TYPE_INT) { + physical_converter.reset(new UnsignedIntegerConverter()); + } else if (src_logical_type == TYPE_BIGINT) { + physical_converter.reset(new UnsignedIntegerConverter()); + } else if (src_logical_type == TYPE_LARGEINT) { + physical_converter.reset(new UnsignedIntegerConverter()); + } else { + physical_converter.reset(new UnsupportedConverter(src_physical_type, src_logical_type)); + } + } else if (is_parquet_native_type(src_logical_primitive)) { if (is_string_type(src_logical_primitive) && src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) { // for FixedSizeBinary diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 551bf7e14edbc8..91b81121aa4303 100644 --- 
a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -40,6 +40,9 @@ struct ConvertParams { DecimalScaleParams decimal_scale; FieldSchema* field_schema = nullptr; + //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. + bool is_type_compatibility = false; + /** * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have * logicalType or converted_type to indicates its precision. We have to reset the time mask. @@ -108,6 +111,7 @@ struct ConvertParams { t.from_unixtime(0, *ctz); offset_days = t.day() == 31 ? -1 : 0; } + is_type_compatibility = field_schema_->is_type_compatibility; } template @@ -273,6 +277,67 @@ class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { } }; +template +struct UnsignedTypeTraits; + +template <> +struct UnsignedTypeTraits { + using UnsignedCppType = UInt8; + //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers + //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) + //must annotate an int64 primitive type. 
+ using StorageCppType = Int32; + using StorageColumnType = vectorized::ColumnInt32; +}; + +template <> +struct UnsignedTypeTraits { + using UnsignedCppType = UInt16; + using StorageCppType = Int32; + using StorageColumnType = vectorized::ColumnInt32; +}; + +template <> +struct UnsignedTypeTraits { + using UnsignedCppType = UInt32; + using StorageCppType = Int32; + using StorageColumnType = vectorized::ColumnInt32; +}; + +template <> +struct UnsignedTypeTraits { + using UnsignedCppType = UInt64; + using StorageCppType = Int64; + using StorageColumnType = vectorized::ColumnInt64; +}; + +template +class UnsignedIntegerConverter : public PhysicalToLogicalConverter { + Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { + using UnsignedCppType = typename UnsignedTypeTraits::UnsignedCppType; + using StorageCppType = typename UnsignedTypeTraits::StorageCppType; + using StorageColumnType = typename UnsignedTypeTraits::StorageColumnType; + using DstColumnType = typename PrimitiveTypeTraits::ColumnType; + + ColumnPtr from_col = remove_nullable(src_physical_col); + MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + auto& src_data = static_cast(from_col.get())->get_data(); + + size_t rows = src_data.size(); + size_t start_idx = to_col->size(); + to_col->resize(start_idx + rows); + auto& data = static_cast(*to_col.get()).get_data(); + + for (int i = 0; i < rows; i++) { + StorageCppType src_value = src_data[i]; + auto unsigned_value = static_cast(src_value); + data[start_idx + i] = unsigned_value; + } + + return Status::OK(); + } +}; + class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { private: int _type_length; diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index 08692de8743c06..9097b65718f53d 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -191,16 +191,19 
@@ void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& physic physical_field->physical_type = physical_schema.type; _physical_fields.push_back(physical_field); physical_field->physical_column_index = _physical_fields.size() - 1; - physical_field->type = get_doris_type(physical_schema); + auto type = get_doris_type(physical_schema); + physical_field->type = type.first; + physical_field->is_type_compatibility = type.second; } -TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& physical_schema) { - TypeDescriptor type; - type.type = INVALID_TYPE; +std::pair FieldDescriptor::get_doris_type( + const tparquet::SchemaElement& physical_schema) { + std::pair ans = {INVALID_TYPE, false}; + TypeDescriptor& type = ans.first; if (physical_schema.__isset.logicalType) { - type = convert_to_doris_type(physical_schema.logicalType); + ans = convert_to_doris_type(physical_schema.logicalType); } else if (physical_schema.__isset.converted_type) { - type = convert_to_doris_type(physical_schema); + ans = convert_to_doris_type(physical_schema); } // use physical type instead if (type.type == INVALID_TYPE) { @@ -233,7 +236,7 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph break; } } - return type; + return ans; } // Copy from org.apache.iceberg.avro.AvroSchemaUtil#validAvroName @@ -302,8 +305,11 @@ void FieldDescriptor::iceberg_sanitize(const std::vector& read_colu } } -TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) { - TypeDescriptor type; +std::pair FieldDescriptor::convert_to_doris_type( + tparquet::LogicalType logicalType) { + std::pair ans = {INVALID_TYPE, false}; + TypeDescriptor& type = ans.first; + bool& is_type_compatibility = ans.second; if (logicalType.__isset.STRING) { type = TypeDescriptor(TYPE_STRING); } else if (logicalType.__isset.DECIMAL) { @@ -313,16 +319,25 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi type = 
TypeDescriptor(TYPE_DATEV2); } else if (logicalType.__isset.INTEGER) { if (logicalType.INTEGER.isSigned) { - if (logicalType.INTEGER.bitWidth <= 32) { + if (logicalType.INTEGER.bitWidth <= 8) { + type = TypeDescriptor(TYPE_TINYINT); + } else if (logicalType.INTEGER.bitWidth <= 16) { + type = TypeDescriptor(TYPE_SMALLINT); + } else if (logicalType.INTEGER.bitWidth <= 32) { type = TypeDescriptor(TYPE_INT); } else { type = TypeDescriptor(TYPE_BIGINT); } } else { - if (logicalType.INTEGER.bitWidth <= 16) { + is_type_compatibility = true; + if (logicalType.INTEGER.bitWidth <= 8) { + type = TypeDescriptor(TYPE_SMALLINT); + } else if (logicalType.INTEGER.bitWidth <= 16) { type = TypeDescriptor(TYPE_INT); - } else { + } else if (logicalType.INTEGER.bitWidth <= 32) { type = TypeDescriptor(TYPE_BIGINT); + } else { + type = TypeDescriptor(TYPE_LARGEINT); } } } else if (logicalType.__isset.TIME) { @@ -344,12 +359,14 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi } else { type = TypeDescriptor(INVALID_TYPE); } - return type; + return ans; } -TypeDescriptor FieldDescriptor::convert_to_doris_type( +std::pair FieldDescriptor::convert_to_doris_type( const tparquet::SchemaElement& physical_schema) { - TypeDescriptor type; + std::pair ans = {INVALID_TYPE, false}; + TypeDescriptor& type = ans.first; + bool& is_type_compatibility = ans.second; switch (physical_schema.converted_type) { case tparquet::ConvertedType::type::UTF8: type = TypeDescriptor(TYPE_STRING); @@ -378,28 +395,33 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type( type = TypeDescriptor(TYPE_TINYINT); break; case tparquet::ConvertedType::type::UINT_8: + is_type_compatibility = true; [[fallthrough]]; case tparquet::ConvertedType::type::INT_16: type = TypeDescriptor(TYPE_SMALLINT); break; case tparquet::ConvertedType::type::UINT_16: + is_type_compatibility = true; [[fallthrough]]; case tparquet::ConvertedType::type::INT_32: type = TypeDescriptor(TYPE_INT); break; case 
tparquet::ConvertedType::type::UINT_32: - [[fallthrough]]; - case tparquet::ConvertedType::type::UINT_64: + is_type_compatibility = true; [[fallthrough]]; case tparquet::ConvertedType::type::INT_64: type = TypeDescriptor(TYPE_BIGINT); break; + case tparquet::ConvertedType::type::UINT_64: + is_type_compatibility = true; + type = TypeDescriptor(TYPE_LARGEINT); + break; default: LOG(WARNING) << "Not supported parquet ConvertedType: " << physical_schema.converted_type; type = TypeDescriptor(INVALID_TYPE); break; } - return type; + return ans; } Status FieldDescriptor::parse_group_field(const std::vector& t_schemas, diff --git a/be/src/vec/exec/format/parquet/schema_desc.h b/be/src/vec/exec/format/parquet/schema_desc.h index 50e526bd7300a2..ca726ef1b57590 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.h +++ b/be/src/vec/exec/format/parquet/schema_desc.h @@ -49,6 +49,9 @@ struct FieldSchema { int16_t repeated_parent_def_level = 0; std::vector children; + //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. 
+ bool is_type_compatibility = false; + FieldSchema() = default; ~FieldSchema() = default; FieldSchema(const FieldSchema& fieldSchema) = default; @@ -84,12 +87,13 @@ class FieldDescriptor { Status parse_node_field(const std::vector& t_schemas, size_t curr_pos, FieldSchema* node_field); - TypeDescriptor convert_to_doris_type(tparquet::LogicalType logicalType); + std::pair convert_to_doris_type(tparquet::LogicalType logicalType); - TypeDescriptor convert_to_doris_type(const tparquet::SchemaElement& physical_schema); + std::pair convert_to_doris_type( + const tparquet::SchemaElement& physical_schema); public: - TypeDescriptor get_doris_type(const tparquet::SchemaElement& physical_schema); + std::pair get_doris_type(const tparquet::SchemaElement& physical_schema); // org.apache.iceberg.avro.AvroSchemaUtil#sanitize will encode special characters, // we have to decode these characters diff --git a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out index 406bc7660ffd68..5748cedd2289c5 100644 --- a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out +++ b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out @@ -100,12 +100,12 @@ id int Yes false \N NONE 9 1 string 27 false 5 true 1 -- !desc_s3 -- -__add_5 int Yes false \N NONE -__bit_or_7 int Yes false \N NONE +__add_5 smallint Yes false \N NONE +__bit_or_7 tinyint Yes false \N NONE __cast_3 bigint Yes false \N NONE __greater_than_4 boolean Yes false \N NONE __in_predicate_6 boolean Yes false \N NONE -__literal_1 int Yes false \N NONE +__literal_1 tinyint Yes false \N NONE __literal_2 text Yes false \N NONE id int Yes false \N NONE diff --git a/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out 
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out new file mode 100644 index 00000000000000..4cda2746a00b37 --- /dev/null +++ b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out @@ -0,0 +1,440 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_1 -- +0 254 54979 3876010132 12292188520939801104 +1 194 41087 2468877118 11276751473207154134 +2 204 40618 3112417582 12531644712494408881 +3 252 42885 2959241092 12524633273560617315 +4 168 34898 3576407414 13096188234563161622 +5 252 39188 3127662355 16817880057727309281 +6 146 45695 2457180674 13258241842726882717 +7 223 54096 3818049848 13392705476834798352 +8 247 49103 4235043353 14882039847048390015 +9 171 37711 4121304297 12690575475523254418 +10 208 45522 2849886325 9887016810088790216 +11 140 57808 4249890046 11248669065541052626 +12 154 63353 2494834510 10992021804074820099 +13 169 62034 3442632722 13741258842709060503 +14 201 39884 2544963866 13079528300040741505 +15 208 48887 2865890129 15763071531082484590 +16 200 58596 2907876388 13993629507485604147 +17 210 50277 3777672898 9348401130412952441 +18 212 51965 3618857131 16174811648348878672 +19 161 39818 2368776725 15511547001336881917 +20 160 41750 2980821608 11023149785626200227 +21 141 57340 4284126112 15552326510762976216 +22 139 58391 4117149789 15878585387857525976 +23 212 59593 2459376464 15173162289502083954 +24 165 53957 3427228999 17619046273898992624 +25 229 49948 2600007878 15455673680657024177 +26 134 64599 3738904119 16329279862825806683 +27 205 41073 2442068534 14375087414295500534 +28 204 60681 3422219729 18264453353631472102 +29 136 47361 3588730082 13238158010948418606 +30 252 42761 4175633691 15734976010511914446 +31 150 45199 4078820644 15090555688768508964 +32 210 57428 4009891336 16805687728956091249 +33 235 34379 3963852622 13291314989308428602 +34 226 33850 3643451160 15146799101548124057 +35 133 54253 
2734990478 12652028700805136484 +36 166 61697 3958278249 9924263812189602832 +37 137 55610 2782254392 18017977508881163787 +38 166 41727 3012921589 12285785983985756200 +39 194 46987 2351267776 18191890018150428322 +40 201 61160 3877183539 17825711874965469887 +41 209 54646 3739634163 12447543494188025837 +42 157 65086 2250645811 15257822118590463416 +43 223 56812 4101782180 9308776360130037320 +44 135 61061 2806848998 15345311951702932353 +45 147 39862 3521569945 17679765098696012133 +46 229 38351 2299955463 11589155413771055105 +47 141 49268 2740241757 16646921996087125692 +48 144 53152 3733884127 12946127273932789697 +49 200 48517 4024062219 15103440093398422258 + +-- !test_2 -- +id int Yes false \N NONE +uint8_column smallint Yes false \N NONE +uint16_column int Yes false \N NONE +uint32_column bigint Yes false \N NONE +uint64_column largeint Yes false \N NONE + +-- !test_3 -- +id int Yes false \N NONE +uint8_column smallint Yes false \N NONE +uint16_column int Yes false \N NONE +uint32_column bigint Yes false \N NONE +uint64_column largeint Yes false \N NONE + +-- !test_4 -- +id int Yes false \N NONE +uint8_column smallint Yes false \N NONE +uint16_column int Yes false \N NONE +uint32_column bigint Yes false \N NONE +uint64_column largeint Yes false \N NONE + +-- !test_5 -- +0 200 48438 3775964178 9957179629640951554 +1 197 37330 2548711656 11127988488306710308 +2 200 65117 3419624570 11652168873218179696 +3 223 39174 3669818470 17675778338328486831 +4 196 60825 3064802389 18435946529637287047 +5 148 58165 3414732886 10219427643252484377 +6 217 61318 4075782121 10508828167797335037 +7 185 46040 2798591473 16764958584953558745 +8 148 50700 2541953946 11228894632595899400 +9 235 41262 3398155979 14622381114650660637 +10 189 49552 3754066331 16077913453789464999 +11 138 41102 2488033055 11226600460306403745 +12 132 62435 3745892606 17429590483490742063 +13 164 34858 3380810793 15447251686769296941 +14 246 44518 4213847006 13168159509128150529 +15 128 34475 
2584892403 14470296482506793549 +16 180 40732 2667143993 13453719924509014943 +17 206 49824 3079026622 9242535560261271930 +18 250 61860 3723105814 9754643556067415401 +19 248 51724 4229819070 15295668751829551718 +20 237 38710 2951615403 14702842917502593482 +21 133 46707 2464573031 15860122391678970590 +22 249 41930 4210407904 12677893784378713520 +23 218 34250 3657784995 14768434928936445583 +24 134 52064 4274319068 16672757830801563734 +25 179 54232 3587434213 10613547124477746521 +26 217 63838 2348239122 17998346098073086386 +27 190 52362 3442840997 13122160861538572795 +28 131 39957 2321439682 16792774292797573856 +29 143 50902 2914375790 10557924491128546903 +30 228 33365 2438054546 14302876531585763284 +31 251 59126 2308219390 12753986538521770387 +32 197 58579 2647214662 16916351783057256258 +33 246 62028 4202894981 16869494938800942234 +34 229 47800 3963077237 12000510020655611310 +35 195 43221 2980563838 17324881735239531242 +36 210 46389 3302751013 10936691069329977133 +37 190 60582 4294458835 13465474203101539625 +38 234 58068 2575498858 18264769986785189204 +39 244 55282 2242973260 14725356984149511589 +40 224 33882 3169401634 9641824149700693760 +41 210 48824 2805949235 17877575698845246971 +42 250 49871 4015422133 11571502995585068959 +43 253 53173 3315146396 15527276320077174431 +44 205 48824 2153923483 18227485227719579199 +45 202 65101 2286014232 10596463304445669324 +46 144 39272 3664679383 14619550616500589262 +47 207 58788 2459180916 15787272910593406395 +48 236 61302 2814354943 15797302386492157450 +49 222 44025 2267428102 16226182608093468653 + +-- !test_6 -- +0 254 65535 4294967294 18446744073709551614 +1 254 65534 4294967295 18446744073709551615 +2 254 65535 4294967294 18446744073709551615 +3 255 65535 4294967294 18446744073709551615 +4 255 65534 4294967295 18446744073709551615 +5 254 65534 4294967295 18446744073709551614 +6 254 65534 4294967294 18446744073709551614 +7 255 65534 4294967294 18446744073709551614 +8 255 65534 4294967295 
18446744073709551614 +9 255 65535 4294967294 18446744073709551614 + +-- !test_7 -- +id bigint Yes false \N NONE +mediumint_unsigned bigint Yes false \N NONE +int_unsigned bigint Yes false \N NONE + +-- !test_8 -- +1 111 111 +2 222 222 + +-- !test_9 -- +16 200 58596 2907876388 13993629507485604147 +49 200 48517 4024062219 15103440093398422258 + +-- !test_10 -- +38 166 41727 3012921589 12285785983985756200 + +-- !test_11 -- +46 229 38351 2299955463 11589155413771055105 + +-- !test_12 -- +49 200 48517 4024062219 15103440093398422258 + +-- !test_13 -- +49 222 44025 2267428102 16226182608093468653 + +-- !test_14 -- +38 234 58068 2575498858 18264769986785189204 + +-- !test_15 -- +14 246 44518 4213847006 13168159509128150529 + +-- !test_16 -- +25 179 54232 3587434213 10613547124477746521 + +-- !test_17 -- +500 + +-- !test_18 -- +500 + +-- !test_19 -- +500 + +-- !test_20 -- +500 + +-- !test_21 -- +500 + +-- !test_22 -- +0 254 65535 4294967294 18446744073709551614 +5 254 65534 4294967295 18446744073709551614 +6 254 65534 4294967294 18446744073709551614 +7 255 65534 4294967294 18446744073709551614 +8 255 65534 4294967295 18446744073709551614 +9 255 65535 4294967294 18446744073709551614 +10 255 65535 4294967294 18446744073709551614 +11 254 65535 4294967294 18446744073709551614 +12 255 65535 4294967294 18446744073709551614 +14 254 65535 4294967295 18446744073709551614 +16 255 65535 4294967295 18446744073709551614 +17 255 65535 4294967294 18446744073709551614 +18 255 65534 4294967295 18446744073709551614 +20 255 65535 4294967294 18446744073709551614 +21 255 65534 4294967294 18446744073709551614 +22 255 65534 4294967294 18446744073709551614 +24 255 65534 4294967294 18446744073709551614 +25 254 65535 4294967295 18446744073709551614 +27 255 65534 4294967294 18446744073709551614 +29 255 65534 4294967295 18446744073709551614 +30 255 65534 4294967295 18446744073709551614 +32 254 65534 4294967294 18446744073709551614 +37 254 65534 4294967295 18446744073709551614 +38 254 65535 
4294967294 18446744073709551614 +40 255 65535 4294967294 18446744073709551614 +41 255 65534 4294967294 18446744073709551614 +43 254 65534 4294967295 18446744073709551614 +44 254 65534 4294967294 18446744073709551614 +45 255 65534 4294967295 18446744073709551614 +46 254 65534 4294967295 18446744073709551614 +53 254 65534 4294967294 18446744073709551614 +56 254 65535 4294967294 18446744073709551614 +58 255 65534 4294967295 18446744073709551614 +60 254 65535 4294967295 18446744073709551614 +62 255 65534 4294967295 18446744073709551614 +63 255 65535 4294967294 18446744073709551614 +66 255 65534 4294967295 18446744073709551614 +68 254 65535 4294967295 18446744073709551614 +71 254 65535 4294967295 18446744073709551614 +73 254 65535 4294967295 18446744073709551614 +75 254 65534 4294967294 18446744073709551614 +76 255 65534 4294967294 18446744073709551614 +77 254 65535 4294967295 18446744073709551614 +80 254 65534 4294967295 18446744073709551614 +81 255 65534 4294967294 18446744073709551614 +84 255 65534 4294967295 18446744073709551614 +86 255 65535 4294967295 18446744073709551614 +87 254 65534 4294967295 18446744073709551614 +89 255 65535 4294967294 18446744073709551614 +91 254 65534 4294967294 18446744073709551614 +92 255 65534 4294967294 18446744073709551614 +94 254 65535 4294967294 18446744073709551614 +98 255 65534 4294967295 18446744073709551614 +99 254 65535 4294967294 18446744073709551614 +100 255 65534 4294967295 18446744073709551614 +102 255 65534 4294967295 18446744073709551614 +103 255 65534 4294967295 18446744073709551614 +106 254 65534 4294967295 18446744073709551614 +108 254 65535 4294967295 18446744073709551614 +109 255 65535 4294967294 18446744073709551614 +110 254 65534 4294967295 18446744073709551614 +112 255 65535 4294967295 18446744073709551614 +115 255 65534 4294967295 18446744073709551614 +117 255 65535 4294967294 18446744073709551614 +120 254 65535 4294967294 18446744073709551614 +121 254 65535 4294967295 18446744073709551614 +123 254 65535 
4294967294 18446744073709551614 +124 254 65535 4294967295 18446744073709551614 +130 255 65534 4294967294 18446744073709551614 +131 254 65535 4294967295 18446744073709551614 +132 254 65535 4294967294 18446744073709551614 +133 254 65534 4294967295 18446744073709551614 +139 254 65535 4294967295 18446744073709551614 +141 255 65535 4294967295 18446744073709551614 +148 255 65535 4294967294 18446744073709551614 +149 254 65534 4294967295 18446744073709551614 +150 254 65534 4294967295 18446744073709551614 +151 254 65534 4294967294 18446744073709551614 +152 254 65535 4294967295 18446744073709551614 +157 255 65534 4294967295 18446744073709551614 +158 255 65535 4294967294 18446744073709551614 +167 255 65534 4294967295 18446744073709551614 +168 254 65535 4294967294 18446744073709551614 +169 254 65534 4294967294 18446744073709551614 +173 255 65534 4294967294 18446744073709551614 +174 254 65535 4294967295 18446744073709551614 +177 254 65535 4294967295 18446744073709551614 +184 255 65535 4294967295 18446744073709551614 +186 254 65535 4294967295 18446744073709551614 +187 255 65535 4294967295 18446744073709551614 +189 254 65534 4294967295 18446744073709551614 +190 254 65535 4294967295 18446744073709551614 +191 254 65535 4294967294 18446744073709551614 +192 254 65535 4294967294 18446744073709551614 +194 255 65534 4294967294 18446744073709551614 +195 255 65534 4294967295 18446744073709551614 +196 254 65535 4294967295 18446744073709551614 +197 255 65535 4294967295 18446744073709551614 +204 255 65535 4294967295 18446744073709551614 +207 255 65535 4294967295 18446744073709551614 +210 255 65535 4294967294 18446744073709551614 +211 255 65535 4294967294 18446744073709551614 +213 254 65534 4294967295 18446744073709551614 +214 255 65534 4294967295 18446744073709551614 +216 254 65535 4294967295 18446744073709551614 +217 255 65535 4294967295 18446744073709551614 +222 255 65534 4294967295 18446744073709551614 +226 255 65535 4294967294 18446744073709551614 +227 254 65534 4294967294 
18446744073709551614 +228 254 65535 4294967295 18446744073709551614 +229 255 65535 4294967294 18446744073709551614 +231 254 65534 4294967294 18446744073709551614 +232 254 65534 4294967294 18446744073709551614 +233 255 65535 4294967295 18446744073709551614 +240 255 65534 4294967295 18446744073709551614 +241 254 65535 4294967294 18446744073709551614 +243 254 65534 4294967294 18446744073709551614 +244 255 65534 4294967295 18446744073709551614 +246 255 65534 4294967294 18446744073709551614 +248 255 65535 4294967295 18446744073709551614 +251 254 65535 4294967294 18446744073709551614 +253 255 65534 4294967294 18446744073709551614 +256 255 65534 4294967294 18446744073709551614 +257 255 65535 4294967294 18446744073709551614 +258 254 65534 4294967294 18446744073709551614 +261 254 65535 4294967295 18446744073709551614 +262 254 65535 4294967295 18446744073709551614 +264 255 65534 4294967294 18446744073709551614 +266 254 65535 4294967295 18446744073709551614 +267 255 65534 4294967294 18446744073709551614 +269 254 65535 4294967294 18446744073709551614 +270 255 65534 4294967294 18446744073709551614 +273 254 65535 4294967294 18446744073709551614 +279 255 65535 4294967295 18446744073709551614 +281 254 65534 4294967294 18446744073709551614 +283 255 65534 4294967295 18446744073709551614 +285 254 65535 4294967294 18446744073709551614 +287 255 65535 4294967294 18446744073709551614 +288 255 65534 4294967294 18446744073709551614 +290 254 65534 4294967295 18446744073709551614 +291 255 65535 4294967295 18446744073709551614 +292 255 65534 4294967295 18446744073709551614 +295 254 65535 4294967294 18446744073709551614 +296 255 65534 4294967295 18446744073709551614 +298 255 65535 4294967294 18446744073709551614 +301 255 65534 4294967295 18446744073709551614 +302 254 65534 4294967294 18446744073709551614 +307 254 65535 4294967294 18446744073709551614 +308 254 65535 4294967294 18446744073709551614 +309 254 65535 4294967295 18446744073709551614 +313 254 65534 4294967295 18446744073709551614 +317 
254 65534 4294967294 18446744073709551614 +319 255 65535 4294967295 18446744073709551614 +320 254 65535 4294967295 18446744073709551614 +321 254 65535 4294967295 18446744073709551614 +322 254 65535 4294967295 18446744073709551614 +325 255 65535 4294967295 18446744073709551614 +326 254 65534 4294967295 18446744073709551614 +327 255 65535 4294967295 18446744073709551614 +328 255 65534 4294967294 18446744073709551614 +331 254 65535 4294967294 18446744073709551614 +332 255 65535 4294967294 18446744073709551614 +334 255 65534 4294967294 18446744073709551614 +336 254 65535 4294967295 18446744073709551614 +337 254 65535 4294967295 18446744073709551614 +339 255 65535 4294967295 18446744073709551614 +341 254 65535 4294967294 18446744073709551614 +342 255 65534 4294967294 18446744073709551614 +345 254 65534 4294967294 18446744073709551614 +346 255 65535 4294967294 18446744073709551614 +347 255 65534 4294967295 18446744073709551614 +349 255 65534 4294967295 18446744073709551614 +350 254 65535 4294967295 18446744073709551614 +353 255 65534 4294967294 18446744073709551614 +355 255 65535 4294967294 18446744073709551614 +356 254 65535 4294967295 18446744073709551614 +357 254 65535 4294967295 18446744073709551614 +361 254 65535 4294967295 18446744073709551614 +363 254 65535 4294967295 18446744073709551614 +367 255 65535 4294967294 18446744073709551614 +368 254 65534 4294967295 18446744073709551614 +372 254 65535 4294967294 18446744073709551614 +373 255 65535 4294967294 18446744073709551614 +376 255 65535 4294967294 18446744073709551614 +377 254 65535 4294967295 18446744073709551614 +379 255 65535 4294967294 18446744073709551614 +382 254 65534 4294967295 18446744073709551614 +385 254 65535 4294967294 18446744073709551614 +389 254 65534 4294967294 18446744073709551614 +390 255 65535 4294967295 18446744073709551614 +391 254 65535 4294967294 18446744073709551614 +393 255 65534 4294967295 18446744073709551614 +395 254 65535 4294967294 18446744073709551614 +396 254 65534 4294967294 
18446744073709551614 +398 254 65535 4294967294 18446744073709551614 +400 254 65534 4294967294 18446744073709551614 +401 254 65534 4294967294 18446744073709551614 +402 255 65534 4294967295 18446744073709551614 +403 254 65534 4294967295 18446744073709551614 +408 255 65535 4294967294 18446744073709551614 +409 254 65534 4294967295 18446744073709551614 +410 254 65534 4294967294 18446744073709551614 +411 255 65535 4294967294 18446744073709551614 +412 254 65535 4294967294 18446744073709551614 +413 255 65534 4294967294 18446744073709551614 +415 254 65534 4294967294 18446744073709551614 +417 254 65534 4294967295 18446744073709551614 +418 254 65535 4294967294 18446744073709551614 +420 255 65535 4294967294 18446744073709551614 +424 255 65535 4294967294 18446744073709551614 +426 254 65535 4294967295 18446744073709551614 +430 255 65535 4294967294 18446744073709551614 +431 255 65534 4294967295 18446744073709551614 +432 255 65535 4294967294 18446744073709551614 +434 254 65535 4294967294 18446744073709551614 +440 255 65534 4294967294 18446744073709551614 +441 254 65535 4294967294 18446744073709551614 +443 254 65535 4294967295 18446744073709551614 +444 255 65535 4294967295 18446744073709551614 +450 254 65534 4294967294 18446744073709551614 +454 255 65534 4294967294 18446744073709551614 +456 255 65535 4294967294 18446744073709551614 +458 255 65535 4294967295 18446744073709551614 +459 254 65534 4294967294 18446744073709551614 +461 255 65535 4294967294 18446744073709551614 +462 255 65535 4294967295 18446744073709551614 +463 254 65534 4294967294 18446744073709551614 +464 255 65534 4294967295 18446744073709551614 +465 254 65535 4294967294 18446744073709551614 +466 255 65534 4294967295 18446744073709551614 +468 254 65534 4294967295 18446744073709551614 +470 254 65535 4294967294 18446744073709551614 +471 255 65534 4294967294 18446744073709551614 +472 254 65535 4294967295 18446744073709551614 +473 255 65534 4294967295 18446744073709551614 +474 255 65535 4294967295 18446744073709551614 +475 
255 65535 4294967294 18446744073709551614 +477 254 65534 4294967294 18446744073709551614 +480 255 65535 4294967295 18446744073709551614 +483 254 65535 4294967294 18446744073709551614 +485 255 65535 4294967294 18446744073709551614 +486 254 65534 4294967294 18446744073709551614 +487 254 65534 4294967295 18446744073709551614 +488 255 65535 4294967295 18446744073709551614 +489 254 65535 4294967295 18446744073709551614 +490 255 65535 4294967294 18446744073709551614 +494 254 65534 4294967294 18446744073709551614 +495 255 65534 4294967295 18446744073709551614 +496 254 65534 4294967295 18446744073709551614 +497 255 65535 4294967294 18446744073709551614 +499 255 65534 4294967295 18446744073709551614 + diff --git a/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet new file mode 100644 index 00000000000000..8f87903255ff3b Binary files /dev/null and b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet differ diff --git a/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet new file mode 100644 index 00000000000000..fe48ab71842d17 Binary files /dev/null and b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet differ diff --git a/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet new file mode 100644 index 00000000000000..ff8dae5ecece17 Binary files /dev/null and b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet differ diff --git a/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet new file mode 100644 index 00000000000000..010d15497ec96f Binary files /dev/null and b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet differ diff 
--git a/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy new file mode 100644 index 00000000000000..24cfb5f2ac207c --- /dev/null +++ b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy @@ -0,0 +1,102 @@ +import org.junit.Assert + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+// This suite tests reading Parquet unsigned integer columns through the `local` tvf.
+suite("test_local_tvf_parquet_unsigned_integers", "p0") {
+    List<List<Object>> backends = sql """ show backends """
+    def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"
+
+    assertTrue(backends.size() > 0)
+
+    def be_id = backends[0][0]
+    // In cluster mode every backend must have a copy of the data files, so they are copied to all of them.
+    def outFilePath="/"
+    def transFile01="${dataFilePath}/unsigned_integers_1.parquet"
+    def transFile02="${dataFilePath}/unsigned_integers_2.parquet"
+    def transFile03="${dataFilePath}/unsigned_integers_3.parquet"
+    def transFile04="${dataFilePath}/unsigned_integers_4.parquet"
+
+    for (List<Object> backend : backends) {
+        def be_host = backend[1]
+        scpFiles ("root", be_host, transFile01, outFilePath, false);
+        scpFiles ("root", be_host, transFile02, outFilePath, false);
+        scpFiles ("root", be_host, transFile03, outFilePath, false);
+        scpFiles ("root", be_host, transFile04, outFilePath, false);
+    }
+
+    def file1 = outFilePath + "unsigned_integers_1.parquet";
+    def file2 = outFilePath + "unsigned_integers_2.parquet";
+    def file3 = outFilePath + "unsigned_integers_3.parquet";
+    def file4 = outFilePath + "unsigned_integers_4.parquet";
+
+
+
+    // Row reads and `desc function` schema checks for the four files.
+    qt_test_1 """ select * from local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_2 """ desc function local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet");"""
+
+    qt_test_3 """ desc function local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet");"""
+
+    qt_test_4 """ desc function local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet");"""
+
+    qt_test_5 """ select * from local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_6 """ select * from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") order by id limit 10;"""
+
+    qt_test_7 """ desc function local( "file_path" = "${file4}", "backend_id" = "${be_id}", "format" = "parquet");"""
+
+    qt_test_8 """ select * from local( "file_path" = "${file4}", "backend_id" = "${be_id}", "format" = "parquet") order by id ;"""
+
+
+    // Equality predicates on each unsigned column of file1.
+    qt_test_9 """ select * from local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet") where uint8_column = 200 order by id ;"""
+
+    qt_test_10 """ select * from local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet") where uint16_column = 41727 order by id ;"""
+
+    qt_test_11 """ select * from local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet") where uint32_column = 2299955463 order by id ;"""
+
+    qt_test_12 """ select * from local( "file_path" = "${file1}", "backend_id" = "${be_id}", "format" = "parquet") where uint64_column = 15103440093398422258 order by id ;"""
+
+
+    // The same kind of equality predicates against file2.
+    qt_test_13 """ select * from local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet") where uint8_column = 222 order by id ;"""
+
+    qt_test_14 """ select * from local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet") where uint16_column = 58068 order by id ;"""
+
+    qt_test_15 """ select * from local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet") where uint32_column = 4213847006 order by id ;"""
+
+    qt_test_16 """ select * from local( "file_path" = "${file2}", "backend_id" = "${be_id}", "format" = "parquet") where uint64_column = 10613547124477746521 order by id ;"""
+
+    // Per-column counts on file3, plus a predicate on 2^64 - 2 (one below the UInt64 maximum).
+    qt_test_17 """ select count(id) from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+    qt_test_18 """ select count(uint8_column) from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+    qt_test_19 """ select count(uint16_column) from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+    qt_test_20 """ select count(uint32_column) from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+    qt_test_21 """ select count(uint64_column) from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+    qt_test_22 """ select * from local( "file_path" = "${file3}", "backend_id" = "${be_id}", "format" = "parquet") where uint64_column = 18446744073709551614 order by id ;"""
+
+
+
+}