diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_AllStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_AllStripes.orc new file mode 100755 index 00000000000000..3030247ca17543 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_AllStripes.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_HalfStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_HalfStripes.orc new file mode 100755 index 00000000000000..1ae1c1a13d3466 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_BOTH_HalfStripes.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_AllStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_AllStripes.orc new file mode 100755 index 00000000000000..89ffeef05b8b89 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_AllStripes.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_HalfStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_HalfStripes.orc new file mode 100755 index 00000000000000..3fc798b9474dbb Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_FOOTER_HalfStripes.orc differ diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_AllStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_AllStripes.orc new file mode 100755 index 00000000000000..b927123b65cfa0 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_AllStripes.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_HalfStripes.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_HalfStripes.orc new file mode 100755 index 00000000000000..3f94bde0678499 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_INDEX_HalfStripes.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_NONE.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_NONE.orc new file mode 100755 index 00000000000000..1b5e35d6b091ca Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/DwrfStripeCache_NONE.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/before_1582_ts_v2_4.snappy.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/before_1582_ts_v2_4.snappy.orc new file mode 100644 index 00000000000000..af9ef040270ac3 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/before_1582_ts_v2_4.snappy.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/corrupted.orc 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/corrupted.orc new file mode 100644 index 00000000000000..08f7ab951f9948 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/corrupted.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_blob_stream_in_string_dict.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_blob_stream_in_string_dict.orc new file mode 100644 index 00000000000000..1c7f742039e81b Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_blob_stream_in_string_dict.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_length_stream_in_string_dict.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_length_stream_in_string_dict.orc new file mode 100644 index 00000000000000..92912b0ea969b9 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/missing_length_stream_in_string_dict.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/negative_dict_entry_lengths.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/negative_dict_entry_lengths.orc new file mode 100644 index 00000000000000..171537db992bfe Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/negative_dict_entry_lengths.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/stripe_footer_bad_column_encodings.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/stripe_footer_bad_column_encodings.orc 
new file mode 100644 index 00000000000000..244662391e9dca Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_exception_files/stripe_footer_bad_column_encodings.orc differ diff --git a/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy new file mode 100644 index 00000000000000..28216febf78bb4 --- /dev/null +++ b/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+// Reads each fixture under preinstalled_data/orc_exception_files through the
+// HDFS table-valued function with "format" = "orc" and expects the query to
+// fail with the error text recorded for that file.
+suite("test_orc_exception_files","external,hive,tvf,external_docker") {
+    // Nothing is executed unless the "enableHiveTest" config value is "true"
+    // (case-insensitive). Putting the literal first keeps the check null-safe.
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (!"true".equalsIgnoreCase(enabled)) {
+        return
+    }
+
+    String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+    def hdfsUserName = "doris"
+    def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+    def dataDir = "${defaultFS}/user/doris/preinstalled_data/orc_exception_files/"
+
+    // ORC file name -> error text handed to `exception` for that file.
+    // A Groovy map literal keeps insertion order, so the cases run in the order
+    // they are listed here.
+    // NOTE(review): presumably the framework matches this text against the
+    // error returned by the query; confirm whether that is a substring or an
+    // exact comparison before tightening any of these strings.
+    def expectedErrors = [
+        "corrupted.orc":
+            "Footer is corrupt: STRUCT type 0 has 3 subTypes, but has 2 fieldNames",
+
+        // NOTE(review): this is the only DwrfStripeCache case whose expected
+        // text has no trailing " from"; kept exactly as written, confirm
+        // whether the difference is intentional.
+        "DwrfStripeCache_BOTH_AllStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer",
+        "DwrfStripeCache_FOOTER_AllStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+        "DwrfStripeCache_FOOTER_HalfStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+        "DwrfStripeCache_BOTH_HalfStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+        "DwrfStripeCache_INDEX_AllStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+        "DwrfStripeCache_INDEX_HalfStripes.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+        "DwrfStripeCache_NONE.orc":
+            "Init OrcReader failed. reason = Failed to parse the footer from",
+
+        // NOTE(review): the expected text embeds a host filesystem path
+        // (/usr/share/zoneinfo/PST); it looks like this depends on the time
+        // zone data of the machine serving the query, so confirm it is stable
+        // across test environments.
+        "before_1582_ts_v2_4.snappy.orc":
+            "Orc row reader nextBatch failed. reason = Can't open /usr/share/zoneinfo/PST",
+
+        "missing_blob_stream_in_string_dict.orc":
+            "Orc row reader nextBatch failed. reason = DICTIONARY_DATA stream not found in StringDictionaryColumn",
+        "missing_length_stream_in_string_dict.orc":
+            "Orc row reader nextBatch failed. reason = LENGTH stream not found in StringDictionaryColumn",
+        "negative_dict_entry_lengths.orc":
+            "Orc row reader nextBatch failed. reason = Negative dictionary entry length",
+        "stripe_footer_bad_column_encodings.orc":
+            "Orc row reader nextBatch failed. reason = bad StripeFooter from zlib"
+    ]
+
+    expectedErrors.each { String fileName, String expectedError ->
+        // Built per case, so no mutable state is shared between cases.
+        def uri = "${dataDir}${fileName}"
+        test {
+            sql """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "orc"); """
+            exception(expectedError)
+        }
+    }
+}
\ No newline at end of file