From b08803aa8943d005f889f15234fddd271772d5f6 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 16 May 2024 23:08:19 +0800 Subject: [PATCH] Update test and err message --- cpp/src/parquet/column_reader.cc | 2 +- cpp/src/parquet/column_reader_test.cc | 55 ++++++++------------------- cpp/submodules/parquet-testing | 2 +- 3 files changed, 18 insertions(+), 41 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index a048aee9edfc1..a053971f95624 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -103,7 +103,7 @@ inline void CheckNumberDecoded(int64_t number_decoded, int64_t expected) { } constexpr std::string_view kErrorRepDefLevelNotMatchesNumValues = - "Number of decoded rep / def levels did less than num_values in page_header"; + "Number of decoded rep / def levels did more or less than num_values in page_header"; constexpr std::string_view kErrorRepDefLevelInEqual = "Number of decoded rep / def levels did not match"; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 4c51a6e2cea1c..998f1c37be652 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -437,17 +437,16 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { TEST_F(TestPrimitiveReader, DefLevelNotExpected) { max_def_level_ = 1; max_rep_level_ = 0; - std::vector values(1, false); - // storing def-levels less than value in page-header - { - std::vector input_def_levels(1, 1); + + auto do_check = [&](const std::vector& input_def_levels, int num_values) { + std::vector values(num_values, false); NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); // The data page falls back to plain encoding std::shared_ptr dummy = AllocateBuffer(); std::shared_ptr data_page = MakeDataPage( - &descr, values, /*num_values=*/3, Encoding::PLAIN, /*indices=*/{}, + &descr, values, /*num_values=*/num_values, Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, /*rep_levels=*/{}, /*max_rep_level=*/max_rep_level_); @@ -456,7 +455,7 @@ TEST_F(TestPrimitiveReader, DefLevelNotExpected) { auto reader = static_cast(reader_.get()); ASSERT_TRUE(reader->HasNext()); - constexpr int batch_size = 3; + constexpr int batch_size = 10; std::vector def_levels(batch_size, 0); std::vector rep_levels(batch_size, 0); bool values_out[batch_size]; @@ -467,42 +466,20 @@ TEST_F(TestPrimitiveReader, DefLevelNotExpected) { &values_read); }, ParquetException, - ::testing::Property(&ParquetException::what, - ::testing::HasSubstr("Number of decoded rep / def levels did " - "less than num_values in page_header"))); + ::testing::Property( + &ParquetException::what, + ::testing::HasSubstr("Number of decoded rep / def levels did " + "more or less than num_values in page_header"))); + }; + // storing def-levels less than value in page-header + { + std::vector input_def_levels(1, 1); + do_check(input_def_levels, /*num_values=*/3); } - // storing def-levels more than value in page-header + // storing def-levels more than value in page-header { std::vector input_def_levels(2, 1); - NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); - const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); - - // The data page falls back to plain encoding - std::shared_ptr dummy = AllocateBuffer(); - std::shared_ptr data_page = MakeDataPage( - &descr, values, /*num_values=*/1, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, - /*rep_levels=*/{}, - /*max_rep_level=*/max_rep_level_); - pages_.push_back(data_page); - InitReader(&descr); - auto reader = static_cast(reader_.get()); - ASSERT_TRUE(reader->HasNext()); - - constexpr int batch_size = 3; - std::vector def_levels(batch_size, 0); - std::vector rep_levels(batch_size, 0); - bool values_out[batch_size]; - int64_t values_read; - EXPECT_THROW_THAT( - [&]() { - reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(), values_out, - &values_read); - }, - ParquetException, - ::testing::Property(&ParquetException::what, - ::testing::HasSubstr("Number of decoded rep / def levels did " - "less than num_values in page_header"))); + do_check(input_def_levels, /*num_values=*/1); } } diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing index 74278bc4a1122..1ba34478f535c 160000 --- a/cpp/submodules/parquet-testing +++ b/cpp/submodules/parquet-testing @@ -1 +1 @@ -Subproject commit 74278bc4a1122d74945969e6dec405abd1533ec3 +Subproject commit 1ba34478f535c89382263c42c675a9af4f57f2dd