Skip to content

Commit

Permalink
Added test for mixed encoded data with offset index
Browse files Browse the repository at this point in the history
  • Loading branch information
malhotrashivam committed Feb 21, 2024
1 parent 5187a9b commit 17e539c
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1431,12 +1431,20 @@ public void dictionaryEncodingTest() {
@Test
public void mixedDictionaryEncodingTest() {
// Test the behavior of writing parquet files with some pages dictionary encoded and some not
final String path = ParquetTableReadWriteTest.class
String path = ParquetTableReadWriteTest.class
.getResource("/ParquetDataWithMixedEncodingWithoutOffsetIndex.parquet").getFile();
final Table fromDisk = readParquetFileFromGitLFS(new File(path)).select();
final Table expected =
Table fromDisk = readParquetFileFromGitLFS(new File(path)).select();
Table expected =
emptyTable(2_000_000).update("Broken=String.format(`%015d`, ii < 1200000 ? (ii % 30000) : ii)");
assertTableEquals(expected, fromDisk);

path = ParquetTableReadWriteTest.class.getResource("/ParquetDataWithMixedEncodingWithOffsetIndex.parquet")
.getFile();
fromDisk = readParquetFileFromGitLFS(new File(path)).select();
final Collection<String> columns = new ArrayList<>(Arrays.asList("shortStringColumn = `Some data`"));
final int numRows = 20;
expected = TableTools.emptyTable(numRows).select(Selectable.from(columns));
assertTableEquals(expected, fromDisk);
}

@Test
Expand Down
Git LFS file not shown

0 comments on commit 17e539c

Please sign in to comment.