From e9cc38d1cd18a15c6943f3d956fac0a86825ec42 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 19 Apr 2024 14:41:56 +1200 Subject: [PATCH] GH-41302: [C#][Integration] Fix writing list and binary arrays with zero length offsets to IPC format (#41303) ### Rationale for this change Fixes the integration test failures caused by #41230 ### What changes are included in this PR? Only try to access the offset values if the array length is non-zero when writing list and binary arrays to IPC format. ### Are these changes tested? Yes, I've manually run the integration tests with C# and Java to verify they pass (when also including the changes from #41264), and also added new unit tests for this. ### Are there any user-facing changes? This may also be a bug that affects users but it isn't in a released version. * GitHub Issue: #41302 Authored-by: Adam Reeve Signed-off-by: Curt Hagenlocher --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 18 +++++-- .../ArrowFileWriterTests.cs | 52 +++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 1b83735925556..a7e4c13525236 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -165,8 +165,13 @@ public void Visit(ListArray array) _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); _buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); - int valuesOffset = array.ValueOffsets[0]; - int valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = array.ValueOffsets[0]; + valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + } var values = array.Values; if (valuesOffset > 0 || valuesLength < values.Length) @@ -206,8 +211,13 @@ public void Visit(BinaryArray array) _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); _buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); - int valuesOffset = array.ValueOffsets[0]; - int valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = array.ValueOffsets[0]; + valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + } _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, valuesOffset, valuesLength)); } diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index baea4d61e5b66..297cb5e181cd3 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -193,5 +193,57 @@ public async Task WritesEmptyFileAsync() Assert.Null(readBatch); SchemaComparer.Compare(originalBatch.Schema, reader.Schema); } + + [Fact] + public async Task WriteBinaryArrayWithEmptyOffsets() + { + // Empty binary arrays generated by the C# builder have a single offset, + // but some implementations may produce an empty offsets buffer. + + var array = new BinaryArray( + new BinaryType(), + length: 0, + valueOffsetsBuffer: ArrowBuffer.Empty, + dataBuffer: ArrowBuffer.Empty, + nullBitmapBuffer: ArrowBuffer.Empty, + nullCount: 0); + + var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build(); + + var stream = new MemoryStream(); + var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true); + + await writer.WriteRecordBatchAsync(recordBatch); + await writer.WriteEndAsync(); + + stream.Position = 0; + + await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false); + } + + [Fact] + public async Task WriteListArrayWithEmptyOffsets() + { + var values = new Int32Array.Builder().Build(); + var array = new ListArray( + new ListType(new Int32Type()), + length: 0, + valueOffsetsBuffer: ArrowBuffer.Empty, + values: values, + nullBitmapBuffer: ArrowBuffer.Empty, + nullCount: 0); + + var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build(); + + var stream = new MemoryStream(); + var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true); + + await writer.WriteRecordBatchAsync(recordBatch); + await writer.WriteEndAsync(); + + stream.Position = 0; + + await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false); + } } }