Skip to content

Commit

Permalink
GH-41302: [C#][Integration] Fix writing list and binary arrays with z…
Browse files Browse the repository at this point in the history
…ero length offsets to IPC format (#41303)

### Rationale for this change

Fixes the integration test failures caused by #41230

### What changes are included in this PR?

When writing list and binary arrays to the IPC format, only try to access the offset values if the array length is non-zero.

### Are these changes tested?

Yes, I've manually run the integration tests with C# and Java to verify they pass (when also including the changes from #41264), and also added new unit tests for this.

### Are there any user-facing changes?

This may also be a bug that affects users, but it isn't in a released version.
* GitHub Issue: #41302

Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
  • Loading branch information
adamreeve authored and raulcd committed Apr 29, 2024
1 parent f0b570b commit ef4db17
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 4 deletions.
18 changes: 14 additions & 4 deletions csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,13 @@ public void Visit(ListArray array)
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));

int valuesOffset = array.ValueOffsets[0];
int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
int valuesOffset = 0;
int valuesLength = 0;
if (array.Length > 0)
{
valuesOffset = array.ValueOffsets[0];
valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
}

var values = array.Values;
if (valuesOffset > 0 || valuesLength < values.Length)
Expand Down Expand Up @@ -206,8 +211,13 @@ public void Visit(BinaryArray array)
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));

int valuesOffset = array.ValueOffsets[0];
int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
int valuesOffset = 0;
int valuesLength = 0;
if (array.Length > 0)
{
valuesOffset = array.ValueOffsets[0];
valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
}

_buffers.Add(CreateSlicedBuffer<byte>(array.ValueBuffer, valuesOffset, valuesLength));
}
Expand Down
52 changes: 52 additions & 0 deletions csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,57 @@ public async Task WritesEmptyFileAsync()
Assert.Null(readBatch);
SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
}

[Fact]
public async Task WriteBinaryArrayWithEmptyOffsets()
{
    // The C# builder emits a single offset even for an empty binary array,
    // whereas other implementations may hand over a completely empty offsets
    // buffer. Construct that second shape by hand and write it to a file.
    var emptyBinary = new BinaryArray(
        new BinaryType(),
        length: 0,
        valueOffsetsBuffer: ArrowBuffer.Empty,
        dataBuffer: ArrowBuffer.Empty,
        nullBitmapBuffer: ArrowBuffer.Empty,
        nullCount: 0);

    var batch = new RecordBatch.Builder()
        .Append("x", true, emptyBinary)
        .Build();

    var output = new MemoryStream();
    var fileWriter = new ArrowFileWriter(output, batch.Schema, leaveOpen: true);
    await fileWriter.WriteRecordBatchAsync(batch);
    await fileWriter.WriteEndAsync();

    // Rewind the stream before handing it to the validator.
    output.Position = 0;
    await ValidateRecordBatchFile(output, batch, strictCompare: false);
}

[Fact]
public async Task WriteListArrayWithEmptyOffsets()
{
    // A zero-length list array whose offsets buffer is completely empty,
    // wrapping an Int32 child array produced by an empty builder.
    var emptyList = new ListArray(
        new ListType(new Int32Type()),
        length: 0,
        valueOffsetsBuffer: ArrowBuffer.Empty,
        values: new Int32Array.Builder().Build(),
        nullBitmapBuffer: ArrowBuffer.Empty,
        nullCount: 0);

    var batch = new RecordBatch.Builder()
        .Append("x", true, emptyList)
        .Build();

    var output = new MemoryStream();
    var fileWriter = new ArrowFileWriter(output, batch.Schema, leaveOpen: true);
    await fileWriter.WriteRecordBatchAsync(batch);
    await fileWriter.WriteEndAsync();

    // Rewind the stream before handing it to the validator.
    output.Position = 0;
    await ValidateRecordBatchFile(output, batch, strictCompare: false);
}
}
}

0 comments on commit ef4db17

Please sign in to comment.