diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 6127c5a662dfe..1b83735925556 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -163,9 +163,18 @@ public void Visit(BooleanArray array) public void Visit(ListArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); - _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length + 1)); + _buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); - VisitArray(array.Values); + int valuesOffset = array.ValueOffsets[0]; + int valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + + var values = array.Values; + if (valuesOffset > 0 || valuesLength < values.Length) + { + values = ArrowArrayFactory.Slice(values, valuesOffset, valuesLength); + } + + VisitArray(values); } public void Visit(ListViewArray array) @@ -195,8 +204,12 @@ public void Visit(FixedSizeListArray array) public void Visit(BinaryArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); - _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length + 1)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); + _buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); + + int valuesOffset = array.ValueOffsets[0]; + int valuesLength = array.ValueOffsets[array.Length] - valuesOffset; + + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, valuesOffset, valuesLength)); } public void Visit(BinaryViewArray array) @@ -263,6 +276,39 @@ public void Visit(NullArray array) // There are no buffers for a NullArray } + private ArrowBuffer GetZeroBasedValueOffsets(ArrowBuffer valueOffsetsBuffer, int arrayOffset, int arrayLength) + { + var requiredBytes = CalculatePaddedBufferLength(sizeof(int) * (arrayLength + 1)); + + if (arrayOffset != 0) + { + // Array has been sliced, so we need to shift and adjust the offsets + var originalOffsets = valueOffsetsBuffer.Span.CastTo().Slice(arrayOffset, arrayLength + 1); + var firstOffset = arrayLength > 0 ? originalOffsets[0] : 0; + + var newValueOffsetsBuffer = _allocator.Allocate(requiredBytes); + var newValueOffsets = newValueOffsetsBuffer.Memory.Span.CastTo(); + + for (int i = 0; i < arrayLength + 1; ++i) + { + newValueOffsets[i] = originalOffsets[i] - firstOffset; + } + + return new ArrowBuffer(newValueOffsetsBuffer); + } + else if (valueOffsetsBuffer.Length > requiredBytes) + { + // Array may have been sliced but the offset is zero, + // so we can truncate the existing offsets + return new ArrowBuffer(valueOffsetsBuffer.Memory.Slice(0, requiredBytes)); + } + else + { + // Use the full buffer + return valueOffsetsBuffer; + } + } + private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) { if (buffer.IsEmpty) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index faf650973d64c..baea4d61e5b66 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -113,6 +113,7 @@ public async Task WritesFooterAlignedMultipleOf8Async() [InlineData(0, 45)] [InlineData(3, 45)] [InlineData(16, 45)] + [InlineData(10, 0)] public async Task WriteSlicedArrays(int sliceOffset, int sliceLength) { var originalBatch = TestData.CreateSampleRecordBatch(length: 100); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 07c8aa3f56b3b..95972193219ac 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -160,10 +160,14 @@ public void Visit(StructArray array) Assert.Equal(expectedArray.Length, array.Length); Assert.Equal(expectedArray.NullCount, array.NullCount); - Assert.Equal(0, array.Offset); Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, array.Offset); + } + for (int i = 0; i < array.Fields.Count; i++) { array.Fields[i].Accept(new ArrayComparer(expectedArray.Fields[i], _strictCompare)); @@ -178,12 +182,12 @@ public void Visit(UnionArray array) Assert.Equal(expectedArray.Mode, array.Mode); Assert.Equal(expectedArray.Length, array.Length); Assert.Equal(expectedArray.NullCount, array.NullCount); - Assert.Equal(0, array.Offset); Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, array.Offset); Assert.True(expectedArray.TypeBuffer.Span.SequenceEqual(array.TypeBuffer.Span)); } else @@ -252,12 +256,12 @@ private void CompareBinaryArrays(BinaryArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); Assert.True(expectedArray.Values.Slice(0, expectedArray.Length).SequenceEqual(actualArray.Values.Slice(0, actualArray.Length))); } @@ -284,7 +288,11 @@ private void CompareVariadicArrays(BinaryViewArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); + + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + } CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); @@ -309,12 +317,12 @@ private void CompareArrays(FixedSizeBinaryArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); Assert.True(expectedArray.ValueBuffer.Span.Slice(0, expectedArray.Length).SequenceEqual(actualArray.ValueBuffer.Span.Slice(0, actualArray.Length))); } else @@ -338,12 +346,12 @@ private void CompareArrays(PrimitiveArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); Assert.True(expectedArray.Values.Slice(0, expectedArray.Length).SequenceEqual(actualArray.Values.Slice(0, actualArray.Length))); } else @@ -370,12 +378,12 @@ private void CompareArrays(BooleanArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); int booleanByteCount = BitUtility.ByteCount(expectedArray.Length); Assert.True(expectedArray.Values.Slice(0, booleanByteCount).SequenceEqual(actualArray.Values.Slice(0, booleanByteCount))); } @@ -397,22 +405,31 @@ private void CompareArrays(ListArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } else { - int offsetsStart = (expectedArray.Offset) * sizeof(int); - int offsetsLength = (expectedArray.Length + 1) * sizeof(int); - Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(offsetsStart, offsetsLength).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength))); + for (int i = 0; i < actualArray.Length; ++i) + { + if (expectedArray.IsNull(i)) + { + Assert.True(actualArray.IsNull(i)); + } + else + { + var expectedList = expectedArray.GetSlicedValues(i); + var actualList = actualArray.GetSlicedValues(i); + actualList.Accept(new ArrayComparer(expectedList, _strictCompare)); + } + } } - - actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } private void CompareArrays(ListViewArray actualArray) @@ -424,12 +441,12 @@ private void CompareArrays(ListViewArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { + Assert.Equal(expectedArray.Offset, actualArray.Offset); Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); Assert.True(expectedArray.SizesBuffer.Span.SequenceEqual(actualArray.SizesBuffer.Span)); } @@ -453,7 +470,10 @@ private void CompareArrays(FixedSizeListArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(0, actualArray.Offset); + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + } CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 29ddef2864862..3ea42ee0fbcb7 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -294,7 +294,18 @@ public void Visit(StringType type) for (var i = 0; i < Length; i++) { - builder.Append(str); + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(str); + break; + case 2: + builder.Append(str + str); + break; + } } Array = builder.Build(); @@ -328,15 +339,21 @@ public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); - var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1); + var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length * 3 / 2); for (var i = 0; i < Length; i++) { - builder.Append(); - valueBuilder.Append(i); + if (i % 10 == 2) + { + builder.AppendNull(); + } + else + { + builder.Append(); + var listLength = i % 4; + valueBuilder.AppendRange(Enumerable.Range(i, listLength).Select(x => (long)x)); + } } - //Add a value to check if Values.Length can exceed ListArray.Length - valueBuilder.Append(0); Array = builder.Build(); } @@ -352,8 +369,12 @@ public void Visit(ListViewType type) builder.Append(); valueBuilder.Append(i); } - //Add a value to check if Values.Length can exceed ListArray.Length - valueBuilder.Append(0); + + if (Length > 0) + { + // Add a value to check if Values.Length can exceed ListArray.Length + valueBuilder.Append(0); + } Array = builder.Build(); } @@ -562,9 +583,13 @@ public void Visit(MapType type) keyBuilder.Append(i.ToString()); valueBuilder.Append(i); } - //Add a value to check if Values.Length can exceed MapArray.Length - keyBuilder.Append("0"); - valueBuilder.Append(0); + + if (Length > 0) + { + // Add a value to check if Values.Length can exceed MapArray.Length + keyBuilder.Append("0"); + valueBuilder.Append(0); + } Array = builder.Build(); }