Skip to content

Commit

Permalink
apacheGH-41198: [C#] Fix concatenation of union arrays (apache#41226)
Browse files Browse the repository at this point in the history
### Rationale for this change

Fixes concatenation of union arrays.

### What changes are included in this PR?

* Re-enables union array concatenation tests that were disabled in apache#41197 after making union array comparisons more thorough in the `ArrowReaderVerifier`
* Updates the union array concatenation logic to account for array lengths when concatenating the type and offset buffers, and fixes how the base offset is calculated.
* Fixes creating the type buffers for the array concatenation tests.

### Are these changes tested?

Yes, this uses the existing `ArrowArrayConcatenatorTests` tests.

### Are there any user-facing changes?

Yes, this is a user-facing bug fix.
* GitHub Issue: apache#41198

Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
  • Loading branch information
adamreeve authored and vibhatha committed May 25, 2024
1 parent 7ad5356 commit 7a18b3f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 14 deletions.
22 changes: 15 additions & 7 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ private ArrowBuffer ConcatenateUnionTypeBuffer()

foreach (ArrayData arrayData in _arrayDataList)
{
builder.Append(arrayData.Buffers[0]);
builder.Append(arrayData.Buffers[0].Span.Slice(arrayData.Offset, arrayData.Length));
}

return builder.Build(_allocator);
Expand All @@ -376,18 +376,26 @@ private ArrowBuffer ConcatenateUnionTypeBuffer()
private ArrowBuffer ConcatenateUnionOffsetBuffer()
{
var builder = new ArrowBuffer.Builder<int>(_totalLength);
int baseOffset = 0;
var typeCount = _arrayDataList.Count > 0 ? _arrayDataList[0].Children.Length : 0;
var baseOffsets = new int[typeCount];

foreach (ArrayData arrayData in _arrayDataList)
{
ReadOnlySpan<int> span = arrayData.Buffers[1].Span.CastTo<int>();
foreach (int offset in span)
ReadOnlySpan<byte> typeSpan = arrayData.Buffers[0].Span.Slice(arrayData.Offset, arrayData.Length);
ReadOnlySpan<int> offsetSpan = arrayData.Buffers[1].Span.CastTo<int>().Slice(arrayData.Offset, arrayData.Length);
for (int i = 0; i < arrayData.Length; ++i)
{
builder.Append(baseOffset + offset);
var typeId = typeSpan[i];
builder.Append(checked(baseOffsets[typeId] + offsetSpan[i]));
}

// The next offset must start from the current last offset.
baseOffset += span[arrayData.Length];
for (int i = 0; i < typeCount; ++i)
{
checked
{
baseOffsets[i] += arrayData.Children[i].Length;
}
}
}

return builder.Build(_allocator);
Expand Down
9 changes: 2 additions & 7 deletions csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,6 @@ public void TestStandardCases()
{
foreach ((List<IArrowArray> testTargetArrayList, IArrowArray expectedArray) in GenerateTestData())
{
if (expectedArray is UnionArray)
{
// Union array concatenation is incorrect. See https://github.com/apache/arrow/issues/41198
continue;
}

IArrowArray actualArray = ArrowArrayConcatenator.Concatenate(testTargetArrayList);
ArrowReaderVerifier.CompareArrays(expectedArray, actualArray);
}
Expand Down Expand Up @@ -604,10 +598,11 @@ public void Visit(UnionType type)

for (int j = 0; j < dataList.Count; j++)
{
byte index = (byte)Math.Max(j % 3, 1);
byte index = (byte)Math.Min(j % 3, 1);
int? intValue = (index == 1) ? dataList[j] : null;
string stringValue = (index == 1) ? null : dataList[j]?.ToString();
typeBuilder.Append(index);
typeResultBuilder.Append(index);

if (isDense)
{
Expand Down

0 comments on commit 7a18b3f

Please sign in to comment.