Skip to content

Commit

Permalink
fix: adding c data component v1 WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
vibhatha committed Jul 22, 2024
1 parent 5e8073e commit 2e6c8b3
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
Expand Down Expand Up @@ -400,7 +401,9 @@ public List<ArrowBuf> visit(ArrowType.Duration type) {

/**
 * Imports the buffers backing a list-view array.
 *
 * <p>The previous implementation rejected list views with an {@link
 * UnsupportedOperationException}; that statement must not precede the return below, otherwise the
 * return is unreachable and the method does not compile.
 *
 * @param type the list-view type whose buffers are imported
 * @return the imported buffers, in the order: validity bitmap, offsets, sizes
 */
@Override
public List<ArrowBuf> visit(ArrowType.ListView type) {
  return Arrays.asList(
      maybeImportBitmap(type),
      // Buffer index 1 is imported using the offset width, buffer index 2 using the size width.
      importFixedBytes(type, 1, ListViewVector.OFFSET_WIDTH),
      importFixedBytes(type, 2, ListViewVector.SIZE_WIDTH));
}
}
4 changes: 4 additions & 0 deletions java/c/src/main/java/org/apache/arrow/c/Format.java
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ static String asString(ArrowType arrowType) {
return "vu";
case BinaryView:
return "vz";
case ListView:
return "+vl";
case NONE:
throw new IllegalArgumentException("Arrow type ID is NONE");
default:
Expand Down Expand Up @@ -313,6 +315,8 @@ static ArrowType asType(String format, long flags)
return new ArrowType.Utf8View();
case "vz":
return new ArrowType.BinaryView();
case "+vl":
return new ArrowType.ListView();
default:
String[] parts = format.split(":", 2);
if (parts.length == 2) {
Expand Down
21 changes: 21 additions & 0 deletions java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;
Expand Down Expand Up @@ -683,6 +684,26 @@ public void testFixedSizeListVector() {
}
}

/** Round-trips a list-view vector populated with two two-element lists and one empty list. */
@Test
public void testListViewVector() {
  try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
    setVector(vector, Arrays.asList(1, 2), Arrays.asList(3, 4), new ArrayList<Integer>());
    final boolean survivedRoundtrip = roundtrip(vector, ListViewVector.class);
    assertTrue(survivedRoundtrip);
  }
}

/** Round-trips a list-view vector populated with a single empty list. */
@Test
public void testEmptyListViewVector() {
  try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
    setVector(vector, new ArrayList<Integer>());
    final boolean survivedRoundtrip = roundtrip(vector, ListViewVector.class);
    assertTrue(survivedRoundtrip);
  }
}

@Test
public void testMapVector() {
int count = 5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;

Expand Down Expand Up @@ -234,6 +236,14 @@ public Boolean visit(ExtensionTypeVector<?> left, Range range) {
return underlyingVisitor.rangeEquals(range);
}

/**
 * Compares the given range of a list-view vector against the right-hand vector.
 *
 * @param left the left-hand list-view vector
 * @param range the range to compare
 * @return {@code false} if {@code validate(left)} fails, otherwise the result of {@code
 *     compareListViewVectors(range)}
 */
@Override
public Boolean visit(ListViewVector left, Range range) {
  return validate(left) && compareListViewVectors(range);
}

protected RangeEqualsVisitor createInnerVisitor(
ValueVector leftInner,
ValueVector rightInner,
Expand Down Expand Up @@ -612,6 +622,51 @@ protected boolean compareListVectors(Range range) {
return true;
}

/**
 * Compares the requested range of the left and right list-view vectors slot by slot.
 *
 * <p>Two slots match when they are both null, or when they are both non-null, have the same size
 * and their referenced element ranges in the data vectors compare equal.
 *
 * @param range the left start, right start and length of the slots to compare
 * @return {@code true} if every slot in the range matches, {@code false} otherwise
 */
protected boolean compareListViewVectors(Range range) {
  final ListViewVector lhs = (ListViewVector) left;
  final ListViewVector rhs = (ListViewVector) right;

  final RangeEqualsVisitor elementVisitor =
      createInnerVisitor(lhs.getDataVector(), rhs.getDataVector(), /*type comparator*/ null);
  Range elementRange = new Range();

  // Byte widths of one offset entry and one size entry; constant for the whole comparison.
  final int offsetWidth = BaseRepeatedValueViewVector.OFFSET_WIDTH;
  final int sizeWidth = BaseRepeatedValueViewVector.SIZE_WIDTH;

  for (int pos = 0; pos < range.getLength(); pos++) {
    final int lhsIndex = range.getLeftStart() + pos;
    final int rhsIndex = range.getRightStart() + pos;

    final boolean lhsNull = lhs.isNull(lhsIndex);
    if (lhsNull != rhs.isNull(rhsIndex)) {
      return false;
    }
    if (lhsNull) {
      // Both slots are null: nothing further to compare for this position.
      continue;
    }

    final int lhsStart = lhs.getOffsetBuffer().getInt((long) lhsIndex * offsetWidth);
    final int lhsSize = lhs.getSizeBuffer().getInt((long) lhsIndex * sizeWidth);

    final int rhsStart = rhs.getOffsetBuffer().getInt((long) rhsIndex * offsetWidth);
    final int rhsSize = rhs.getSizeBuffer().getInt((long) rhsIndex * sizeWidth);

    if (lhsSize != rhsSize) {
      return false;
    }

    // Compare the element ranges that the two views point at.
    elementRange =
        elementRange.setRightStart(rhsStart).setLeftStart(lhsStart).setLength(lhsSize);
    if (!elementVisitor.rangeEquals(elementRange)) {
      return false;
    }
  }
  return true;
}

protected boolean compareFixedSizeListVectors(Range range) {
FixedSizeListVector leftVector = (FixedSizeListVector) left;
FixedSizeListVector rightVector = (FixedSizeListVector) right;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.Field;
Expand Down Expand Up @@ -124,6 +125,11 @@ public Boolean visit(ExtensionTypeVector<?> left, Void value) {
return compareField(left.getField(), right.getField());
}

/**
 * Compares the field of the given list-view vector with the field of the right-hand vector.
 *
 * @param left the left-hand list-view vector
 * @param value unused
 * @return the result of {@code compareField} on the two fields
 */
@Override
public Boolean visit(ListViewVector left, Void value) {
  final Field leftField = left.getField();
  final Field rightField = right.getField();
  return compareField(leftField, rightField);
}

private boolean compareField(Field leftField, Field rightField) {

if (leftField == rightField) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;

Expand Down Expand Up @@ -60,4 +61,6 @@ public interface VectorVisitor<OUT, IN> {
OUT visit(NullVector left, IN value);

/**
 * Visits an {@link ExtensionTypeVector}.
 *
 * @param left the vector being visited
 * @param value the input passed to the visitor
 * @return the visitor's result
 */
OUT visit(ExtensionTypeVector<?> left, IN value);

/**
 * Visits a {@link ListViewVector}.
 *
 * @param left the vector being visited
 * @param value the input passed to the visitor
 * @return the visitor's result
 */
OUT visit(ListViewVector left, IN value);
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
import org.apache.arrow.vector.TypeLayout;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
Expand Down Expand Up @@ -81,6 +83,16 @@ private void validateOffsetBuffer(ValueVector vector, long minCapacity) {
offsetBuffer.capacity());
}

/**
 * Checks that the size buffer of a list-view vector exists and is large enough.
 *
 * @param vector the vector whose size buffer is checked
 * @param minCapacity the minimum capacity, in bytes, the size buffer must have
 */
private void validateSizeBuffer(ListViewVector vector, long minCapacity) {
  final ArrowBuf sizes = vector.getSizeBuffer();
  validateOrThrow(sizes != null, "The size buffer is null.");

  final long actualCapacity = sizes.capacity();
  validateOrThrow(
      actualCapacity >= minCapacity,
      "Not enough capacity for the size buffer. Minimum capacity %s, actual capacity %s.",
      minCapacity,
      actualCapacity);
}

private void validateFixedWidthDataBuffer(ValueVector vector, int valueCount, int bitWidth) {
ArrowBuf dataBuffer = vector.getDataBuffer();
validateOrThrow(dataBuffer != null, "The fixed width data buffer is null.");
Expand Down Expand Up @@ -287,4 +299,34 @@ public Void visit(ExtensionTypeVector<?> vector, Void value) {
vector.getUnderlyingVector().accept(this, value);
return null;
}

/**
 * Validates the buffers of a {@link ListViewVector}.
 *
 * <p>Checks the common vector invariants, the validity buffer, the size buffer and the offset
 * buffer, then verifies that the data vector holds enough elements for every view and finally
 * validates the data vector itself.
 *
 * <p>Unlike a list vector, a list view stores exactly one offset and one size per value (there
 * is no trailing end offset) and its offsets need not be ordered. The number of elements the
 * data vector must hold is therefore the largest {@code offset + size} over all slots, not the
 * value of a "last" offset.
 *
 * @param vector the vector to validate
 * @param value unused
 * @return always {@code null}
 */
@Override
public Void visit(ListViewVector vector, Void value) {
  int valueCount = vector.getValueCount();
  validateVectorCommon(vector);
  validateValidityBuffer(vector, valueCount);
  validateSizeBuffer(vector, (long) valueCount * ListViewVector.SIZE_WIDTH);
  // One offset per value: requiring valueCount + 1 entries would reject valid list views.
  validateOffsetBuffer(vector, (long) valueCount * ListViewVector.OFFSET_WIDTH);

  // Both buffers were just validated to cover valueCount entries, so these reads are in bounds.
  ArrowBuf offsetBuffer = vector.getOffsetBuffer();
  ArrowBuf sizeBuffer = vector.getSizeBuffer();
  long requiredElements = 0L;
  for (int i = 0; i < valueCount; i++) {
    // Widen to long before adding so that offset + size cannot overflow an int.
    long offset = offsetBuffer.getInt((long) i * ListViewVector.OFFSET_WIDTH);
    long size = sizeBuffer.getInt((long) i * ListViewVector.SIZE_WIDTH);
    requiredElements = Math.max(requiredElements, offset + size);
  }

  FieldVector dataVector = vector.getDataVector();
  int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
  validateOrThrow(
      dataVectorLength >= requiredElements,
      "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
      requiredElements,
      dataVectorLength);

  if (dataVector != null) {
    dataVector.accept(this, null);
  }
  return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.holders.IntervalDayHolder;
import org.apache.arrow.vector.types.Types;
Expand Down Expand Up @@ -728,4 +730,32 @@ public static void setVector(StructVector vector, Map<String, List<Integer>> val
}
vector.setValueCount(valueCount);
}

/**
 * Populate values for {@link ListViewVector}.
 *
 * <p>Each entry of {@code values} becomes one slot of the vector: a {@code null} entry becomes a
 * null slot, any other entry has its integers appended to the data vector. For every slot the
 * offset buffer receives the position in the data vector where the slot's elements start and the
 * size buffer receives the number of elements appended for it (zero for a null slot).
 *
 * @param vector the vector to populate; it is (re)allocated by this method
 * @param values the lists to store, one per slot; an entry may be {@code null}
 */
public static void setVector(ListViewVector vector, List<Integer>... values) {
  vector.allocateNewSafe();
  Types.MinorType type = Types.MinorType.INT;
  vector.addOrGetVector(FieldType.nullable(type.getType()));

  IntVector dataVector = (IntVector) vector.getDataVector();
  dataVector.allocateNew();

  // set underlying vectors
  int curPos = 0;
  for (int i = 0; i < values.length; i++) {
    // Remember where this slot's elements begin before appending them.
    final int start = curPos;
    if (values[i] == null) {
      BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
    } else {
      BitVectorHelper.setBit(vector.getValidityBuffer(), i);
      for (int value : values[i]) {
        dataVector.setSafe(curPos, value);
        curPos += 1;
      }
    }
    // A list view is described by (start offset, size), not by an end offset.
    vector.getOffsetBuffer().setInt((long) i * BaseRepeatedValueViewVector.OFFSET_WIDTH, start);
    vector
        .getSizeBuffer()
        .setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH, curPos - start);
  }
  dataVector.setValueCount(curPos);
  vector.setValueCount(values.length);
}
}

0 comments on commit 2e6c8b3

Please sign in to comment.