Skip to content

Commit

Permalink
fix: adding other test skeletons with todos
Browse files Browse the repository at this point in the history
  • Loading branch information
vibhatha committed Jul 30, 2024
1 parent e57f01f commit ae9fe8c
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 83 deletions.
198 changes: 115 additions & 83 deletions java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ViewVarBinaryVector;
import org.apache.arrow.vector.ViewVarCharVector;
Expand Down Expand Up @@ -952,7 +951,7 @@ public void testImportReleasedArray() {
}
}

private FieldVector getSlicedVector(FieldVector vector, int offset) {
private FieldVector getSlicedVector(FieldVector vector, int offset, int length) {
// Consumer allocates empty structures
try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
Expand All @@ -963,7 +962,11 @@ private FieldVector getSlicedVector(FieldVector vector, int offset) {
// Producer exports vector into the C Data Interface structures
Data.exportVector(allocator, vector, null, arrowArray, arrowSchema);
}
// consumerArrowArray.snapshot().offset = offset;

ArrowArray.Snapshot snapshot = consumerArrowArray.snapshot();
snapshot.offset = offset;
snapshot.length = length;
consumerArrowArray.save(snapshot);

// Consumer imports vector
FieldVector imported =
Expand All @@ -972,108 +975,137 @@ private FieldVector getSlicedVector(FieldVector vector, int offset) {
assertEquals(childAllocator, imported.getAllocator());
}

// Check that transfers work
// Check whether the transfer works
TransferPair pair = imported.getTransferPair(allocator);
pair.transfer();
return (FieldVector) pair.getTo();
}
}

@Test
public void testSliceVarCharVector2() {
public void testSliceVariableWidthVector() {
try (final VarCharVector vector = new VarCharVector("v", allocator);
VarCharVector target = new VarCharVector("v", allocator)) {
setVector(vector, "foo", "bar", "baz1", "baz223", "baz23445", "baz2121", "12312baz");
// slice information
final int startIndex = 2;
final int length = 3;
// create a sliced vector manually to mimic C++ slice behavior
VarCharVector slicedVector = (VarCharVector) getSlicedVector(vector, startIndex);
vector.splitAndTransferTo(startIndex, length, target);

// assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector));
assertTrue(roundtrip(slicedVector, VarCharVector.class));
try (VarCharVector slicedVector =
(VarCharVector) getSlicedVector(vector, startIndex, length)) {
vector.splitAndTransferTo(startIndex, length, target);
assertTrue(roundtrip(slicedVector, VarCharVector.class));
assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector));
}
}
}

@Test
public void testSliceVarCharVector() {
try (final VarCharVector vector = new VarCharVector("v", allocator);
VarCharVector slicedVector = new VarCharVector("v", allocator);
VarCharVector target = new VarCharVector("v", allocator)) {
slicedVector.allocateNew();
setVector(vector, "foo", "bar", "baz1", "baz223", "baz23445", "baz2121", "12312baz");
public void testSliceFixedWidthVector() {
// TODO: fix the import visitor to support offset
try (final IntVector vector = new IntVector("v", allocator);
IntVector target = new IntVector("v", allocator)) {
setVector(vector, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10);
// slice information
final int startIndex = 2;
final int length = 3;
// create a sliced vector manually to mimic C++ slice behavior
final ArrowBuf offsetBuffer = vector.getOffsetBuffer();
final ArrowBuf dataBuffer = vector.getDataBuffer();
final int offsetWidth = VarCharVector.OFFSET_WIDTH;

final ArrowBuf expectedSliceOffSetBuffer =
offsetBuffer.slice(startIndex * offsetWidth, (length + 1) * offsetWidth);
final int sizeOfDataBuffer = expectedSliceOffSetBuffer.getInt(length * offsetWidth);
final ArrowBuf expectedSliceDataBuffer = dataBuffer.slice(0, sizeOfDataBuffer);
final ArrowBuf expectedSliceValidityBuffer = allocator.buffer(8);
expectedSliceValidityBuffer.setLong(0, 0b00000111);

createSlicedVector(
slicedVector,
expectedSliceValidityBuffer,
expectedSliceOffSetBuffer,
expectedSliceDataBuffer,
length);

vector.splitAndTransferTo(startIndex, length, target);

assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector));
assertTrue(roundtrip(slicedVector, VarCharVector.class));

expectedSliceValidityBuffer.close();
}
}

private static void createSlicedVector(
VariableWidthFieldVector slicedVector,
ArrowBuf expectedSliceValidityBuffer,
ArrowBuf expectedSliceOffSetBuffer,
ArrowBuf expectedSliceDataBuffer,
int length) {
final ArrowBuf sliceValidityBuffer = slicedVector.getValidityBuffer();
final ArrowBuf slicedOffsetBuffer = slicedVector.getOffsetBuffer();
final ArrowBuf slicedDataBuffer = slicedVector.getDataBuffer();

setBuffersFromExpectedBuffers(
sliceValidityBuffer,
expectedSliceValidityBuffer,
slicedOffsetBuffer,
expectedSliceOffSetBuffer,
slicedDataBuffer,
expectedSliceDataBuffer,
length);

// setLastSet before SetValueCount to make sure fillHoles doesn't get called
slicedVector.setLastSet(length - 1);
slicedVector.setValueCount(length);
}

private static void setBuffersFromExpectedBuffers(
ArrowBuf sliceValidityBuffer,
ArrowBuf expectedSliceValidityBuffer,
ArrowBuf slicedOffsetBuffer,
ArrowBuf expectedSliceOffSetBuffer,
ArrowBuf slicedDataBuffer,
ArrowBuf expectedSliceDataBuffer,
int length) {
final int offsetWidth = VarCharVector.OFFSET_WIDTH;
sliceValidityBuffer.setBytes(
0, expectedSliceValidityBuffer, 0, expectedSliceValidityBuffer.capacity());
for (int i = 0; i < length + 1; i++) {
slicedOffsetBuffer.setInt(
(long) i * offsetWidth, expectedSliceOffSetBuffer.getInt((long) i * offsetWidth));
}
slicedDataBuffer.setBytes(0, expectedSliceDataBuffer, 0, expectedSliceDataBuffer.capacity());
try (IntVector slicedVector = (IntVector) getSlicedVector(vector, startIndex, length)) {
vector.splitAndTransferTo(startIndex, length, target);
assertTrue(roundtrip(slicedVector, IntVector.class));
// assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector));
}
}
}

// Placeholder slice tests.
// Every method below has an empty body, so it performs no export/import and makes no
// assertions; each still needs a real implementation (see the TODO in each body).
@Test
public void testSliceVariableWidthViewVector() {
// TODO: complete this test and function
}

@Test
public void testSliceListVector() {
// TODO: complete this test and function
}

@Test
public void testSliceLargeListVector() {
// TODO: complete this test and function
}

@Test
public void testSliceFixedSizeListVector() {
// TODO: complete this test and function
}

@Test
public void testSliceUnionVector() {
// TODO: complete this test and function
}

@Test
public void testSliceMapVector() {
// TODO: complete this test and function
}

@Test
public void testSliceIntVector() {
// TODO: complete this test and function
// NOTE(review): testSliceFixedWidthVector already builds and slices an IntVector;
// confirm whether this placeholder is still needed or should cover other integer widths.
}

@Test
public void testSliceFloatingPointVector() {
// TODO: complete this test and function
}

@Test
public void testSliceLargeUtf8Vector() {
// TODO: complete this test and function
}

@Test
public void testSliceFixedSizeBinaryVector() {
// TODO: complete this test and function
}

@Test
public void testSliceBoolVector() {
// TODO: complete this test and function
}

@Test
public void testSliceDecimalVector() {
// TODO: complete this test and function
}

@Test
public void testSliceDateVector() {
// TODO: complete this test and function
}

@Test
public void testSliceTimeVector() {
// TODO: complete this test and function
}

@Test
public void testSliceTimeStampVector() {
// TODO: complete this test and function
}

@Test
public void testSliceIntervalVector() {
// TODO: complete this test and function
}

@Test
public void testSliceDurationVector() {
// TODO: complete this test and function
}

@Test
public void testSliceListViewVector() {
// TODO: complete this test and function
}

private VectorSchemaRoot createTestVSR() {
Expand Down
69 changes: 69 additions & 0 deletions java/c/src/test/python/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,16 +258,48 @@ def test_decimal_array(self):
]
self.round_trip_array(lambda: pa.array(data, pa.decimal128(5, 2)))

def test_decimal_slice_array(self):
    """Prepare a slice of a decimal128(5, 2) array that contains a null.

    Only the sliced array is built here; the round trip is still a TODO.
    """
    raw_values = [722.82, -934.11, None, 122.82, 934.11, 632.11, 312.11, 221.11]
    # Each float is converted to Decimal and rounded to two places, matching
    # the scale of the decimal128(5, 2) type used below.
    decimals = [
        None if value is None else round(decimal.Decimal(value), 2)
        for value in raw_values
    ]
    full_array = pa.array(decimals, pa.decimal128(5, 2))
    sliced_array = full_array.slice(offset=2, length=3)
    # TODO: complete this function
    # self.round_trip_array(lambda: sliced_array)

def test_int_array(self):
self.round_trip_array(lambda: pa.array([1, 2, 3], type=pa.int32()))

def test_int_slice_array(self):
    """Prepare a slice of an int32 array that contains a null.

    Only the sliced array is built here; the round trip is still a TODO.
    """
    int_values = [1, 2, None, 4, 5, 6, 7, 8, 9, 10]
    full_array = pa.array(int_values, type=pa.int32())
    sliced_array = full_array.slice(offset=2, length=3)
    # TODO: complete this function
    # self.round_trip_array(lambda: sliced_array)

def test_list_array(self):
self.round_trip_array(lambda: pa.array(
[[], [0], [1, 2], [4, 5, 6]], pa.list_(pa.int64())
# disabled check_metadata since the list internal field name ("item")
# is not preserved during round trips (it becomes "$data$").
), check_metadata=False)

def test_list_slice_array(self):
    """Prepare a slice of a list<int64> array holding empty and null lists.

    Only the sliced array is built here; the round trip is still a TODO.
    """
    list_values = [
        [],
        [0],
        None,
        [1, 2],
        [4, 5, 6],
        [7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ]
    full_array = pa.array(list_values, pa.list_(pa.int64()))
    sliced_array = full_array.slice(offset=2, length=3)
    # TODO: complete this function
    # check_metadata is disabled since the list internal field name ("item")
    # is not preserved during round trips (it becomes "$data$").
    # self.round_trip_array(lambda: sliced_array, check_metadata=False)

def test_empty_list_array(self):
"""Validates GH-37056 fix.
Empty list of int32 produces a vector with empty child data buffer, however with non-zero capacity.
Expand Down Expand Up @@ -302,10 +334,36 @@ def test_struct_array(self):
]
self.round_trip_array(lambda: pa.array(data, type=pa.struct(fields)))

def test_struct_slice_array(self):
    """Prepare a slice of a struct<f1: int32, f2: string> array.

    The input mixes a null struct with structs whose fields are null.
    Only the sliced array is built here; the round trip is still a TODO.
    """
    struct_type = pa.struct([
        ("f1", pa.int32()),
        ("f2", pa.string()),
    ])
    # (f1, f2) pairs; a bare None stands for a null struct entry.
    rows = [
        (1, "a"),
        None,
        (3, None),
        (None, "d"),
        (None, None),
        (6, "f"),
        (7, "g"),
        (8, "h"),
    ]
    records = [
        None if row is None else {"f1": row[0], "f2": row[1]}
        for row in rows
    ]
    full_array = pa.array(records, type=struct_type)
    sliced_array = full_array.slice(offset=2, length=3)
    # TODO: complete this function
    # self.round_trip_array(lambda: sliced_array)

def test_dict(self):
self.round_trip_array(
lambda: pa.array(["a", "b", None, "d"], pa.dictionary(pa.int64(), pa.utf8())))

def test_slice_dict(self):
    """Prepare a slice of a dictionary<int64, utf8> array that contains a null.

    Only the sliced array is built here; the round trip is still a TODO.
    """
    dictionary_type = pa.dictionary(pa.int64(), pa.utf8())
    labels = ["a", "b", None, "d", "e", "f"]
    full_array = pa.array(labels, dictionary_type)
    sliced_array = full_array.slice(offset=2, length=3)
    # TODO: complete this function
    # self.round_trip_array(lambda: sliced_array)

def test_map(self):
offsets = [0, None, 2, 6]
pykeys = [b"a", b"b", b"c", b"d", b"e", b"f"]
Expand All @@ -315,6 +373,17 @@ def test_map(self):
self.round_trip_array(
lambda: pa.MapArray.from_arrays(offsets, keys, items))

def test_slice_map(self):
    """Prepare a three-entry slice of a map<binary, int32> array.

    Only the sliced array is built here; the round trip is still a TODO.
    """
    # Six offsets describe five map entries, so slice(offset=2, length=3)
    # stays inside the array. With only four offsets (three entries) the
    # requested length would be clamped and the slice would hold one entry.
    # The None offset is kept from test_map; presumably it yields a null
    # map entry -- confirm against MapArray.from_arrays.
    offsets = [0, None, 2, 3, 4, 6]
    pykeys = [b"a", b"b", b"c", b"d", b"e", b"f"]
    pyitems = [1, 2, 3, None, 4, 5]
    keys = pa.array(pykeys, type="binary")
    items = pa.array(pyitems, type="i4")
    data = pa.MapArray.from_arrays(offsets, keys, items)
    sliced_array = data.slice(offset=2, length=3)
    # TODO: complete this function
    # self.round_trip_array(lambda: sliced_array)

def test_field(self):
self.round_trip_field(lambda: pa.field("aa", pa.bool_()))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
Expand Down Expand Up @@ -565,6 +566,18 @@ public static void setVector(VarCharVector vector, byte[]... values) {
vector.setValueCount(length);
}

/**
 * Populate values for ViewVarCharVector.
 *
 * <p>The vector is allocated with {@code allocateNewSafe()} before any value is written. A
 * {@code null} element in {@code values} is skipped, so nothing is written for that index; the
 * value count is still set to {@code values.length}.
 *
 * @param vector the vector to allocate and fill
 * @param values the byte arrays to store, one per index; elements may be {@code null}
 */
public static void setVector(ViewVarCharVector vector, byte[]... values) {
  final int length = values.length;
  vector.allocateNewSafe();
  for (int i = 0; i < length; i++) {
    if (values[i] != null) {
      // Use setSafe, like the String overload of this method, so that writing past the
      // initially allocated capacity grows the vector instead of failing.
      vector.setSafe(i, values[i]);
    }
  }
  vector.setValueCount(length);
}

public static void setVector(VariableWidthFieldVector vector, byte[]... values) {
final int length = values.length;
vector.allocateNewSafe();
Expand Down Expand Up @@ -600,6 +613,18 @@ public static void setVector(VarCharVector vector, String... values) {
vector.setValueCount(length);
}

/**
 * Populate values for ViewVarCharVector.
 *
 * <p>The vector is allocated with {@code allocateNewSafe()} before any value is written. Each
 * non-null string is stored as its UTF-8 bytes; a {@code null} element is skipped, so nothing is
 * written for that index. The value count is set to {@code values.length}.
 *
 * @param vector the vector to allocate and fill
 * @param values the strings to store, one per index; elements may be {@code null}
 */
public static void setVector(ViewVarCharVector vector, String... values) {
  final int length = values.length;
  vector.allocateNewSafe();
  for (int i = 0; i < length; i++) {
    if (values[i] != null) {
      vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
    }
  }
  vector.setValueCount(length);
}

/** Populate values for LargeVarCharVector. */
public static void setVector(LargeVarCharVector vector, String... values) {
final int length = values.length;
Expand Down

0 comments on commit ae9fe8c

Please sign in to comment.