Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37515: [C++] Remove memory address optimization from ChunkedArray::Equals(const std::shared_ptr<arrow::ChunkedArray>& other) if the ChunkedArray can have NaN values #37579

Merged
merged 8 commits into from
Sep 7, 2023
27 changes: 24 additions & 3 deletions cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "arrow/pretty_print.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"

Expand Down Expand Up @@ -111,13 +112,33 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const {
.ok();
}

bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
if (this == other.get()) {
return true;
namespace {

bool supportsNaN(const arrow::DataType& type) {
sgilmore10 marked this conversation as resolved.
Show resolved Hide resolved
bool supports_nan = false;
if (type.num_fields() == 0) {
// Only floating types support NaN
supports_nan |= is_floating(type.id());
sgilmore10 marked this conversation as resolved.
Show resolved Hide resolved
} else {
for (const auto& field : type.fields()) {
supports_nan |= supportsNaN(*field->type());
if (supports_nan) {
break;
}
sgilmore10 marked this conversation as resolved.
Show resolved Hide resolved
}
}
return supports_nan;
}

} // namespace

bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
if (!other) {
return false;
}
if (this == other.get() && !supportsNaN(*other->type())) {
sgilmore10 marked this conversation as resolved.
Show resolved Hide resolved
return true;
}
return Equals(*other.get());
}

Expand Down
27 changes: 27 additions & 0 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,33 @@ TEST_F(TestChunkedArray, EqualsDifferingMetadata) {
ASSERT_TRUE(left.Equals(right));
}

TEST_F(TestChunkedArray, EqualsSameAddressWithNaNs) {
auto chunk0 = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
auto chunk1 = ArrayFromJSON(float64(), "[3, 4, 5]");
ASSERT_OK_AND_ASSIGN(auto result0, ChunkedArray::Make({chunk0, chunk1}, float64()));
// result0 has NaN values
ASSERT_FALSE(result0->Equals(result0));
sgilmore10 marked this conversation as resolved.
Show resolved Hide resolved

auto chunk2 = ArrayFromJSON(float64(), "[6, 7, 8, 9]");
ASSERT_OK_AND_ASSIGN(auto result1, ChunkedArray::Make({chunk1, chunk2}, float64()));
// result1 does not have NaN values
ASSERT_TRUE(result1->Equals(result1));

auto array0 = ArrayFromJSON(int32(), "[0, 1, 2]");
auto array1 = ArrayFromJSON(float64(), "[0, 1, NaN]");
std::vector<std::string> fieldnames = {"Int32Type", "Float64Type"};
ASSERT_OK_AND_ASSIGN(auto struct0, StructArray::Make({array0, array1}, fieldnames));
ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({struct0}, struct0->type()));
// result2 has NaN values
ASSERT_FALSE(result2->Equals(result2));

auto array2 = ArrayFromJSON(float64(), "[0, 1, 2]");
ASSERT_OK_AND_ASSIGN(auto struct1, StructArray::Make({array0, array2}, fieldnames));
ASSERT_OK_AND_ASSIGN(auto result3, ChunkedArray::Make({struct1}, struct1->type()));
// result3 does not have NaN values
ASSERT_TRUE(result3->Equals(result3));
}

TEST_F(TestChunkedArray, SliceEquals) {
random::RandomArrayGenerator gen(42);

Expand Down
Loading