diff --git a/velox/functions/sparksql/Hash.cpp b/velox/functions/sparksql/Hash.cpp index 8ad8b4e7f0831..8d48f9d882dbc 100644 --- a/velox/functions/sparksql/Hash.cpp +++ b/velox/functions/sparksql/Hash.cpp @@ -26,16 +26,280 @@ namespace { const int32_t kDefaultSeed = 42; +struct Murmur3Hash; +struct XxHash64; + +template +struct HashTraits {}; + +template <> +struct HashTraits { + using SeedType = int32_t; + using ReturnType = int32_t; +}; + +template <> +struct HashTraits { + using SeedType = int64_t; + using ReturnType = int64_t; +}; + +template < + typename HashClass, + typename T, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(T input, SeedType seed) { + return HashClass::hashInt32(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(int64_t input, SeedType seed) { + return HashClass::hashInt64(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(float input, SeedType seed) { + return HashClass::hashFloat(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(double input, SeedType seed) { + return HashClass::hashDouble(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(int128_t input, SeedType seed) { + return HashClass::hashLongDecimal(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(Timestamp input, SeedType seed) { + return HashClass::hashTimestamp(input, seed); +} + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +ReturnType hashOne(StringView input, SeedType seed) { + return HashClass::hashBytes(input, seed); +} + +template < + typename HashClass, + TypeKind kind, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +class PrimitiveVectorHasher; + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +class ArrayVectorHasher; + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +class MapVectorHasher; + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +class RowVectorHasher; + +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> +class VectorHasher { + public: + // Compute the hash value of input vector at index. + ReturnType hashAt(vector_size_t index, SeedType seed) { + if (decoded_.isNullAt(index)) { + return seed; + } + return hashNotNull(index, seed); + } + + virtual ReturnType hashNotNull(vector_size_t index, SeedType seed) = 0; + + VectorHasher(DecodedVector& decoded) : decoded_(decoded) {} + + virtual ~VectorHasher() = default; + + protected: + const DecodedVector& decoded_; +}; + +template +std::shared_ptr> createPrimitiveVectorHasher( + DecodedVector& decoded) { + return std::make_shared>(decoded); +} + +template +std::shared_ptr> createVectorHasher( + DecodedVector& decoded) { + auto baseType = decoded.base()->type(); + if (baseType->isPrimitiveType()) { + return VELOX_DYNAMIC_SCALAR_TEMPLATE_TYPE_DISPATCH( + createPrimitiveVectorHasher, HashClass, baseType->kind(), decoded); + } else if (baseType->isArray()) { + return std::make_shared>(decoded); + } else if (baseType->isMap()) { + return std::make_shared>(decoded); + } else if (baseType->isRow()) { + return std::make_shared>(decoded); + } + VELOX_UNREACHABLE(); +} + +template < + typename HashClass, + TypeKind kind, + typename SeedType, + typename ReturnType> +class PrimitiveVectorHasher + : public VectorHasher { + public: + PrimitiveVectorHasher(DecodedVector& decoded) + : VectorHasher(decoded) {} + + ReturnType hashNotNull(vector_size_t index, SeedType seed) override { + return hashOne( + this->decoded_.template valueAt::NativeType>( + index), + seed); + } +}; + +template +class ArrayVectorHasher : public VectorHasher { + public: + ArrayVectorHasher(DecodedVector& decoded) : VectorHasher(decoded) { + base_ = decoded.base()->as(); + indices_ = decoded.indices(); + + SelectivityVector rows(base_->elements()->size()); + decodedElements_.decode(*base_->elements(), rows); + elementHasher_ = createVectorHasher(decodedElements_); + } + + ReturnType hashNotNull(vector_size_t index, SeedType seed) override { + auto size = base_->sizeAt(indices_[index]); + auto offset = base_->offsetAt(indices_[index]); + + ReturnType result = seed; + for (auto i = 0; i < size; ++i) { + result = elementHasher_->hashAt(i + offset, result); + } + return result; + } + + private: + const ArrayVector* base_; + const int32_t* indices_; + DecodedVector decodedElements_; + std::shared_ptr> elementHasher_; +}; + +template +class MapVectorHasher : public VectorHasher { + public: + MapVectorHasher(DecodedVector& decoded) : VectorHasher(decoded) { + base_ = decoded.base()->as(); + indices_ = decoded.indices(); + + SelectivityVector rows(base_->mapKeys()->size()); + decodedKeys_.decode(*base_->mapKeys(), rows); + decodedValues_.decode(*base_->mapValues(), rows); + keyHasher_ = createVectorHasher(decodedKeys_); + valueHasher_ = createVectorHasher(decodedValues_); + } + + ReturnType hashNotNull(vector_size_t index, SeedType seed) override { + auto size = base_->sizeAt(indices_[index]); + auto offset = base_->offsetAt(indices_[index]); + + ReturnType result = seed; + for (auto i = 0; i < size; ++i) { + result = keyHasher_->hashAt(i + offset, result); + result = valueHasher_->hashAt(i + offset, result); + } + return result; + } + + private: + const MapVector* base_; + const int32_t* indices_; + DecodedVector decodedKeys_; + DecodedVector decodedValues_; + std::shared_ptr> keyHasher_; + std::shared_ptr> valueHasher_; +}; + +template +class RowVectorHasher : public VectorHasher { + public: + RowVectorHasher(DecodedVector& decoded) : VectorHasher(decoded) { + base_ = decoded.base()->as(); + indices_ = decoded.indices(); + + SelectivityVector rows(base_->size()); + decodedChildren_.resize(base_->childrenSize()); + hashers_.resize(base_->childrenSize()); + for (auto i = 0; i < base_->childrenSize(); ++i) { + decodedChildren_[i].decode(*base_->childAt(i), rows); + hashers_[i] = createVectorHasher(decodedChildren_[i]); + } + } + + ReturnType hashNotNull(vector_size_t index, SeedType seed) override { + ReturnType result = seed; + for (auto i = 0; i < base_->childrenSize(); ++i) { + result = hashers_[i]->hashAt(indices_[index], result); + } + return result; + } + + private: + const RowVector* base_; + const int32_t* indices_; + std::vector decodedChildren_; + std::vector>> hashers_; +}; + // ReturnType can be either int32_t or int64_t // HashClass contains the function like hashInt32 -template +template < + typename HashClass, + typename SeedType = typename HashTraits::SeedType, + typename ReturnType = typename HashTraits::ReturnType> void applyWithType( const SelectivityVector& rows, std::vector& args, // Not using const ref so we can reuse args std::optional seed, exec::EvalCtx& context, VectorPtr& resultRef) { - HashClass hash; size_t hashIdx = seed ? 1 : 0; SeedType hashSeed = seed ? *seed : kDefaultSeed; @@ -54,36 +318,16 @@ void applyWithType( decoded->nulls(&rows), rows.begin(), rows.end()); selected = selectedMinusNulls.get(); } - switch (args[i]->type()->kind()) { -// Derived from InterpretedHashFunction.hash: -// https://github.com/apache/spark/blob/382b66e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala#L532 -#define CASE(typeEnum, hashFn, inputType) \ - case TypeKind::typeEnum: \ - selected->applyToSelected([&](int row) { \ - result.set( \ - row, hashFn(decoded->valueAt(row), result.valueAt(row))); \ - }); \ - break; - CASE(BOOLEAN, hash.hashInt32, bool); - CASE(TINYINT, hash.hashInt32, int8_t); - CASE(SMALLINT, hash.hashInt32, int16_t); - CASE(INTEGER, hash.hashInt32, int32_t); - CASE(BIGINT, hash.hashInt64, int64_t); - CASE(VARCHAR, hash.hashBytes, StringView); - CASE(VARBINARY, hash.hashBytes, StringView); - CASE(REAL, hash.hashFloat, float); - CASE(DOUBLE, hash.hashDouble, double); - CASE(HUGEINT, hash.hashLongDecimal, int128_t); - CASE(TIMESTAMP, hash.hashTimestamp, Timestamp); -#undef CASE - default: - VELOX_NYI( - "Unsupported type for HASH(): {}", args[i]->type()->toString()); - } + + auto hasher = createVectorHasher(*decoded); + selected->applyToSelected([&](int row) { + result.set(row, hasher->hashNotNull(row, result.valueAt(row))); + }); } } -// Derived from src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java. +// Derived from +// src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java. // // Spark's Murmur3 seems slightly different from the original from Austin // Appleby: in particular the fmix function's first line is different. The @@ -95,13 +339,13 @@ void applyWithType( class Murmur3Hash final { public: - uint32_t hashInt32(int32_t input, uint32_t seed) { + static uint32_t hashInt32(int32_t input, uint32_t seed) { uint32_t k1 = mixK1(input); uint32_t h1 = mixH1(seed, k1); return fmix(h1, 4); } - uint32_t hashInt64(uint64_t input, uint32_t seed) { + static uint32_t hashInt64(uint64_t input, uint32_t seed) { uint32_t low = input; uint32_t high = input >> 32; @@ -116,19 +360,19 @@ class Murmur3Hash final { // Floating point numbers are hashed as if they are integers, with // -0f defined to have the same output as +0f. - uint32_t hashFloat(float input, uint32_t seed) { + static uint32_t hashFloat(float input, uint32_t seed) { return hashInt32( input == -0.f ? 0 : *reinterpret_cast(&input), seed); } - uint32_t hashDouble(double input, uint32_t seed) { + static uint32_t hashDouble(double input, uint32_t seed) { return hashInt64( input == -0. ? 0 : *reinterpret_cast(&input), seed); } // Spark also has an hashUnsafeBytes2 function, but it was not used at the // time of implementation. - uint32_t hashBytes(const StringView& input, uint32_t seed) { + static uint32_t hashBytes(const StringView& input, uint32_t seed) { const char* i = input.data(); const char* const end = input.data() + input.size(); uint32_t h1 = seed; @@ -141,25 +385,25 @@ class Murmur3Hash final { return fmix(h1, input.size()); } - uint32_t hashLongDecimal(int128_t input, uint32_t seed) { + static uint32_t hashLongDecimal(int128_t input, uint32_t seed) { char out[sizeof(int128_t)]; int32_t length = DecimalUtil::toByteArray(input, out); return hashBytes(StringView(out, length), seed); } - uint32_t hashTimestamp(Timestamp input, uint32_t seed) { + static uint32_t hashTimestamp(Timestamp input, uint32_t seed) { return hashInt64(input.toMicros(), seed); } private: - uint32_t mixK1(uint32_t k1) { + static uint32_t mixK1(uint32_t k1) { k1 *= 0xcc9e2d51; k1 = bits::rotateLeft(k1, 15); k1 *= 0x1b873593; return k1; } - uint32_t mixH1(uint32_t h1, uint32_t k1) { + static uint32_t mixH1(uint32_t h1, uint32_t k1) { h1 ^= k1; h1 = bits::rotateLeft(h1, 13); h1 = h1 * 5 + 0xe6546b64; @@ -167,7 +411,7 @@ class Murmur3Hash final { } // Finalization mix - force all bits of a hash block to avalanche - uint32_t fmix(uint32_t h1, uint32_t length) { + static uint32_t fmix(uint32_t h1, uint32_t length) { h1 ^= length; h1 ^= h1 >> 16; h1 *= 0x85ebca6b; @@ -190,7 +434,7 @@ class Murmur3HashFunction final : public exec::VectorFunction { exec::EvalCtx& context, VectorPtr& resultRef) const final { context.ensureWritable(rows, INTEGER(), resultRef); - applyWithType(rows, args, seed_, context, resultRef); + applyWithType(rows, args, seed_, context, resultRef); } private: @@ -198,21 +442,15 @@ class Murmur3HashFunction final : public exec::VectorFunction { }; class XxHash64 final { - const uint64_t PRIME64_1 = 0x9E3779B185EBCA87L; - const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FL; - const uint64_t PRIME64_3 = 0x165667B19E3779F9L; - const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63L; - const uint64_t PRIME64_5 = 0x27D4EB2F165667C5L; - public: - int64_t hashInt32(const int32_t input, uint64_t seed) { + static uint64_t hashInt32(const int32_t input, uint64_t seed) { int64_t hash = seed + PRIME64_5 + 4L; hash ^= static_cast((input & 0xFFFFFFFFL) * PRIME64_1); hash = bits::rotateLeft64(hash, 23) * PRIME64_2 + PRIME64_3; return fmix(hash); } - int64_t hashInt64(int64_t input, uint64_t seed) { + static uint64_t hashInt64(int64_t input, uint64_t seed) { int64_t hash = seed + PRIME64_5 + 8L; hash ^= bits::rotateLeft64(input * PRIME64_2, 31) * PRIME64_1; hash = bits::rotateLeft64(hash, 27) * PRIME64_1 + PRIME64_4; @@ -221,17 +459,17 @@ class XxHash64 final { // Floating point numbers are hashed as if they are integers, with // -0f defined to have the same output as +0f. - int64_t hashFloat(float input, uint64_t seed) { + static uint64_t hashFloat(float input, uint64_t seed) { return hashInt32( input == -0.f ? 0 : *reinterpret_cast(&input), seed); } - int64_t hashDouble(double input, uint64_t seed) { + static uint64_t hashDouble(double input, uint64_t seed) { return hashInt64( input == -0. ? 0 : *reinterpret_cast(&input), seed); } - uint64_t hashBytes(const StringView& input, uint64_t seed) { + static uint64_t hashBytes(const StringView& input, uint64_t seed) { const char* i = input.data(); const char* const end = input.data() + input.size(); @@ -253,18 +491,24 @@ class XxHash64 final { return fmix(hash); } - int64_t hashLongDecimal(int128_t input, uint32_t seed) { + static uint64_t hashLongDecimal(int128_t input, uint64_t seed) { char out[sizeof(int128_t)]; int32_t length = DecimalUtil::toByteArray(input, out); return hashBytes(StringView(out, length), seed); } - int64_t hashTimestamp(Timestamp input, uint32_t seed) { + static uint64_t hashTimestamp(Timestamp input, uint64_t seed) { return hashInt64(input.toMicros(), seed); } private: - uint64_t fmix(uint64_t hash) { + static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87L; + static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FL; + static const uint64_t PRIME64_3 = 0x165667B19E3779F9L; + static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63L; + static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5L; + + static uint64_t fmix(uint64_t hash) { hash ^= hash >> 33; hash *= PRIME64_2; hash ^= hash >> 29; @@ -273,7 +517,7 @@ class XxHash64 final { return hash; } - uint64_t hashBytesByWords(const StringView& input, uint64_t seed) { + static uint64_t hashBytesByWords(const StringView& input, uint64_t seed) { const char* i = input.data(); const char* const end = input.data() + input.size(); uint32_t length = input.size(); @@ -353,7 +597,7 @@ class XxHash64Function final : public exec::VectorFunction { exec::EvalCtx& context, VectorPtr& resultRef) const final { context.ensureWritable(rows, BIGINT(), resultRef); - applyWithType(rows, args, seed_, context, resultRef); + applyWithType(rows, args, seed_, context, resultRef); } private: @@ -386,6 +630,9 @@ void checkArgTypes(const std::vector& args) { case TypeKind::DOUBLE: case TypeKind::HUGEINT: case TypeKind::TIMESTAMP: + case TypeKind::ARRAY: + case TypeKind::MAP: + case TypeKind::ROW: break; default: VELOX_USER_FAIL("Unsupported type for hash: {}", arg.type->toString()) diff --git a/velox/functions/sparksql/tests/HashTest.cpp b/velox/functions/sparksql/tests/HashTest.cpp index 422d63643e56b..1c6569dc15bf7 100644 --- a/velox/functions/sparksql/tests/HashTest.cpp +++ b/velox/functions/sparksql/tests/HashTest.cpp @@ -18,6 +18,8 @@ #include +using facebook::velox::test::assertEqualVectors; + namespace facebook::velox::functions::sparksql::test { namespace { @@ -27,6 +29,10 @@ class HashTest : public SparkFunctionBaseTest { std::optional hash(std::optional arg) { return evaluateOnce("hash(c0)", arg); } + + VectorPtr hash(VectorPtr vector) { + return evaluate("hash(c0)", makeRowVector({vector})); + } }; TEST_F(HashTest, String) { @@ -128,5 +134,105 @@ TEST_F(HashTest, Float) { EXPECT_EQ(hash(-limits::infinity()), 427440766); } +TEST_F(HashTest, Array) { + assertEqualVectors( + makeFlatVector({2101165938, 42, 1045631400}), + hash(makeArrayVector({{1, 2, 3, 4, 5}, {}, {1, 2, 3}}))); + + assertEqualVectors( + makeFlatVector({-559580957, 1765031574, 42}), + hash(makeNullableArrayVector( + {{1, std::nullopt}, {std::nullopt, 2}, {std::nullopt}}))); + + // Nested array. + { + using innerArrayType = std::vector>; + using outerArrayType = + std::vector>>>; + + innerArrayType a{1, std::nullopt, 2, 3}; + innerArrayType b{4, 5}; + innerArrayType c{6, 7, 8}; + outerArrayType row1{{a}, {b}}; + outerArrayType row2{{a}, {c}}; + outerArrayType row3{{{}}}; + outerArrayType row4{{{std::nullopt}}}; + auto arrayVector = makeNullableNestedArrayVector( + {{row1}, {row2}, {row3}, {row4}, std::nullopt}); + assertEqualVectors( + makeFlatVector({2101165938, -992561130, 42, 42, 42}), + hash(arrayVector)); + } + + // Array of map. + { + using S = StringView; + using P = std::pair>; + std::vector

a{P{1, S{"a"}}, P{2, std::nullopt}}; + std::vector

b{P{3, S{"c"}}}; + std::vector>> data = {{a, b}}; + auto arrayVector = makeArrayOfMapVector(data); + assertEqualVectors( + makeFlatVector(std::vector{-718462205}), + hash(arrayVector)); + } + + // Array of row. + { + std::vector>>> + data = { + {{{1, "red"}}, {{2, "blue"}}, {{3, "green"}}}, + {{{1, "red"}}, std::nullopt, {{3, "green"}}}, + {std::nullopt}, + }; + auto arrayVector = makeArrayOfRowVector(data, ROW({INTEGER(), VARCHAR()})); + assertEqualVectors( + makeFlatVector({-1458343314, 551500425, 42}), + hash(arrayVector)); + } +} + +TEST_F(HashTest, Map) { + auto mapVector = makeMapVector( + {{{1, 17.0}, {2, 36.0}, {3, 8.0}, {4, 28.0}, {5, 24.0}, {6, 32.0}}}); + assertEqualVectors( + makeFlatVector(std::vector{1263683448}), + hash(mapVector)); + + auto mapOfArrays = createMapOfArraysVector( + {{{1, {{1, 2, 3}}}}, {{2, {{4, 5, 6}}}}, {{3, {{7, 8, 9}}}}}); + assertEqualVectors( + makeFlatVector({-1818148947, 529298908, 825098912}), + hash(mapOfArrays)); + + auto mapWithNullArrays = createMapOfArraysVector( + {{{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, {{3, {{}}}}}); + assertEqualVectors( + makeFlatVector({-1712319331, 2060637564, 519220707}), + hash(mapWithNullArrays)); +} + +TEST_F(HashTest, Row) { + auto row = makeRowVector({ + makeFlatVector({1, 3}), + makeFlatVector({2, 4}), + }); + assertEqualVectors( + makeFlatVector({-1181176833, 1717636039}), hash(row)); + + row = makeRowVector({ + makeNullableFlatVector({1, std::nullopt}), + makeNullableFlatVector({std::nullopt, 4}), + }); + assertEqualVectors( + makeFlatVector({-1712319331, 1344313940}), hash(row)); + + row->setNull(0, true); + assertEqualVectors(makeFlatVector({42, 1344313940}), hash(row)); + + row->setNull(1, true); + assertEqualVectors(makeFlatVector({42, 42}), hash(row)); +} + } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/XxHash64Test.cpp b/velox/functions/sparksql/tests/XxHash64Test.cpp index 09162f4a0279e..d1508f3681fbc 100644 --- a/velox/functions/sparksql/tests/XxHash64Test.cpp +++ b/velox/functions/sparksql/tests/XxHash64Test.cpp @@ -18,6 +18,8 @@ #include +using facebook::velox::test::assertEqualVectors; + namespace facebook::velox::functions::sparksql::test { namespace { class XxHash64Test : public SparkFunctionBaseTest { @@ -26,6 +28,10 @@ class XxHash64Test : public SparkFunctionBaseTest { std::optional xxhash64(std::optional arg) { return evaluateOnce("xxhash64(c0)", arg); } + + VectorPtr xxhash64(VectorPtr vector) { + return evaluate("xxhash64(c0)", makeRowVector({vector})); + } }; // The expected result was obtained by running SELECT xxhash64("Spark") query @@ -138,6 +144,113 @@ TEST_F(XxHash64Test, float) { EXPECT_EQ(xxhash64(-limits::infinity()), -7580553461823983095); } +TEST_F(XxHash64Test, array) { + assertEqualVectors( + makeFlatVector({-6041664978295882827, 42, 4904562767517797033}), + xxhash64(makeArrayVector({{1, 2, 3, 4, 5}, {}, {1, 2, 3}}))); + + assertEqualVectors( + makeFlatVector({-6698625589789238999, 8420071140774656230, 42}), + xxhash64(makeNullableArrayVector( + {{1, std::nullopt}, {std::nullopt, 2}, {std::nullopt}}))); + + // Nested array. + { + using innerArrayType = std::vector>; + using outerArrayType = + std::vector>>>; + + innerArrayType a{1, std::nullopt, 2, 3}; + innerArrayType b{4, 5}; + innerArrayType c{6, 7, 8}; + outerArrayType row1{{a}, {b}}; + outerArrayType row2{{a}, {c}}; + outerArrayType row3{{{}}}; + outerArrayType row4{{{std::nullopt}}}; + auto arrayVector = makeNullableNestedArrayVector( + {{row1}, {row2}, {row3}, {row4}, std::nullopt}); + assertEqualVectors( + makeFlatVector( + {-6041664978295882827, -1052942565807509112, 42, 42, 42}), + xxhash64(arrayVector)); + } + + // Array of map. + { + using S = StringView; + using P = std::pair>; + std::vector

a{P{1, S{"a"}}, P{2, std::nullopt}}; + std::vector

b{P{3, S{"c"}}}; + std::vector>> data = {{a, b}}; + auto arrayVector = makeArrayOfMapVector(data); + assertEqualVectors( + makeFlatVector(std::vector{2880747995994395223}), + xxhash64(arrayVector)); + } + + // Array of row. + { + std::vector>>> + data = { + {{{1, "red"}}, {{2, "blue"}}, {{3, "green"}}}, + {{{1, "red"}}, std::nullopt, {{3, "green"}}}, + {std::nullopt}, + }; + auto arrayVector = makeArrayOfRowVector(data, ROW({INTEGER(), VARCHAR()})); + assertEqualVectors( + makeFlatVector( + {-4096178443626566478, -8973283971856715104, 42}), + xxhash64(arrayVector)); + } +} + +TEST_F(XxHash64Test, map) { + auto mapVector = makeMapVector( + {{{1, 17.0}, {2, 36.0}, {3, 8.0}, {4, 28.0}, {5, 24.0}, {6, 32.0}}}); + assertEqualVectors( + makeFlatVector(std::vector{-6303587702533348160}), + xxhash64(mapVector)); + + auto mapOfArrays = createMapOfArraysVector( + {{{1, {{1, 2, 3}}}}, {{2, {{4, 5, 6}}}}, {{3, {{7, 8, 9}}}}}); + assertEqualVectors( + makeFlatVector( + {-2103781794412908874, 1112887818746642853, 5787852566364222439}), + xxhash64(mapOfArrays)); + + auto mapWithNullArrays = createMapOfArraysVector( + {{{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, {{3, {{}}}}}); + assertEqualVectors( + makeFlatVector( + {-7001672635703045582, 7217681953522744649, 3188756510806108107}), + xxhash64(mapWithNullArrays)); +} + +TEST_F(XxHash64Test, row) { + auto row = makeRowVector({ + makeFlatVector({1, 3}), + makeFlatVector({2, 4}), + }); + assertEqualVectors( + makeFlatVector({-8198029865082835910, 351067884137457704}), + xxhash64(row)); + + row = makeRowVector({ + makeNullableFlatVector({1, std::nullopt}), + makeNullableFlatVector({std::nullopt, 4}), + }); + assertEqualVectors( + makeFlatVector({-7001672635703045582, 404280023041566627}), + xxhash64(row)); + + row->setNull(0, true); + assertEqualVectors( + makeFlatVector({42, 404280023041566627}), xxhash64(row)); + + row->setNull(1, true); + assertEqualVectors(makeFlatVector({42, 42}), xxhash64(row)); +} + TEST_F(XxHash64Test, hashSeed) { auto xxhash64WithSeed = [&](int64_t seed, const std::optional& arg) { return evaluateOnce(