From e018f54d944e151a16a4b5d61aba520975677381 Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Fri, 6 Sep 2024 04:26:50 +0000 Subject: [PATCH] special judge for single row --- velox/row/CompactRow.cpp | 16 +++--- velox/row/CompactRow.h | 4 +- .../UnsafeRowSerializeBenchmark.cpp | 6 +-- velox/row/tests/CompactRowTest.cpp | 2 +- velox/serializers/CompactRowSerializer.cpp | 53 +++++++++++-------- 5 files changed, 47 insertions(+), 34 deletions(-) diff --git a/velox/row/CompactRow.cpp b/velox/row/CompactRow.cpp index 208448b58d974..9821f6e1d1a56 100644 --- a/velox/row/CompactRow.cpp +++ b/velox/row/CompactRow.cpp @@ -365,9 +365,7 @@ void CompactRow::serializeRow( vector_size_t offset, vector_size_t size, char* buffer, - const std::vector& bufferOffsets) { - VELOX_CHECK_EQ(bufferOffsets.size(), size); - + const size_t* bufferOffsets) { raw_vector rows(size); raw_vector nulls(size); if (decoded_.isIdentityMapping()) { @@ -379,11 +377,11 @@ void CompactRow::serializeRow( } // After serializing each column, the 'offsets' are updated accordingly. - std::vector offsets = bufferOffsets; + std::vector offsets(size); auto* base = reinterpret_cast(buffer); for (auto i = 0; i < size; ++i) { - nulls[i] = base + offsets[i]; - offsets[i] += rowNullBytes_; + nulls[i] = base + bufferOffsets[i]; + offsets[i] = bufferOffsets[i] + rowNullBytes_; } // Fixed-width and varchar/varbinary types are serialized using the vectorized @@ -644,7 +642,11 @@ void CompactRow::serialize( vector_size_t offset, vector_size_t size, char* buffer, - const std::vector& bufferOffsets) { + const size_t* bufferOffsets) { + if (size == 1) { + (void)serializeRow(offset, buffer + *bufferOffsets); + return; + } return serializeRow(offset, size, buffer, bufferOffsets); } diff --git a/velox/row/CompactRow.h b/velox/row/CompactRow.h index 638ee55472ed8..c8b9ef86980a2 100644 --- a/velox/row/CompactRow.h +++ b/velox/row/CompactRow.h @@ -48,7 +48,7 @@ class CompactRow { vector_size_t offset, vector_size_t size, char* buffer, - const std::vector& bufferOffsets); + const size_t* bufferOffsets); /// Deserializes multiple rows into a RowVector of specified type. The type /// must match the contents of the serialized rows. @@ -128,7 +128,7 @@ class CompactRow { vector_size_t offset, vector_size_t size, char* buffer, - const std::vector& bufferOffsets); + const size_t* bufferOffsets); const TypeKind typeKind_; DecodedVector decoded_; diff --git a/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp b/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp index aa487bb24de8d..0c895fb50a425 100644 --- a/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp +++ b/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp @@ -228,10 +228,10 @@ class SerializeBenchmark { CompactRow& compactRow, vector_size_t numRows, BufferPtr& buffer, - std::vector& rowSize, - std::vector& offsets) { + const std::vector& rowSize, + const std::vector& offsets) { auto rawBuffer = buffer->asMutable(); - compactRow.serialize(0, numRows, rawBuffer, offsets); + compactRow.serialize(0, numRows, rawBuffer, offsets.data()); std::vector serialized; for (auto i = 0; i < numRows; ++i) { diff --git a/velox/row/tests/CompactRowTest.cpp b/velox/row/tests/CompactRowTest.cpp index 6940aa10e0553..c56fb9bb3c792 100644 --- a/velox/row/tests/CompactRowTest.cpp +++ b/velox/row/tests/CompactRowTest.cpp @@ -92,7 +92,7 @@ class CompactRowTest : public ::testing::Test, public VectorTestBase { memset(rawBuffer, 0, totalSize); { std::vector serialized; - row.serialize(0, numRows, rawBuffer, offsets); + row.serialize(0, numRows, rawBuffer, offsets.data()); for (auto i = 0; i < numRows; ++i) { serialized.push_back( std::string_view(rawBuffer + offsets[i], rowSize[i])); diff --git a/velox/serializers/CompactRowSerializer.cpp b/velox/serializers/CompactRowSerializer.cpp index 9680465c121df..a2579bde9efda 100644 --- a/velox/serializers/CompactRowSerializer.cpp +++ b/velox/serializers/CompactRowSerializer.cpp @@ -40,21 +40,25 @@ class CompactRowVectorSerializer : public IterativeVectorSerializer { const folly::Range& ranges, Scratch& scratch) override { size_t totalSize = 0; - std::vector> rowSize(ranges.size()); + auto totalRows = std::accumulate( + ranges.begin(), + ranges.end(), + 0, + [](vector_size_t sum, const auto& range) { return sum + range.size; }); + row::CompactRow row(vector); + std::vector rowSize(totalRows); if (auto fixedRowSize = row::CompactRow::fixedRowSize(asRowType(vector->type()))) { - for (auto i = 0; i < ranges.size(); ++i) { - totalSize += (fixedRowSize.value() + sizeof(TRowSize)) * ranges[i].size; - rowSize[i].resize(ranges[i].size, fixedRowSize.value()); - } + totalSize += (fixedRowSize.value() + sizeof(TRowSize)) * totalRows; + std::fill(rowSize.begin(), rowSize.end(), fixedRowSize.value()); } else { - for (auto i = 0; i < ranges.size(); ++i) { - const auto& range = ranges[i]; - rowSize[i].resize(range.size); - for (auto j = 0; j < range.size; ++j) { - rowSize[i][j] = row.rowSize(range.begin + j); - totalSize += rowSize[i][j] + sizeof(TRowSize); + vector_size_t index = 0; + for (const auto& range : ranges) { + for (auto i = 0; i < range.size; ++i) { + rowSize[index] = row.rowSize(range.begin + i); + totalSize += rowSize[index] + sizeof(TRowSize); + index++; } } } @@ -68,17 +72,24 @@ class CompactRowVectorSerializer : public IterativeVectorSerializer { buffers_.push_back(std::move(buffer)); size_t offset = 0; - for (auto i = 0; i < ranges.size(); ++i) { - const auto& range = ranges[i]; - std::vector offsets(range.size); - for (auto j = 0; j < range.size; ++j) { - // Write raw size. Needs to be in big endian order. - *(TRowSize*)(rawBuffer + offset) = folly::Endian::big(rowSize[i][j]); - offsets[j] = offset + sizeof(TRowSize); - offset += rowSize[i][j] + sizeof(TRowSize); + vector_size_t index = 0; + for (const auto& range : ranges) { + if (range.size == 1) { + *(TRowSize*)(rawBuffer) = folly::Endian::big(rowSize[index++]); + static const auto offset = sizeof(TRowSize); + row.serialize(range.begin, range.size, rawBuffer, &offset); + } else { + raw_vector offsets(range.size); + for (auto i = 0; i < range.size; ++i) { + // Write raw size. Needs to be in big endian order. + *(TRowSize*)(rawBuffer + offset) = folly::Endian::big(rowSize[index]); + offsets[i] = offset + sizeof(TRowSize); + offset += rowSize[index] + sizeof(TRowSize); + index++; + } + // Write row data for all rows in range. + row.serialize(range.begin, range.size, rawBuffer, offsets.data()); } - // Write row data for all rows in range. - row.serialize(range.begin, range.size, rawBuffer, offsets); } }