From e80028f88f07553c18c4a239cde9d49d03fc65de Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Tue, 20 Aug 2024 02:34:50 +0000 Subject: [PATCH] address comments --- velox/row/CompactRow.cpp | 41 ++++++++++--------- velox/row/CompactRow.h | 22 +++++----- .../benchmark/UnsafeRowSerializeBenchmark.cpp | 3 +- velox/row/tests/CompactRowTest.cpp | 3 +- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/velox/row/CompactRow.cpp b/velox/row/CompactRow.cpp index eaa05ee724103..481eb2d80a63c 100644 --- a/velox/row/CompactRow.cpp +++ b/velox/row/CompactRow.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ #include "velox/row/CompactRow.h" +#include "velox/common/base/RawVector.h" #include "velox/vector/FlatVector.h" namespace facebook::velox::row { @@ -326,25 +327,26 @@ int32_t CompactRow::serializeRow(vector_size_t index, char* buffer) { } void CompactRow::serializeRow( - const IndexRange& indexRange, + vector_size_t offset, + vector_size_t size, char* buffer, - std::vector& offsets) { - VELOX_CHECK_EQ(offsets.size(), indexRange.size); + std::vector& bufferOffsets) { + VELOX_CHECK_EQ(bufferOffsets.size(), size); - raw_vector rows(indexRange.size); - raw_vector nulls(indexRange.size); + raw_vector rows(size); + raw_vector nulls(size); if (decoded_.isIdentityMapping()) { - std::iota(rows.begin(), rows.end(), indexRange.begin); + std::iota(rows.begin(), rows.end(), offset); } else { - for (auto i = 0; i < indexRange.size; ++i) { - rows[i] = decoded_.index(indexRange.begin + i); + for (auto i = 0; i < size; ++i) { + rows[i] = decoded_.index(offset + i); } } auto* base = reinterpret_cast(buffer); - for (auto i = 0; i < indexRange.size; ++i) { - nulls[i] = base + offsets[i]; - offsets[i] += rowNullBytes_; + for (auto i = 0; i < size; ++i) { + nulls[i] = base + bufferOffsets[i]; + bufferOffsets[i] += rowNullBytes_; } for (auto childIdx = 0; childIdx < children_.size(); ++childIdx) { @@ -361,17 +363,17 @@ void CompactRow::serializeRow( child.valueBytes_, nulls, buffer, - offsets); + bufferOffsets); } else { auto mayHaveNulls = child.decoded_.mayHaveNulls(); - for (auto i = 0; i < indexRange.size; ++i) { + for (auto i = 0; i < size; ++i) { if (mayHaveNulls && child.isNullAt(rows[i])) { bits::setBit(nulls[i], childIdx, true); } else { // Write non-null variable-width value. - auto size = - child.serializeVariableWidth(rows[i], buffer + offsets[i]); - offsets[i] += size; + auto bytes = + child.serializeVariableWidth(rows[i], buffer + bufferOffsets[i]); + bufferOffsets[i] += bytes; } } } @@ -600,10 +602,11 @@ int32_t CompactRow::serialize(vector_size_t index, char* buffer) { } void CompactRow::serialize( - const IndexRange& indexRange, + vector_size_t offset, + vector_size_t size, char* buffer, - std::vector& offsets) { - return serializeRow(indexRange, buffer, offsets); + std::vector& bufferOffsets) { + return serializeRow(offset, size, buffer, bufferOffsets); } void CompactRow::serializeFixedWidth(vector_size_t index, char* buffer) { diff --git a/velox/row/CompactRow.h b/velox/row/CompactRow.h index 6f797f26f27b6..a6ab7793e069b 100644 --- a/velox/row/CompactRow.h +++ b/velox/row/CompactRow.h @@ -17,7 +17,6 @@ #include "velox/vector/ComplexVector.h" #include "velox/vector/DecodedVector.h" -#include "velox/vector/VectorStream.h" namespace facebook::velox::row { @@ -37,14 +36,15 @@ class CompactRow { /// 'buffer' must have sufficient capacity and set to all zeros. int32_t serialize(vector_size_t index, char* buffer); - /// Serializes rows at specified index range into 'buffer' at given offset. - /// 'buffer' must have sufficient capacity and set to all zeros. - /// The size of 'offsets' and indexRange must be the same. - /// The value of 'offsets' will be updated by the actual bytes written. + /// Serializes rows in range [offset, offset + size) into 'buffer' at given + /// 'bufferOffsets'. 'buffer' must have sufficient capacity and set to all + /// zeros. The size of 'bufferOffsets' and 'size' must be the same. + /// 'bufferOffsets' will be updated by the actual bytes written. void serialize( - const IndexRange& indexRange, + vector_size_t offset, + vector_size_t size, char* buffer, - std::vector& offsets); + std::vector& bufferOffsets); /// Deserializes multiple rows into a RowVector of specified type. The type /// must match the contents of the serialized rows. @@ -118,11 +118,13 @@ class CompactRow { /// Serializes struct value to buffer. Value must not be null. int32_t serializeRow(vector_size_t index, char* buffer); - // Serializes struct value to buffer. Value must not be null. + /// Serializes struct values in range [offset, offset + size) to buffer. + /// Value must not be null. void serializeRow( - const IndexRange& indexRange, + vector_size_t offset, + vector_size_t size, char* buffer, - std::vector& offsets); + std::vector& bufferOffsets); const TypeKind typeKind_; DecodedVector decoded_; diff --git a/velox/row/benchmark/UnsafeRowSerializeBenchmark.cpp b/velox/row/benchmark/UnsafeRowSerializeBenchmark.cpp index de54874c63511..5cea04bc20c8a 100644 --- a/velox/row/benchmark/UnsafeRowSerializeBenchmark.cpp +++ b/velox/row/benchmark/UnsafeRowSerializeBenchmark.cpp @@ -225,8 +225,7 @@ class SerializeBenchmark { BufferPtr& buffer, std::vector& offsets) { auto rawBuffer = buffer->asMutable(); - IndexRange indexRange{0, numRows}; - compactRow.serialize(indexRange, rawBuffer, offsets); + compactRow.serialize(0, numRows, rawBuffer, offsets); VELOX_CHECK_EQ(buffer->size(), offsets.back()); std::vector serialized; diff --git a/velox/row/tests/CompactRowTest.cpp b/velox/row/tests/CompactRowTest.cpp index b7bf53c23147b..bfc0a3c80c2e2 100644 --- a/velox/row/tests/CompactRowTest.cpp +++ b/velox/row/tests/CompactRowTest.cpp @@ -49,7 +49,6 @@ class CompactRowTest : public ::testing::Test, public VectorTestBase { auto rowType = asRowType(data->type()); auto numRows = data->size(); - IndexRange range{0, numRows}; std::vector offsets(numRows); CompactRow row(data); @@ -90,7 +89,7 @@ class CompactRowTest : public ::testing::Test, public VectorTestBase { memset(rawBuffer, 0, totalSize); { std::vector serialized; - row.serialize(range, rawBuffer, offsets); + row.serialize(0, numRows, rawBuffer, offsets); serialized.push_back(std::string_view(rawBuffer, offsets[0])); for (auto i = 1; i < numRows; ++i) { serialized.push_back(std::string_view(