Skip to content

Commit

Permalink
Make documentation format aligned with convention
Browse files Browse the repository at this point in the history
  • Loading branch information
tanjialiang committed May 12, 2024
1 parent 2c98308 commit c2a3f18
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 82 deletions.
28 changes: 14 additions & 14 deletions velox/vector/ComplexVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ class RowVector : public BaseVector {
return childrenSize_;
}

// Resize a row vector by adding trailing nulls to the top level row without
// resizing children.
// Caller should ensure that the vector is unique before calling this method.
/// Resize a row vector by adding trailing nulls to the top level row without
/// resizing children.
/// Caller should ensure that the vector is unique before calling this method.
void appendNulls(vector_size_t numberOfRows);

/// Get the child vector at a given offset.
Expand Down Expand Up @@ -296,8 +296,8 @@ class RowVector : public BaseVector {
VectorPtr rawVectorForBatchReader_;
};

// Common parent class for ARRAY and MAP vectors. Contains 'offsets' and
// 'sizes' data and provide manipulations on them.
/// Common parent class for ARRAY and MAP vectors. Contains 'offsets' and
/// 'sizes' data and provides manipulations on them.
struct ArrayVectorBase : BaseVector {
ArrayVectorBase(const ArrayVectorBase&) = delete;
const BufferPtr& offsets() const {
Expand Down Expand Up @@ -342,8 +342,8 @@ struct ArrayVectorBase : BaseVector {
BaseVector::resize(size, setNotNull);
}

// Its the caller responsibility to make sure that `offsets_` and `sizes_` are
// safe to write at index i, i.ex not shared, or not large enough.
/// It is the caller's responsibility to make sure that `offsets_` and
/// `sizes_` are safe to write at index i, i.e. not shared and large enough.
void
setOffsetAndSize(vector_size_t i, vector_size_t offset, vector_size_t size) {
DCHECK_LT(i, BaseVector::length_);
Expand Down Expand Up @@ -632,17 +632,17 @@ class MapVector : public ArrayVectorBase {

std::string toString(vector_size_t index) const override;

// Sorts all maps smallest key first. This enables linear time
// comparison and log time lookup. This may only be done if there
// are no other references to 'map'. Checks that 'map' is uniquely
// referenced. This is guaranteed after construction or when
// retrieving values from aggregation or join row containers.
/// Sorts all maps smallest key first. This enables linear time
/// comparison and log time lookup. This may only be done if there
/// are no other references to 'map'. Checks that 'map' is uniquely
/// referenced. This is guaranteed after construction or when
/// retrieving values from aggregation or join row containers.
static void canonicalize(
const std::shared_ptr<MapVector>& map,
bool useStableSort = false);

// Returns indices into the map at 'index' such
// that keys[indices[i]] < keys[indices[i + 1]].
/// Returns indices into the map at 'index' such
/// that keys[indices[i]] < keys[indices[i + 1]].
std::vector<vector_size_t> sortedKeyIndices(vector_size_t index) const;

void ensureWritable(const SelectivityVector& rows) override;
Expand Down
32 changes: 14 additions & 18 deletions velox/vector/ConstantVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ class ConstantVector final : public SimpleVector<T> {
}
}

// Creates constant vector with value coming from 'index' element of the
// 'base' vector. Base vector can be flat or lazy vector. Base vector cannot
// be a constant or dictionary vector. Use BaseVector::wrapInConstant to
// automatically peel off encodings of the base vector.
//
// If base vector is lazy and has not been loaded yet, loading will be delayed
// until loadedVector() is called.
/// Creates constant vector with value coming from 'index' element of the
/// 'base' vector. Base vector can be flat or lazy vector. Base vector cannot
/// be a constant or dictionary vector. Use BaseVector::wrapInConstant to
/// automatically peel off encodings of the base vector.
///
/// If base vector is lazy and has not been loaded yet, loading will be
/// delayed until loadedVector() is called.
ConstantVector(
velox::memory::MemoryPool* pool,
vector_size_t length,
Expand Down Expand Up @@ -184,13 +184,9 @@ class ConstantVector final : public SimpleVector<T> {
return &value_;
}

/**
* Loads a 256bit vector of data at the virtual byteOffset given
* Note this method is implemented on each vector type, but is intentionally
* not virtual for performance reasons
*
* @param byteOffset - the byte offset to laod from
*/
/// Loads a 256-bit vector of data at the given virtual byteOffset.
/// Note this method is implemented on each vector type, but is intentionally
/// not virtual for performance reasons.
xsimd::batch<T> loadSIMDValueBufferAt(size_t /* byteOffset */) const {
VELOX_DCHECK(initialized_);
return valueBuffer_;
Expand Down Expand Up @@ -247,8 +243,8 @@ class ConstantVector final : public SimpleVector<T> {
kDummy);
}

// Base vector if isScalar() is false (e.g. complex type vector) or if base
// vector is a lazy vector that hasn't been loaded yet.
/// Base vector if isScalar() is false (e.g. complex type vector) or if base
/// vector is a lazy vector that hasn't been loaded yet.
const VectorPtr& valueVector() const override {
return valueVector_;
}
Expand All @@ -257,8 +253,8 @@ class ConstantVector final : public SimpleVector<T> {
return valueVector_;
}

// Index of the element of the base vector that determines the value of this
// constant vector.
/// Index of the element of the base vector that determines the value of this
/// constant vector.
vector_size_t index() const {
return index_;
}
Expand Down
72 changes: 31 additions & 41 deletions velox/vector/FlatVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@

namespace facebook::velox {

// FlatVector is marked final to allow for inlining on virtual methods called
// on a pointer that has the static type FlatVector<T>; this can be a
// significant performance win when these methods are called in loops.
/// FlatVector is marked final to allow for inlining on virtual methods called
/// on a pointer that has the static type FlatVector<T>; this can be a
/// significant performance win when these methods are called in loops.
template <typename T>
class FlatVector final : public SimpleVector<T> {
public:
Expand All @@ -43,9 +43,9 @@ class FlatVector final : public SimpleVector<T> {
std::is_same_v<T, int16_t> || std::is_same_v<T, int8_t> ||
std::is_same_v<T, bool> || std::is_same_v<T, size_t>);

// Minimum size of a string buffer. 32 KB value is chosen to ensure that a
// single buffer is sufficient for a "typical" vector: 1K rows, medium size
// strings.
/// Minimum size of a string buffer. 32 KB value is chosen to ensure that a
/// single buffer is sufficient for a "typical" vector: 1K rows, medium size
/// strings.
static constexpr size_t kInitialStringSize =
(32 * 1024) - sizeof(AlignedBuffer);
/// Maximum size of a string buffer to re-use (see
Expand Down Expand Up @@ -121,28 +121,23 @@ class FlatVector final : public SimpleVector<T> {

std::unique_ptr<SimpleVector<uint64_t>> hashAll() const override;

/**
* Loads a SIMD vector of data at the virtual byteOffset given
* Note this method is implemented on each vector type, but is intentionally
* not virtual for performance reasons
*
* @param byteOffset - the byte offset to load from
*/
/// Loads a SIMD vector of data at the given virtual byte offset.
/// Note this method is implemented on each vector type, but is intentionally
/// not virtual for performance reasons.
/// 'index' indicates the byte offset to load from.
xsimd::batch<T> loadSIMDValueBufferAt(size_t index) const;

// dictionary vector makes internal usehere for SIMD functions
/// Dictionary vector makes internal use here for SIMD functions.
template <typename X>
friend class DictionaryVector;

// Sequence vector needs to get shared_ptr to value array
/// Sequence vector needs to get shared_ptr to value array
template <typename X>
friend class SequenceVector;

/**
* @return a smart pointer holding the values for
* this vector. This is used during execution to process over the subset of
* values when possible.
*/
/// Returns a smart pointer holding the values for
/// this vector. This is used during execution to process over the subset of
/// values when possible.
const BufferPtr& values() const override {
return values_;
}
Expand Down Expand Up @@ -188,16 +183,12 @@ class FlatVector final : public SimpleVector<T> {
return values_;
}

/**
* @return true if this number of comparison values on this vector should use
* simd for equality constraint filtering, false to use standard set
* examination filtering.
*/
/// Returns true if, given 'numCmpVals' comparison values, this vector should
/// use SIMD for equality constraint filtering, false to use standard set
/// examination filtering.
bool useSimdEquality(size_t numCmpVals) const;

/**
* @return the raw values of this vector as a continuous array.
*/
/// Returns the raw values of this vector as a continuous array.
const T* rawValues() const;

const void* valuesAsVoid() const override {
Expand All @@ -209,8 +200,8 @@ class FlatVector final : public SimpleVector<T> {
return reinterpret_cast<const As*>(rawValues_);
}

// Bool uses compact representation, use mutableRawValues<uint64_t> and
// bits::setBit instead.
/// Bool uses compact representation, use mutableRawValues<uint64_t> and
/// bits::setBit instead.
T* mutableRawValues() {
if (!(values_ && values_->isMutable())) {
BufferPtr newValues =
Expand Down Expand Up @@ -402,12 +393,10 @@ class FlatVector final : public SimpleVector<T> {
return size;
}

/**
* Used for vectors of type VARCHAR and VARBINARY to hold data referenced by
* StringView's. It is safe to share these among multiple vectors. These
* buffers are append only. It is allowed to append data, but it is prohibited
* to modify already written data.
*/
/// Used for vectors of type VARCHAR and VARBINARY to hold data referenced by
/// StringView's. It is safe to share these among multiple vectors. These
/// buffers are append only. It is allowed to append data, but it is
/// prohibited to modify already written data.
const std::vector<BufferPtr>& stringBuffers() const {
return stringBuffers_;
}
Expand Down Expand Up @@ -447,13 +436,14 @@ class FlatVector final : public SimpleVector<T> {
return true;
}

// Acquire ownership for any string buffer that appears in source, the
// function does nothing if the vector type is not Varchar or Varbinary.
// The function throws if input encoding is lazy.
/// Acquire ownership for any string buffer that appears in source, the
/// function does nothing if the vector type is not Varchar or Varbinary.
/// The function throws if input encoding is lazy.
void acquireSharedStringBuffers(const BaseVector* source);

// Acquire ownership for any string buffer that appears in source or any
// of its children recursively. The function throws if input encoding is lazy.
/// Acquire ownership for any string buffer that appears in source or any
/// of its children recursively. The function throws if input encoding is
/// lazy.
void acquireSharedStringBuffersRecursive(const BaseVector* source);

/// This API is available only for string vectors (T = StringView).
Expand Down
18 changes: 9 additions & 9 deletions velox/vector/SimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,15 @@ struct AsciiInfo {
folly::Synchronized<SelectivityVector> asciiComputedRows_;
};

// This class abstracts over various Columnar Storage Formats such that Velox
// can select the most appropriate one on a per field / per block basis.
// The goal is to use the most appropriate type to optimize for:
// - Lazy deserialization if desired.
// - serialization / rehydration cost, ideally we use a smart view into the
// data without fully rehydrating.
// - serialized bytes
// - cpu cost of filtering
// - optimize aggregation of sequential values
/// This class abstracts over various Columnar Storage Formats such that Velox
/// can select the most appropriate one on a per field / per block basis.
/// The goal is to use the most appropriate type to optimize for:
/// - Lazy deserialization if desired.
/// - serialization / rehydration cost, ideally we use a smart view into the
/// data without fully rehydrating.
/// - serialized bytes
/// - cpu cost of filtering
/// - optimize aggregation of sequential values
template <typename T>
class SimpleVector : public BaseVector {
public:
Expand Down

0 comments on commit c2a3f18

Please sign in to comment.