diff --git a/velox/common/base/Counters.cpp b/velox/common/base/Counters.cpp index cb431b827b3a5..8d93226dd5e40 100644 --- a/velox/common/base/Counters.cpp +++ b/velox/common/base/Counters.cpp @@ -421,5 +421,11 @@ void registerVeloxMetrics() { // flushed due to memory reclaiming. DEFINE_METRIC( kMetricFileWriterEarlyFlushedRawBytes, facebook::velox::StatType::SUM); + + // The current spilling memory usage in bytes. + DEFINE_METRIC(kMetricSpillMemoryBytes, facebook::velox::StatType::AVG); + + // The peak spilling memory usage in bytes. + DEFINE_METRIC(kMetricSpillPeakMemoryBytes, facebook::velox::StatType::AVG); } } // namespace facebook::velox diff --git a/velox/common/base/Counters.h b/velox/common/base/Counters.h index 522bc4f484950..ccd31ec877a64 100644 --- a/velox/common/base/Counters.h +++ b/velox/common/base/Counters.h @@ -126,6 +126,12 @@ constexpr folly::StringPiece kMetricSpillFlushTimeMs{ constexpr folly::StringPiece kMetricSpillWriteTimeMs{ "velox.spill_write_time_ms"}; +constexpr folly::StringPiece kMetricSpillMemoryBytes{ + "velox.spill_memory_bytes"}; + +constexpr folly::StringPiece kMetricSpillPeakMemoryBytes{ + "velox.spill_peak_memory_bytes"}; + constexpr folly::StringPiece kMetricFileWriterEarlyFlushedRawBytes{ "velox.file_writer_early_flushed_raw_bytes"}; diff --git a/velox/common/base/PeriodicStatsReporter.cpp b/velox/common/base/PeriodicStatsReporter.cpp index 23fdfc2309d70..b444f7bcdbcbd 100644 --- a/velox/common/base/PeriodicStatsReporter.cpp +++ b/velox/common/base/PeriodicStatsReporter.cpp @@ -79,6 +79,10 @@ void PeriodicStatsReporter::start() { "report_arbitrator_stats", [this]() { reportArbitratorStats(); }, options_.arbitratorStatsIntervalMs); + addTask( + "report_spill_stats", + [this]() { reportSpillStats(); }, + options_.spillStatsIntervalMs); } void PeriodicStatsReporter::stop() { @@ -228,4 +232,13 @@ void PeriodicStatsReporter::reportCacheStats() { lastCacheStats_ = cacheStats; } +void PeriodicStatsReporter::reportSpillStats() { + const auto spillMemoryStats = velox::memory::spillMemoryPool()->stats(); + LOG(INFO) << "Spill memory usage: current[" + << velox::succinctBytes(spillMemoryStats.currentBytes) << "] peak[" + << velox::succinctBytes(spillMemoryStats.peakBytes) << "]"; + RECORD_METRIC_VALUE(kMetricSpillMemoryBytes, spillMemoryStats.currentBytes); + RECORD_METRIC_VALUE(kMetricSpillPeakMemoryBytes, spillMemoryStats.peakBytes); +} + } // namespace facebook::velox diff --git a/velox/common/base/PeriodicStatsReporter.h b/velox/common/base/PeriodicStatsReporter.h index 2e5a01a15001c..cced0f857b59f 100644 --- a/velox/common/base/PeriodicStatsReporter.h +++ b/velox/common/base/PeriodicStatsReporter.h @@ -50,6 +50,8 @@ class PeriodicStatsReporter { const memory::MemoryArbitrator* arbitrator{nullptr}; uint64_t arbitratorStatsIntervalMs{60'000}; + uint64_t spillStatsIntervalMs{60'000}; + std::string toString() const { return fmt::format( "allocatorStatsIntervalMs:{}, cacheStatsIntervalMs:{}, " @@ -90,6 +92,7 @@ class PeriodicStatsReporter { void reportCacheStats(); void reportAllocatorStats(); void reportArbitratorStats(); + void reportSpillStats(); const velox::memory::MemoryAllocator* const allocator_{nullptr}; const velox::cache::AsyncDataCache* const cache_{nullptr}; diff --git a/velox/common/base/tests/StatsReporterTest.cpp b/velox/common/base/tests/StatsReporterTest.cpp index 2c21f6bc81d3b..9e736700fd8ad 100644 --- a/velox/common/base/tests/StatsReporterTest.cpp +++ b/velox/common/base/tests/StatsReporterTest.cpp @@ -264,6 +264,7 @@ TEST_F(PeriodicStatsReporterTest, basic) { options.allocatorStatsIntervalMs = 4'000; options.arbitrator = &arbitrator; options.arbitratorStatsIntervalMs = 4'000; + options.spillStatsIntervalMs = 4'000; PeriodicStatsReporter periodicReporter(options); periodicReporter.start(); @@ -293,6 +294,8 @@ TEST_F(PeriodicStatsReporterTest, basic) { ASSERT_EQ(counterMap.count(kMetricAllocatedMemoryBytes.str()), 1); ASSERT_EQ(counterMap.count(kMetricMmapDelegatedAllocBytes.str()), 1); ASSERT_EQ(counterMap.count(kMetricMmapExternalMappedBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSpillMemoryBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSpillPeakMemoryBytes.str()), 1); // Check deltas are not reported ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumHits.str()), 0); ASSERT_EQ(counterMap.count(kMetricMemoryCacheHitBytes.str()), 0); @@ -321,7 +324,7 @@ TEST_F(PeriodicStatsReporterTest, basic) { ASSERT_EQ(counterMap.count(kMetricSsdCacheRegionsEvicted.str()), 0); ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutEntries.str()), 0); ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutRegions.str()), 0); - ASSERT_EQ(counterMap.size(), 20); + ASSERT_EQ(counterMap.size(), 22); // Update stats auto newSsdStats = std::make_shared(); @@ -391,7 +394,7 @@ TEST_F(PeriodicStatsReporterTest, basic) { ASSERT_EQ(counterMap.count(kMetricSsdCacheRegionsEvicted.str()), 1); ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutEntries.str()), 1); ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutRegions.str()), 1); - ASSERT_EQ(counterMap.size(), 47); + ASSERT_EQ(counterMap.size(), 49); } TEST_F(PeriodicStatsReporterTest, globalInstance) { diff --git a/velox/docs/monitoring/metrics.rst b/velox/docs/monitoring/metrics.rst index 273d13246c913..4b63f0346d850 100644 --- a/velox/docs/monitoring/metrics.rst +++ b/velox/docs/monitoring/metrics.rst @@ -414,6 +414,12 @@ Spilling * - file_writer_early_flushed_raw_bytes - Sum - Number of bytes pre-maturely flushed from file writers because of memory reclaiming. + * - spill_memory_bytes + - Avg + - The current spilling memory usage in bytes. + * - spill_peak_memory_bytes + - Avg + - The peak spilling memory usage in bytes. Hive Connector --------------