add ddr_cap config to shard_quant_model in TGIF (#2539)
Summary:
Pull Request resolved: #2539

Add the ddr_cap config to shard_quant_model in the TGIF inference path, so that we can fully utilize the CPU memory.

Reviewed By: ljyuva83

Differential Revision: D65451305

fbshipit-source-id: a77a5457283d7993d4b68b18bb7736c8cf4d7f64
zhaojuanmao authored and facebook-github-bot committed Nov 5, 2024
1 parent 786bb1e commit 63d604a
Showing 3 changed files with 4 additions and 2 deletions.
@@ -32,7 +32,7 @@
#include "torchrec/inference/BatchingQueue.h"
#include "torchrec/inference/Observer.h"
#include "torchrec/inference/ResultSplit.h"
-#include "torchrec/inference/include/torchrec/inference/Observer.h"
+#include "torchrec/inference/include/torchrec/inference/Observer.h" // @manual

namespace torchrec {

2 changes: 1 addition & 1 deletion torchrec/inference/inference_legacy/src/GPUExecutor.cpp
@@ -25,7 +25,7 @@
#include <folly/stop_watch.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
-#include <torch/csrc/autograd/profiler.h>
+#include <torch/csrc/autograd/profiler.h> // @manual

// remove this after we switch over to multipy externally for torchrec
#ifdef FBCODE_CAFFE2
2 changes: 2 additions & 0 deletions torchrec/inference/modules.py
@@ -488,6 +488,7 @@ def shard_quant_model(
sharders: Optional[List[ModuleSharder[torch.nn.Module]]] = None,
device_memory_size: Optional[int] = None,
constraints: Optional[Dict[str, ParameterConstraints]] = None,
+    ddr_cap: Optional[int] = None,
) -> Tuple[torch.nn.Module, ShardingPlan]:
"""
Shard a quantized TorchRec model, used for generating the most optimal model for inference and
@@ -557,6 +558,7 @@ def shard_quant_model(
compute_device=compute_device,
local_world_size=world_size,
hbm_cap=hbm_cap,
+        ddr_cap=ddr_cap,
)
batch_size = 1
model_plan = trec_dist.planner.EmbeddingShardingPlanner(
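The diff above threads `ddr_cap` from `shard_quant_model` into the planner's `Topology` alongside `hbm_cap`, giving the planner a CPU-memory budget in addition to the GPU one. A minimal sketch of the budgeting idea, using a stand-in `Topology` dataclass rather than the real torchrec class (the field semantics and `total_memory_budget` helper are illustrative assumptions, not torchrec's API):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Topology:
    """Illustrative stand-in for trec_dist.planner.Topology (not the real API)."""
    world_size: int
    hbm_cap: Optional[int] = None  # assumed per-device GPU memory budget, bytes
    ddr_cap: Optional[int] = None  # assumed per-device CPU (DDR) memory budget, bytes


def total_memory_budget(topo: Topology) -> int:
    """Rough total budget a planner could shard against, assuming per-device caps."""
    per_device = (topo.hbm_cap or 0) + (topo.ddr_cap or 0)
    return topo.world_size * per_device


# Without ddr_cap, only GPU memory is budgeted; passing ddr_cap raises the
# ceiling so large embedding tables can also occupy CPU memory.
gpu_only = Topology(world_size=2, hbm_cap=16 * 1024**3)
with_ddr = Topology(world_size=2, hbm_cap=16 * 1024**3, ddr_cap=256 * 1024**3)
print(total_memory_budget(gpu_only))  # 2 * 16 GiB
print(total_memory_budget(with_ddr))  # 2 * (16 GiB + 256 GiB)
```

Before this change, callers of `shard_quant_model` had no way to pass the CPU budget through to the `Topology`, so the planner fell back to its default DDR value.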