Register output dist for inference modules and add set_device helper (pytorch#1323)

Summary:
Pull Request resolved: pytorch#1323

- Add `set_device` to `EmbeddingsAllToOne` and `SequenceEmbeddingsAllToOne` (the inference output dist modules), so an external runtime has the option to set the device on these modules
- Register the output dists of torchrec inference sharded modules so they can actually be found via `named_modules` of the top-level module

Reviewed By: sayitmemory

Differential Revision: D48249155

fbshipit-source-id: c0fc405e46a12e80ddc17b91637b0130a2dc697a
s4ayub authored and facebook-github-bot committed Aug 11, 2023
1 parent 9a6c5ee commit d274ac8
Showing 5 changed files with 21 additions and 4 deletions.
12 changes: 12 additions & 0 deletions torchrec/distributed/dist_data.py
@@ -714,6 +714,12 @@ def __init__(
         self._world_size = world_size
         self._cat_dim = cat_dim

+    # This method can be used by an inference runtime to update the
+    # device information for this module.
+    @torch.jit.export
+    def set_device(self, device_str: str) -> None:
+        self._device = torch.device(device_str)
+
     def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
         """
         Performs AlltoOne operation on pooled/sequence embeddings tensors.
@@ -762,6 +768,12 @@ def __init__(
         self._device = device
         self._world_size = world_size

+    # This method can be used by an inference runtime to update the
+    # device information for this module.
+    @torch.jit.export
+    def set_device(self, device_str: str) -> None:
+        self._device = torch.device(device_str)
+
     def forward(self, tensors: List[torch.Tensor]) -> List[torch.Tensor]:
         """
         Performs AlltoOne operation on pooled embeddings tensors.
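A minimal sketch of how an external inference runtime might drive the new hook. `move_output_dists` and `loaded_inference_model` are hypothetical names, not part of this commit; only `set_device` and the two module classes come from the diff above:

```python
import torch

from torchrec.distributed.dist_data import (
    EmbeddingsAllToOne,
    SequenceEmbeddingsAllToOne,
)


def move_output_dists(model: torch.nn.Module, device_str: str) -> None:
    # Walk the module tree; the output dists are discoverable here only
    # because this commit registers them on the sharded modules.
    for _name, module in model.named_modules():
        if isinstance(module, (EmbeddingsAllToOne, SequenceEmbeddingsAllToOne)):
            module.set_device(device_str)


# e.g. move_output_dists(loaded_inference_model, "cuda:1")
```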
4 changes: 3 additions & 1 deletion torchrec/distributed/quant_embedding.py
@@ -389,7 +389,9 @@ def __init__(
         self._input_dists: List[nn.Module] = []
         self._lookups: List[nn.Module] = []
         self._create_lookups(fused_params, device)
-        self._output_dists: List[nn.Module] = []
+
+        # Ensure output dist is set for post processing from an inference runtime (i.e. setting device from runtime).
+        self._output_dists: torch.nn.ModuleList = torch.nn.ModuleList()

         self._feature_splits: List[int] = []
         self._features_order: List[int] = []
5 changes: 4 additions & 1 deletion torchrec/distributed/quant_embeddingbag.py
@@ -124,7 +124,10 @@ def __init__(
         self._input_dists: List[nn.Module] = []
         self._lookups: List[nn.Module] = []
         self._create_lookups(fused_params, device)
-        self._output_dists: List[nn.Module] = []
+
+        # Ensure output dist is set for post processing from an inference runtime (i.e. setting device from runtime).
+        self._output_dists: torch.nn.ModuleList = torch.nn.ModuleList()
+
         self._embedding_names: List[str] = []
         self._embedding_dims: List[int] = []
         self._feature_splits: List[int] = []
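Why the switch from a plain Python list to `torch.nn.ModuleList` matters: modules held in a plain list are invisible to `named_modules`, so a runtime could never find the output dists to call `set_device` on them. A generic illustration (not TorchRec code):

```python
import torch
from torch import nn


class Holder(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.hidden = [nn.Linear(2, 2)]  # plain list: child is NOT registered
        self.visible = nn.ModuleList([nn.Linear(2, 2)])  # child IS registered


names = [name for name, _ in Holder().named_modules()]
assert "visible.0" in names  # the registered child shows up
assert all("hidden" not in name for name in names)  # the unregistered one does not
```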
2 changes: 1 addition & 1 deletion torchrec/distributed/sharding/tw_sequence_sharding.py
@@ -183,7 +183,7 @@ def forward(
         Returns:
             Awaitable[torch.Tensor]: awaitable of sequence embeddings.
         """
-        return self._dist.forward(local_embs)
+        return self._dist(local_embs)


 class InferTwSequenceEmbeddingSharding(
2 changes: 1 addition & 1 deletion torchrec/distributed/sharding/tw_sharding.py
@@ -419,7 +419,7 @@ def forward(
             Awaitable[torch.Tensor]: awaitable of merged pooled embedding tensor.
         """

-        return self._dist.forward(local_embs)
+        return self._dist(local_embs)


 class InferTwEmbeddingSharding(
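Replacing `self._dist.forward(local_embs)` with `self._dist(local_embs)` routes the call through `nn.Module.__call__`; one practical difference is that registered hooks run on the `__call__` path but are skipped when `.forward` is invoked directly. A minimal demonstration with a stand-in module, not the real output dist:

```python
import torch
from torch import nn

dist = nn.Identity()  # stand-in for the real output dist module
fired = []
dist.register_forward_hook(lambda mod, args, out: fired.append("hook"))

x = torch.ones(1)
dist(x)          # __call__ path: the hook fires
dist.forward(x)  # direct forward: the hook is skipped
assert fired == ["hook"]
```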
