Commit

allocate GPU memory directly
chrisxcai committed Jun 9, 2024
1 parent 00772ed commit 1fa3fb1
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion fairscale/nn/misc/flatten_params_wrapper.py
@@ -431,7 +431,7 @@ def _unflatten_params_as_views(self) -> None:

         if self.optimize_backward_concat and self.fp32_grads is None:
             total_numels = sum([torch.numel(p) for p in param_views])
-            self.fp32_grads = torch.zeros(total_numels, dtype=torch.float32).cuda()
+            self.fp32_grads = torch.zeros(total_numels, dtype=torch.float32, device=torch.cuda.current_device())


# Save param views for easy access if anyone still wants to access
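The change replaces a CPU-side allocation followed by a `.cuda()` host-to-device copy with a single allocation made directly on the current CUDA device. A minimal sketch of the two patterns, not part of the commit; the tensor size is illustrative:

    import torch

    if torch.cuda.is_available():
        n = 1024  # illustrative element count

        # Before: torch.zeros allocates the buffer in host memory,
        # then .cuda() allocates again on the GPU and copies the data over.
        buf_old = torch.zeros(n, dtype=torch.float32).cuda()

        # After: the buffer is allocated directly on the current CUDA device,
        # skipping the intermediate host allocation and the copy.
        buf_new = torch.zeros(n, dtype=torch.float32, device=torch.cuda.current_device())

        assert buf_old.device == buf_new.device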
