From fbcf9fc75413d4d60cb57ac840fd034e6a29caab Mon Sep 17 00:00:00 2001
From: Shuai Yang
Date: Thu, 24 Oct 2024 11:56:43 -0700
Subject: [PATCH] Update the tests

Summary:
We updated `torch._dynamo.config.optimize_ddp` from `python_reducer` to
`python_reducer_without_compiled_forward`, which produces a different number
of compiled autograd graph captures. This is expected, so we update the tests
accordingly.

Differential Revision: D64815497
---
 .../tests/test_train_pipelines.py | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/torchrec/distributed/train_pipeline/tests/test_train_pipelines.py b/torchrec/distributed/train_pipeline/tests/test_train_pipelines.py
index 8ff170992..d5cce5d07 100644
--- a/torchrec/distributed/train_pipeline/tests/test_train_pipelines.py
+++ b/torchrec/distributed/train_pipeline/tests/test_train_pipelines.py
@@ -862,7 +862,7 @@ def custom_model_fwd(
             batch_size = pred.size(0)
             return loss, pred.expand(batch_size * 2, -1)
 
-        pipeline = TrainPipelineSparseDist(
+        pipeline = self.pipeline_class(
             model=sharded_model_pipelined,
             optimizer=optim_pipelined,
             device=self.device,
@@ -2201,17 +2201,21 @@ def gpu_preproc(x: StageOut) -> StageOut:
 
 
 class TrainPipelineSparseDistCompAutogradTest(TrainPipelineSparseDistTest):
+    orig_optimize_ddp: Union[bool, str] = torch._dynamo.config.optimize_ddp
+
     def setUp(self) -> None:
         super().setUp()
+        torch.manual_seed(42)
         self.pipeline_class = TrainPipelineSparseDistCompAutograd
         torch._dynamo.reset()
         counters["compiled_autograd"].clear()
         # Compiled Autograd don't work with Anomaly Mode
         torch.autograd.set_detect_anomaly(False)
+        torch._dynamo.config.optimize_ddp = "python_reducer_without_compiled_forward"
 
     def tearDown(self) -> None:
-        # Every single test has two captures, one for forward and one for backward
-        self.assertEqual(counters["compiled_autograd"]["captures"], 2)
+        torch._dynamo.config.optimize_ddp = self.orig_optimize_ddp
+        self.assertEqual(counters["compiled_autograd"]["captures"], 3)
         return super().tearDown()
 
     @unittest.skip("Dynamo only supports FSDP with use_orig_params=True")
@@ -2219,3 +2223,14 @@ def tearDown(self) -> None:
     @given(execute_all_batches=st.booleans())
     def test_pipelining_fsdp_pre_trace(self, execute_all_batches: bool) -> None:
         super().test_pipelining_fsdp_pre_trace()
+
+    @unittest.skip(
+        "TrainPipelineSparseDistTest.test_equal_to_non_pipelined was called from multiple different executors, which makes counters['compiled_autograd']['captures'] uncertain"
+    )
+    def test_equal_to_non_pipelined(
+        self,
+        sharding_type: str,
+        kernel_type: str,
+        execute_all_batches: bool,
+    ) -> None:
+        super().test_equal_to_non_pipelined()
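
Reviewer note: the `tearDown` assertion counts the graphs recorded in `counters["compiled_autograd"]["captures"]`, which is why the expected value shifts when the `optimize_ddp` mode changes. The sketch below is a minimal, single-process illustration of that counting mechanism only; the toy model and the `eager` compile backend are assumptions for illustration and are not part of the patch or the pipeline under test.

```python
# Minimal sketch, assuming a recent PyTorch with compiled autograd
# (torch._dynamo.compiled_autograd). Toy model and backend are illustrative.
import torch
from torch._dynamo import compiled_autograd
from torch._dynamo.utils import counters

# Pin the DDP reducer mode and remember the previous value, mirroring the
# setUp/tearDown pairing in the patch. The mode only changes behavior for
# DDP-wrapped modules, so it is inert in this single-process sketch.
orig_optimize_ddp = torch._dynamo.config.optimize_ddp
torch._dynamo.config.optimize_ddp = "python_reducer_without_compiled_forward"
counters["compiled_autograd"].clear()

try:
    model = torch.nn.Linear(8, 4)
    loss = model(torch.randn(2, 8)).sum()
    # Each backward traced under compiled autograd records a graph capture
    # in the same counter the test asserts on.
    with compiled_autograd.enable(torch.compile(backend="eager")):
        loss.backward()
    print(counters["compiled_autograd"]["captures"])  # expect 1 for this toy step
finally:
    torch._dynamo.config.optimize_ddp = orig_optimize_ddp
```

The try/finally mirrors why the patch restores `orig_optimize_ddp` in `tearDown`: the config is process-global state, so it must be reset even if a test fails.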