Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix MinGPT Example - Data Class Default Field Before Required Field - Allow for Non-Distributed Training Depending on Env (#707)

Summary:
Pull Request resolved: #707

```
buck2 run :mingpt_example
```
fails as the dataclass fields are initialized in the wrong order.

This script also fails on a CPU machine, as the training strategy provided appears to require a process group. With a single process the script fails with:

```
  File "/usr/local/fbcode/platform010/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/fbcode/platform010/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/examples/mingpt/main.py", line 190, in <module>
    main(get_args())
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/examples/mingpt/main.py", line 141, in main
    my_unit = MinGPTUnit(
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/framework/auto_unit.py", line 119, in __call__
    x = super().__call__(*args, **kwargs)
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/examples/mingpt/main.py", line 75, in __init__
    super().__init__(
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/framework/auto_unit.py", line 480, in __init__
    self.module: torch.nn.Module = prepare_module(
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/utils/prepare_module.py", line 294, in prepare_module
    module = prepare_ddp(module, device, strategy)
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torchtnt/utils/prepare_module.py", line 178, in prepare_ddp
    module = DDP(module, device_ids=device_ids, **params_dict)
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torch/nn/parallel/distributed.py", line 731, in __init__
    self.process_group = _get_default_group()
  File "/data/users/ethenderson/fbsource/buck-out/v2/gen/fbcode/6481d33c0dd0a120/torchtnt/examples/mingpt/__mingpt_example__/mingpt_example#link-tree/torch/distributed/distributed_c10d.py", line 1001, in _get_default_group
    raise ValueError(
ValueError: Default process group has not been initialized, please make sure to call init_process_group.
```

Reviewed By: JKSenthil

Differential Revision: D53872532

fbshipit-source-id: 2a793707cc3fa430bf362fb75ce184acf55f550f
- Loading branch information