From c7e8c73edd7a3025c6ce55a05cc779f6057f13ad Mon Sep 17 00:00:00 2001 From: Michael Benayoun Date: Fri, 20 Sep 2024 17:32:43 +0200 Subject: [PATCH] Remove obsolete code --- optimum/neuron/distributed/base.py | 3 --- optimum/neuron/distributed/utils.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/optimum/neuron/distributed/base.py b/optimum/neuron/distributed/base.py index 98d211be3..29e207b8d 100644 --- a/optimum/neuron/distributed/base.py +++ b/optimum/neuron/distributed/base.py @@ -777,9 +777,6 @@ def should_parallelize_layer_predicate_func(layer): f"Could not find information for the parameter {name} to set its `requires_grad` attribute." ) - # for name, parameter in model.named_parameters(): - # print(f"{name} => {parameter.requires_grad}") - if is_main_worker(): logger.info("Load and initialization of the weights done.") diff --git a/optimum/neuron/distributed/utils.py b/optimum/neuron/distributed/utils.py index 012869d11..b57da920d 100644 --- a/optimum/neuron/distributed/utils.py +++ b/optimum/neuron/distributed/utils.py @@ -155,7 +155,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: parent_module._gqa_qkv_output = gqa_qkv_column_parallel_linear(hidden_states) parent_module._gqa_qkv_output_fetch_counter = 0 parent_module._gqa_qkv_output_fetch_counter += 1 - output = torch.nn.Identity()(parent_module._gqa_qkv_output)[self.output_index] + output = parent_module._gqa_qkv_output[self.output_index] if parent_module._gqa_qkv_output_fetch_counter == 3: del parent_module._gqa_qkv_output return output