Fix test
michaelbenayoun committed Mar 20, 2024
1 parent b3d10e1 commit c47e7a2
Showing 4 changed files with 9 additions and 6 deletions.
2 changes: 2 additions & 0 deletions optimum/neuron/accelerate/accelerator.py
@@ -495,6 +495,8 @@ def prepare_model(

        # We do not want to use the cache here as it would imply more communication than we need.
        model.config.use_cache = False
+        model.config.output_attentions = False
+        model.config.output_hidden_states = False

        if self.distributed_type is NeuronDistributedType.XLA_FSDP:
            return self.prepare_model_for_xla_fsdp(
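
For context, the flags set above are plain `transformers` configuration attributes. A minimal sketch of their effect, assuming any `PreTrainedModel` (the checkpoint name below is only an illustrative choice, not part of this commit):

```python
from transformers import AutoModelForCausalLM

# Any transformers model works here; "gpt2" is just an illustrative choice.
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Turning these off stops every forward pass from returning the KV cache,
# attention maps, and per-layer hidden states, i.e. extra tensors that would
# have to be materialized (and communicated in a distributed setting).
model.config.use_cache = False
model.config.output_attentions = False
model.config.output_hidden_states = False
```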
8 changes: 2 additions & 6 deletions optimum/neuron/distributed/base.py
@@ -24,7 +24,7 @@
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Type, Union

import torch
-from transformers import PreTrainedModel
+from transformers import PreTrainedModel, PretrainedConfig
from transformers.utils import WEIGHTS_NAME

from ...utils import logging
@@ -588,11 +588,6 @@ def should_parallelize_layer_predicate_func(layer):
            names = {parameter_to_name[p] for p in layer.parameters()}
            return names < names_of_the_parameters_to_consider

-        # It solves some compilation issues.
-        # Investigate if using the cache becomes needed.
-        # Note: it is mandatory to set it to False when using pipeline parallelism.
-        model.config.use_cache = False
-
        if tp_size > 1:
            # TODO: remove that once it is solved on the `neuronx_distributed` side.
            try:
@@ -688,6 +683,7 @@ def should_parallelize_layer_predicate_func(layer):
        if not cls.supports_pipeline_parallelism():
            raise NotImplementedError(f"{cls} does not support pipeline parallelism.")

+        model.config.use_cache = False
        model.config.return_dict = False
        model.config.output_attentions = False
        model.config.output_hidden_states = False
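
The pipeline-parallel path above now forces all four flags off, since the blanket `use_cache = False` assignment was removed from the generic code path. A hedged illustration of why this matters for pipeline parallelism: with `return_dict=False` a `transformers` forward pass returns a plain tuple, and with caching, attentions, and hidden states disabled that tuple carries only the logits, which is the simplest form to hand from one pipeline stage to the next. The checkpoint below is again only an example:

```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")  # illustrative checkpoint
model.config.use_cache = False
model.config.return_dict = False
model.config.output_attentions = False
model.config.output_hidden_states = False

# With return_dict=False the output is a tuple rather than a ModelOutput,
# and with the other flags off it contains only the logits tensor.
outputs = model(input_ids=torch.tensor([[1, 2, 3]]))
print(type(outputs), len(outputs))  # <class 'tuple'> 1
```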
3 changes: 3 additions & 0 deletions tests/distributed/test_model_parallelization.py
@@ -299,6 +299,9 @@ def _parallel_model_matches_original_model(
        )
        orig_model = NeuronAccelerator.patch_model_for_neuron(orig_model)

+        # TODO: enable this again once it is working; it seems to be an AWS issue.
+        orig_model.config.use_cache = False
+
        set_neuron_cc_optlevel_for_model(orig_model)

        move_model_to_device(orig_model, xm.xla_device())
2 changes: 2 additions & 0 deletions tests/test_cache_utils.py
@@ -17,6 +17,7 @@
import json
import logging
import os
+import pytest
import random
from dataclasses import FrozenInstanceError
from pathlib import Path
@@ -483,6 +484,7 @@ def test_neuron_hash_is_private(self):

@is_trainium_test
@is_staging_test
+@pytest.mark.skip("This is not needed anymore and will be removed.")
class CachedModelOnTheHubTestCase(StagingTestMixin, TestCase):
    def test_push_to_hub_fails_with_private_model_and_public_repo(self):
        with TemporaryDirectory() as tmpdirname:
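
The last hunk deactivates the whole staging test class. As a small illustration of the mechanism (the class and test names below are hypothetical stand-ins, not the real test suite): `pytest.mark.skip` applied at class level marks every test method of a `unittest.TestCase` subclass as skipped at collection time, so none of their bodies run.

```python
import pytest
from unittest import TestCase


@pytest.mark.skip(reason="This is not needed anymore and will be removed.")
class CachedModelOnTheHubExample(TestCase):  # hypothetical stand-in class
    def test_never_runs(self):
        # Reported as "skipped" by pytest; this body is never executed.
        self.fail("should not execute")
```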
