From bf913c285b3ef149070572a8e509d0ac7b874ceb Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 25 Sep 2024 19:04:53 +0200
Subject: [PATCH] update setup

---
 .github/workflows/test_onnxruntime.yml        | 1 -
 optimum/bettertransformer/models/attention.py | 7 +++----
 optimum/onnxruntime/modeling_decoder.py       | 1 +
 setup.py                                      | 3 ++-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index c4157797e7..291a3b0833 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -42,7 +42,6 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install git+https://github.com/huggingface/transformers
           pip install .[tests,onnxruntime]
 
       - name: Test with pytest (in series)
diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py
index 53e6a676e6..63f0275464 100644
--- a/optimum/bettertransformer/models/attention.py
+++ b/optimum/bettertransformer/models/attention.py
@@ -207,9 +207,10 @@ def codegen_wrapped_scaled_dot_product(
         # causal_mask is always [True, ..., True] otherwise, so executing this
         # is unnecessary
         if query_length > 1:
-            if not check_if_transformers_greater("4.44.99"):
-                causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)
+            causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(
+                torch.bool
+            )
 
             causal_mask = torch.where(causal_mask, 0, mask_value)
 
@@ -219,7 +220,6 @@ def codegen_wrapped_scaled_dot_product(
                 # we use torch.min to avoid having tensor(-inf)
                 attention_mask = torch.min(causal_mask, attention_mask)
         else:
-            attention_mask = attention_mask[:, :, :, : key.shape[-2]]
 
         sdpa_result = torch.nn.functional.scaled_dot_product_attention(
@@ -229,7 +229,6 @@ def codegen_wrapped_scaled_dot_product(
 
     return sdpa_result, None
 
-
 # Adapted from transformers.models.opt.modeling_opt.OPTAttention.forward
 def opt_forward(
     self,
diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index 378fdbb6db..bda3ec98d9 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -724,6 +724,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         super()._save_pretrained(save_directory)
         self.generation_config.save_pretrained(save_directory)
 
+
 class ORTGPTBigCodeForCausalLM(ORTModelForCausalLM):
     # Adapted from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM.prepare_inputs_for_generation
     def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
diff --git a/setup.py b/setup.py
index 231dc9110e..c961cf973c 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,8 @@
 REQUIRED_PKGS = [
     "coloredlogs",
     "sympy",
-    "transformers[sentencepiece]>=4.29,<4.46.0",
+    "transformers @ git+https://github.com/huggingface/transformers.git",
+    # "transformers[sentencepiece]>=4.29,<4.46.0",
     "torch>=1.11",
     "packaging",
     "numpy<2.0",  # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569
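
Note (illustrative, not part of the commit): a minimal sanity check for the dependency
switch in setup.py above. It assumes optimum is installed in the current environment
(e.g. via "pip install .[tests,onnxruntime]") and that check_if_transformers_greater is
importable from optimum.utils, as used in optimum/bettertransformer/models/attention.py.

    # Print which transformers build the git requirement resolved to, and how the
    # "4.44.99" gate removed in this patch would evaluate against it.
    import transformers
    from optimum.utils import check_if_transformers_greater

    print("transformers:", transformers.__version__)  # a git-main install is typically a .dev0 build
    print(check_if_transformers_greater("4.44.99"))   # expected True for 4.45+ and dev builds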