diff --git a/requirements/runtime.txt b/requirements/runtime.txt index f531754b3..05b38c1b7 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -19,5 +19,8 @@ tiktoken torch<=2.1.2 torchvision<=0.16.2 # Minimum 4.36.0 to support `Cache` data structure used by KV Cache -transformers>=4.36.0 +# Registering a causal mask in `LlamaModel` does not work well with very large +# `max_position_embeddings`. Refer to +# https://github.com/huggingface/transformers/blob/v4.38.0/src/transformers/models/llama/modeling_llama.py#L921-L923 +transformers>=4.36.0,!=4.38.0,!=4.38.1,!=4.38.2 transformers_stream_generator