diff --git a/tests/distributed/test_model_parallelization.py b/tests/distributed/test_model_parallelization.py index e0b2d166a..1b12323e8 100644 --- a/tests/distributed/test_model_parallelization.py +++ b/tests/distributed/test_model_parallelization.py @@ -172,6 +172,13 @@ def _generate_supported_model_classes( MODEL_CLASSES_TO_IGNORE = [ "BertForPreTraining", # There is a compilation issue, and testing TP for BertForPretraining is not really important. + # TODO: re-enable the GPTNeo classes below. + # GPTNeo's attention mechanism is broken in transformers==4.36.2; they should be re-enabled once there is a release + # containing this PR: https://github.com/huggingface/transformers/pull/28533 + "GPTNeoForSequenceClassification", + "GPTNeoForTokenClassification", + "GPTNeoForQuestionAnswering", + "GPTNeoForCausalLM", ]