# Captured from the GitHub page for #797 (page navigation text and repeated
# title lines collapsed into this comment):
# "Fix GQA permutation computation and sequential weight initialization /
# loading when doing TP"

# Runs the tests marked `is_trainium_test` on a self-hosted Trainium runner.
name: Optimum Neuron - Common tests on Trainium

# Trigger on pushes to `main` and on pull requests targeting `main`, but only
# when `setup.py` or a Python file under `optimum/` changes.
on:
  push:
    branches: [main]
    paths:
      - "setup.py"
      - "optimum/**.py"
  pull_request:
    branches: [main]
    paths:
      - "setup.py"
      - "optimum/**.py"

# One active run per workflow and PR head branch. `github.head_ref` is empty
# outside pull requests, so the group then falls back to the unique run id.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  optimum-neuron-tests:
    runs-on: [self-hosted, 1-aws-trn, 8-cpu, ci] # run the job on the newly created runner
    env:
      AWS_REGION: us-east-1
      # Expanded unquoted in the pytest commands below so that the shell
      # splits it into separate `--ignore <path>` arguments.
      TESTS_TO_IGNORE_FLAGS: --ignore tests/distributed/ --ignore tests/test_examples.py
    steps:
      # Lists the installed Debian packages whose name mentions "neuron";
      # `grep` exits non-zero (failing the step) when none is found.
      - name: Check AMI
        run: dpkg -l | grep neuron
      # NOTE(review): v4 needs a runner that supports Node 20 actions; confirm
      # the self-hosted runner version before merging.
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup PATH
        run: echo "/home/ubuntu/.local/bin" >> "$GITHUB_PATH"
      - name: Set pip repository pointing to the Neuron repository
        run: pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
      - name: Update pip
        run: pip install -U pip
      - name: Install Python dependencies
        # Quoted so the shell never treats `[...]` as a glob pattern.
        run: pip install ".[tests,neuronx]"
      - name: Run tests on Neuron cores
        # The secret is passed through the step environment instead of being
        # spliced into the script text.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
          USE_VENV: "false"
        run: pytest -m "is_trainium_test" $TESTS_TO_IGNORE_FLAGS tests
      - name: Run staging tests on Neuron cores
        env:
          HUGGINGFACE_CO_STAGING: "1"
        run: pytest -m "is_trainium_test and is_staging_test" $TESTS_TO_IGNORE_FLAGS tests -s