-
Notifications
You must be signed in to change notification settings - Fork 21
135 lines (130 loc) · 6.38 KB
/
main.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
---
# CI workflow. On every push and pull request, three independent jobs each
# check out the repository, run the repository-local build action with the UCX
# and UCC repository URLs defined below, and then run one group of tests
# inside the pytorch/pytorch:latest container.
name: CI
on: [push, pull_request]

# Repository URLs shared by all jobs.
env:
  OPENUCX_LINK: https://github.com/openucx/ucx.git
  UCC_LINK: https://github.com/openucx/ucc.git
  PARAM_LINK: https://github.com/facebookresearch/param

jobs:
  # Collective, point-to-point, timeout and multi-communicator test scripts
  # from ./test, launched through ./test/start_test.sh.
  torch-ucc-tests:
    runs-on: ubuntu-latest
    container:
      image: pytorch/pytorch:latest
    steps:
      - name: Checkout Action
        uses: actions/checkout@v1
      - name: Build TorchUCC
        uses: ./.github/actions/build
        with:
          # `with:` values are not passed through a shell, so a shell-style
          # ${VAR} would reach the action as literal text. Read the
          # workflow-level variables through the `env` context instead.
          ucx: ${{ env.OPENUCX_LINK }}
          ucc: ${{ env.UCC_LINK }}
      - name: Tests
        run: |
          export LD_LIBRARY_PATH=/opt/ucx/lib:/opt/ucc/lib:$LD_LIBRARY_PATH
          /opt/ucx/bin/ucx_info -e -u t
          export UCX_LOG_LEVEL=info
          export TORCH_UCC_ENABLE_HEALTH_CHECK=1
          export TORCH_SHOW_CPP_STACKTRACES=1
          # NOTE(review): the labels below say "UCC" but every script is
          # invoked with --backend=gloo - presumably gloo is the reference
          # backend the UCC results are compared against; confirm in ./test.
          # Each collective test is run with TORCH_UCC_TEST_SIZE = 1, 2, 3, 4.
          for np in $(seq 4)
          do
            echo "Test comm size $np"
            export TORCH_UCC_TEST_SIZE=$np
            echo "UCC barrier"
            /bin/bash ./test/start_test.sh ./test/torch_barrier_test.py --backend=gloo
            echo "UCC alltoall"
            /bin/bash ./test/start_test.sh ./test/torch_alltoall_test.py --backend=gloo
            echo "UCC alltoallv"
            /bin/bash ./test/start_test.sh ./test/torch_alltoallv_test.py --backend=gloo
            echo "UCC allgather"
            /bin/bash ./test/start_test.sh ./test/torch_allgather_test.py --backend=gloo
            echo "UCC allreduce"
            /bin/bash ./test/start_test.sh ./test/torch_allreduce_test.py --backend=gloo
            echo "UCC broadcast"
            /bin/bash ./test/start_test.sh ./test/torch_bcast_test.py --backend=gloo
            echo "UCC reduce"
            /bin/bash ./test/start_test.sh ./test/torch_reduce_test.py --backend=gloo
            # FIXME: disabled as UCC does not support gather on CPU tensor yet
            # echo "UCC gather"
            # /bin/bash ./test/start_test.sh ./test/torch_gather_test.py --backend=gloo
          done
          # The remaining tests run once; TORCH_UCC_TEST_SIZE is still
          # exported with the value 4 left by the last loop iteration.
          echo "UCC basic functionality test"
          /bin/bash ./test/start_test.sh ./test/torch_work_test.py --backend=gloo
          echo "UCC pt2pt"
          /bin/bash ./test/start_test.sh ./test/torch_pt2pt_test.py --backend=gloo
          echo "UCC timeout test"
          /bin/bash ./test/start_test.sh ./test/torch_timeout_test.py --backend=gloo
          echo "UCC multiple comms test"
          TORCH_UCC_SHARED_COMM=0 UCX_TLS=tcp /bin/bash ./test/start_test.sh ./test/torch_multiple_comms_test.py
          echo "UCC multiple comms test shared comm"
          TORCH_UCC_SHARED_COMM=1 /bin/bash ./test/start_test.sh ./test/torch_multiple_comms_test.py

  # Runs test/torch_tests.py with BACKEND=ucc for WORLD_SIZE = 1, 2, 3, 4.
  pytorch-unit-tests:
    runs-on: ubuntu-latest
    container:
      image: pytorch/pytorch:latest
    steps:
      - name: Checkout Action
        uses: actions/checkout@v1
      - name: Build TorchUCC
        uses: ./.github/actions/build
        with:
          # `with:` values are not passed through a shell, so a shell-style
          # ${VAR} would reach the action as literal text. Read the
          # workflow-level variables through the `env` context instead.
          ucx: ${{ env.OPENUCX_LINK }}
          ucc: ${{ env.UCC_LINK }}
      - name: PyTorch Unit Tests
        run: |
          export LD_LIBRARY_PATH=/opt/ucx/lib:/opt/ucc/lib:$LD_LIBRARY_PATH
          /opt/ucx/bin/ucx_info -e -u t
          export UCX_LOG_LEVEL=info
          export TORCH_UCC_ENABLE_HEALTH_CHECK=1
          export TORCH_SHOW_CPP_STACKTRACES=1
          pip3 install expecttest hypothesis xmlrunner unittest-xml-reporting
          cd test
          # The backend is the same for every world size, so set it once.
          export BACKEND='ucc'
          for np in $(seq 4)
          do
            export WORLD_SIZE=$np
            python torch_tests.py --subprocess
          done

  # Clones PARAM and runs its comms.py benchmark script with --backend ucc on
  # CPU for a list of collectives, then a point-to-point case.
  param-comm-tests:
    runs-on: ubuntu-latest
    container:
      image: pytorch/pytorch:latest
    steps:
      - name: Checkout Action
        uses: actions/checkout@v1
      - name: Build TorchUCC
        uses: ./.github/actions/build
        with:
          # `with:` values are not passed through a shell, so a shell-style
          # ${VAR} would reach the action as literal text. Read the
          # workflow-level variables through the `env` context instead.
          ucx: ${{ env.OPENUCX_LINK }}
          ucc: ${{ env.UCC_LINK }}
      - name: Test PARAM
        run: |
          git clone "${PARAM_LINK}" /tmp/param
          export LD_LIBRARY_PATH=/opt/ucx/lib:/opt/ucc/lib:$LD_LIBRARY_PATH
          export TORCH_UCC_TEST_SIZE=4

          # run_comms EXTRA_ARGS...
          # Launch PARAM's comms.py through test/start_test.sh with the
          # options shared by every invocation in this step, followed by the
          # caller's extra arguments.
          run_comms() {
            /bin/bash ./test/start_test.sh /tmp/param/train/comms/pt/comms.py \
              --backend ucc --device cpu --b 4 --e 4M --c 1 "$@"
          }

          echo "PARAM-Comms Reduce w/ UCC"
          run_comms --collective reduce
          echo "PARAM-Comms Allreduce w/ UCC"
          run_comms --collective all_reduce
          echo "PARAM-Comms Alltoall w/ UCC"
          run_comms --collective all_to_all
          echo "PARAM-Comms Alltoallv w/ UCC"
          run_comms --collective all_to_allv
          echo "PARAM-Comms Broadcast w/ UCC"
          run_comms --collective broadcast
          echo "PARAM-Comms Allgather w/ UCC"
          run_comms --collective all_gather
          echo "PARAM-Comms Allgather_base w/ UCC"
          run_comms --collective all_gather_base
          # FIXME: disabled as UCC does not support gather on CPU tensor yet
          # echo "PARAM-Comms Gather w/ UCC"
          # run_comms --collective gather
          echo "PARAM-Comms Quantized Allreduce w/ UCC (use of c10d future)"
          run_comms --bitwidth 16 --collective all_reduce
          echo "PARAM-Comms Non-blocking Allreduce w/ UCC"
          run_comms --z 0 --collective all_reduce
          echo "PARAM-Comms Non-blocking Alltoall w/ UCC"
          run_comms --z 0 --collective all_to_all
          echo "PARAM-Comms Non-blocking Alltoallv w/ UCC"
          run_comms --z 0 --collective all_to_allv
          echo "PARAM-Comms Pt2pt w/ UCC"
          # The point-to-point case uses ranks 0 and 1 only, so it runs with
          # a test size of 2.
          export TORCH_UCC_TEST_SIZE=2
          run_comms --pt2pt one2one --src-ranks 0 --dst-ranks 1