-
Notifications
You must be signed in to change notification settings - Fork 7
/
docker-entrypoint.sh
executable file
·86 lines (71 loc) · 3.36 KB
/
docker-entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/sh
# Docker entrypoint: either downloads a model (when given a model name as
# an argument) or starts llama-server configured via LLAMA_* env vars.
set -eu
# model target, sha256 hash, and url
# Registry of downloadable models: one row per model, three
# whitespace-separated columns -- short name, expected SHA-256 of the
# GGUF file, and the download URL.  parse_args_download_model matches on
# column 1, and usage() lists column 1.
MODEL_LIST=$(cat << EOF
llama-2-13b ef36e090240040f97325758c1ad8e23f3801466a8eece3a9eac2d22d942f548a https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf
mistral-7b b85cdd596ddd76f3194047b9108a73c74d77ba04bef49255a50fc0cfbda83d32 https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf
solar-10b 4ade240f5dcc253272158f3659a56f5b1da8405510707476d23a7df943aa35f7 https://huggingface.co/TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF/resolve/main/solar-10.7b-instruct-v1.0.Q5_K_M.gguf
starling-7b c67b033bff47e7b8574491c6c296c094e819488d146aca1c6326c10257450b99 https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF/resolve/main/Starling-LM-7B-beta-Q5_K_M.gguf
command-r 1a59aeb034b64e430d25bc9f2b29d9f2cc658af38670fae36226585603da8ecc https://huggingface.co/bartowski/c4ai-command-r-v01-GGUF/resolve/main/c4ai-command-r-v01-Q5_K_M.gguf
llama-3-8b 14e10feba0c82a55da198dcd69d137206ad22d116a809926d27fa5f2398c69c7 https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf
phi-3-mini 597a483b0e56360cb488d3f8a5ec0fd2c3a3eb44da7bb69020b79ba7c1f6ce85 https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q6_K.gguf
phi-3-medium 5e9d850d6c899e7fdf39a19cdf6fecae225e0c5bb3d13d6f277cbda508a15f0c https://huggingface.co/bartowski/Phi-3-medium-4k-instruct-GGUF/resolve/main/Phi-3-medium-4k-instruct-Q5_K_M.gguf
gemma-2-9b a4b0b55ce809a09baaefb789b0046ac77ecd502aba8aeb2ed63cc237d9f40ce7 https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q5_K_M.gguf
gemma-2-27b fbefa7ddf24b32dee231c40e0bdd55f9a3ef0e64c8559b0cb48b66cce66fe671 https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q5_K_M.gguf
EOF
)
usage() {
  # Print a help message on stdout.  The available model names are the
  # first column of MODEL_LIST, each indented by one space.
  MODEL_NAMES=$(printf '%s\n' "$MODEL_LIST" | awk '{ printf " %s\n", $1 }')
  cat << EOF
Usage: $0 [MODEL]...
Run llama-server or download a model and exit
If no MODEL is provided, run llama-server with default settings.
Available models to download:
$MODEL_NAMES
EOF
}
parse_args_download_model() {
  # Download mode: when a model name is given on the command line, fetch
  # that model into the current directory, verify its checksum, and exit
  # without starting the server.  With no arguments, return and let the
  # caller start llama-server.
  if [ "$#" -ge 1 ]; then
    if [ "$1" = "--help" ]; then
      # Asking for help is not an error.
      usage
      exit 0
    fi
    # Exact-match column 1 only.  A substring grep would match several
    # rows at once (e.g. "llama" hits every llama URL) or match text
    # inside a URL, yielding a garbled multi-line MODEL_LINE.
    MODEL_LINE=$(printf '%s\n' "$MODEL_LIST" | awk -v m="$1" '$1 == m')
    if [ -z "$MODEL_LINE" ]; then
      usage
      exit 1
    fi
    MODEL_SHA256=$(printf '%s\n' "$MODEL_LINE" | awk '{print $2}')
    MODEL_URL=$(printf '%s\n' "$MODEL_LINE" | awk '{print $3}')
    MODEL_NAME=$(basename "$MODEL_URL")
    # -f: fail on HTTP errors instead of saving an error page as the model.
    curl -fLO "$MODEL_URL"
    # Two spaces between hash and name: the checksum-file format GNU
    # sha256sum -c requires (busybox accepts it too).
    echo "$MODEL_SHA256  $MODEL_NAME" | sha256sum -c -
    exit 0
  fi
}
set_default_env_vars() {
  # Provide defaults for required server settings without overriding
  # anything supplied by the caller's environment.  The ${VAR+x} test is
  # true whenever VAR is set -- even to the empty string -- so an
  # explicitly empty value is preserved.  Expansions are quoted; the
  # original unquoted form only worked by accident of [ -z ] semantics.
  if [ -z "${LLAMA_HOST+x}" ]; then
    # Bind all interfaces so the server is reachable from outside the container.
    export LLAMA_HOST="0.0.0.0"
  fi
  if [ -z "${LLAMA_MODEL+x}" ]; then
    export LLAMA_MODEL="/models/llama-2-13b-chat.Q5_K_M.gguf"
  fi
}
convert_llama_env_vars() {
  # Translate every exported LLAMA_* variable into a llama-server CLI flag:
  #   LLAMA_CTX_SIZE=4096  ->  --ctx-size 4096
  # The grep is anchored (^LLAMA_) so unrelated variables that merely
  # contain "LLAMA_" (e.g. NOT_LLAMA_X) are not turned into bogus flags.
  # awk operates on the whole line ($0) rather than per-field, so a value
  # containing spaces stays attached to its flag instead of being split
  # into several garbage name/value pairs.
  # NOTE(review): values with whitespace are still word-split when the
  # caller expands $LLAMA_ARGS unquoted -- confirm no such values are used.
  LLAMA_ARGS=$(env | grep '^LLAMA_' | awk '{
    # replace the LLAMA_ prefix with --
    sub("^LLAMA_", "--")
    # split at the first = into flag name and value
    eq = index($0, "=")
    flag = tolower(substr($0, 1, eq - 1))
    val = substr($0, eq + 1)
    # env var style _ becomes CLI style -
    gsub("_", "-", flag)
    print flag " " val
  }')
}
# Download-and-exit mode when a model name (or --help) was given;
# otherwise this returns and we start the server below.
parse_args_download_model "$@"
# Fill in defaults for unset LLAMA_* vars, then build the server's CLI
# flags from all exported LLAMA_* vars.
set_default_env_vars
convert_llama_env_vars
# Trace the final command line into the container logs.
set -x
# LLAMA_ARGS is intentionally unquoted: each "--flag value" pair must
# word-split into separate arguments.
llama-server $LLAMA_ARGS