Skip to content

Commit

Permalink
Merge branch 'master' into fix_ci
Browse files Browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler authored Oct 26, 2023
2 parents 1d51659 + c62504a commit 3b6e4a6
Show file tree
Hide file tree
Showing 17 changed files with 95 additions and 308 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ jobs:
if [ ! -e /run/systemd/system ]; then
sudo mkdir /run/systemd/system
fi
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
make \
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
BUILD_TYPE=cublas \
Expand All @@ -57,4 +59,5 @@ jobs:
make \
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
teardown-e2e || true
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
docker system prune -f -a --volumes || true
42 changes: 3 additions & 39 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0

GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7

CPPLLAMA_VERSION?=465219b9143ac01db0990bbcb0a081ef72ec2008
CPPLLAMA_VERSION?=9d02956443e5c1ded29b7b5ed8a21bc01ba6f563

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
Expand All @@ -30,15 +30,9 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
# go-piper version
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7

# go-bloomz version
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f

# stablediffusion version
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632

# Go-ggllm
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b

export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
Expand Down Expand Up @@ -129,7 +123,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_GRPC+=backend-assets/grpc/piper
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)

# If empty, then we build all
Expand All @@ -146,14 +140,6 @@ gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1

## go-ggllm
go-ggllm:
git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1

go-ggllm/libggllm.a: go-ggllm
$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a

## go-piper
go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
Expand All @@ -180,14 +166,6 @@ go-rwkv:
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

## bloomz
bloomz:
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1

bloomz/libbloomz.a: bloomz
cd bloomz && make libbloomz.a

go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a

Expand Down Expand Up @@ -241,7 +219,7 @@ go-llama-stable/libbinding.a: go-llama-stable
go-piper/libpiper_binding.a: go-piper
$(MAKE) -C go-piper libpiper_binding.a example/main

get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
touch $@

replace:
Expand All @@ -250,10 +228,8 @@ replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm

prepare-sources: get-sources replace
$(GOCMD) mod download
Expand All @@ -269,9 +245,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) -C go-piper clean
$(MAKE) -C go-ggllm clean
$(MAKE) build

prepare: prepare-sources $(OPTIONAL_TARGETS)
Expand All @@ -289,10 +263,8 @@ clean: ## Remove build related file
rm -rf ./backend-assets
rm -rf ./go-rwkv
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf ./go-piper
rm -rf ./go-ggllm
rm -rf $(BINARY_NAME)
rm -rf release/
$(MAKE) -C backend/cpp/llama clean
Expand Down Expand Up @@ -418,10 +390,6 @@ protogen-python:
backend-assets/grpc:
mkdir -p backend-assets/grpc

backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/

backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
Expand Down Expand Up @@ -486,10 +454,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/

backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/

backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
Expand Down
8 changes: 6 additions & 2 deletions backend/cpp/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
link_directories("/opt/homebrew/lib")
include_directories("/opt/homebrew/include")
endif()

find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
Expand All @@ -15,8 +20,7 @@ find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Protobuf_INCLUDE_DIRS})

message(STATUS "Using protobuf ${Protobuf_VERSION} ${Protobuf_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}")

message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")

# Proto file
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
Expand Down
16 changes: 8 additions & 8 deletions backend/cpp/llama/grpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,11 @@ struct llama_server_context
if (suff_rm_leading_spc && suffix_tokens[0] == space_token) {
suffix_tokens.erase(suffix_tokens.begin());
}
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx));
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx));
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
prefix_tokens.push_back(llama_token_middle(ctx));
prefix_tokens.push_back(llama_token_middle(model));

auto prompt_tokens = prefix_tokens;

Expand Down Expand Up @@ -419,7 +419,7 @@ struct llama_server_context
if (params.n_predict == 0)
{
has_next_token = false;
result.tok = llama_token_eos(ctx);
result.tok = llama_token_eos(model);
return result;
}

Expand Down Expand Up @@ -453,7 +453,7 @@ struct llama_server_context
// decrement remaining sampling budget
--n_remain;

if (!embd.empty() && embd.back() == llama_token_eos(ctx))
if (!embd.empty() && embd.back() == llama_token_eos(model))
{
// stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
Expand Down Expand Up @@ -594,7 +594,7 @@ static void parse_options_completion(bool streaming,const backend::PredictOption

if (predict->ignoreeos())
{
llama.params.sparams.logit_bias[llama_token_eos(llama.ctx)] = -INFINITY;
llama.params.sparams.logit_bias[llama_token_eos(llama.model)] = -INFINITY;
}

// const auto &logit_bias = body.find("logit_bias");
Expand Down Expand Up @@ -676,7 +676,7 @@ static void params_parse(const backend::ModelOptions* request,
}

static bool is_at_eob(llama_server_context &server_context, const llama_token *tokens, const size_t n_tokens) {
return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.ctx);
return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.model);
}

// Function matching type llama_beam_search_callback_fn_t.
Expand Down
23 changes: 0 additions & 23 deletions cmd/grpc/bloomz/main.go

This file was deleted.

25 changes: 0 additions & 25 deletions cmd/grpc/falcon/main.go

This file was deleted.

42 changes: 42 additions & 0 deletions examples/configurations/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
## Advanced configuration

This section contains examples of how to install models manually using configuration files.

### Prerequisites

First clone LocalAI:

```bash
git clone https://github.com/go-skynet/LocalAI

cd LocalAI
```

Set up the model you prefer from the examples below, and then start LocalAI:

```bash
docker compose up -d --pull always
```

If LocalAI is already running, you can restart it with:

```bash
docker compose restart
```

See also the getting started guide: https://localai.io/basics/getting_started/

### Mistral

To set up Mistral, copy the files from the `mistral` directory into the `models` folder:

```bash
cp -r examples/configurations/mistral/* models/
```

Now download the model:

```bash
wget https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf -O models/mistral-7b-openorca.Q6_K.gguf
```

3 changes: 3 additions & 0 deletions examples/configurations/mistral/chatml-block.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{.Input}}
<|im_start|>assistant

3 changes: 3 additions & 0 deletions examples/configurations/mistral/chatml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
1 change: 1 addition & 0 deletions examples/configurations/mistral/completion.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{.Input}}
16 changes: 16 additions & 0 deletions examples/configurations/mistral/mistral.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: mistral
mmap: true
parameters:
model: mistral-7b-openorca.Q6_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat_message: chatml
chat: chatml-block
completion: completion
context_size: 4096
f16: true
stopwords:
- <|im_end|>
threads: 4
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ require (
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530
github.com/onsi/ginkgo/v2 v2.13.0
github.com/onsi/gomega v1.28.0
github.com/onsi/gomega v1.28.1
github.com/otiai10/openaigo v1.6.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
Expand Down Expand Up @@ -89,7 +89,7 @@ require (
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/klauspost/compress v1.16.7 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
Expand Down Expand Up @@ -153,6 +155,8 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y
github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c=
github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8=
github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA=
github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8=
Expand Down
Loading

0 comments on commit 3b6e4a6

Please sign in to comment.