Merge branch 'master' into tokenization
mudler authored Oct 1, 2024
2 parents 79d2324 + 88b99d3 commit 8d6f623
Showing 25 changed files with 575 additions and 88 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -85,6 +85,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

Hot topics (looking for contributors):

- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
13 changes: 13 additions & 0 deletions backend/backend.proto
@@ -26,6 +26,19 @@ service Backend {
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

rpc Rerank(RerankRequest) returns (RerankResult) {}

rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
}

// MetricsRequest is intentionally empty: metrics are reported for the currently active slot.
message MetricsRequest {}

message MetricsResponse {
int32 slot_id = 1;
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
float tokens_per_second = 3;
int32 tokens_generated = 4;
int32 prompt_tokens_processed = 5;
}

message RerankRequest {
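A minimal Go client sketch for the new RPC, assuming stubs generated from this proto into `pkg/grpc/proto` (the import path used by the Go code later in this commit); the address is illustrative, since LocalAI normally spawns backend processes and wires up the connection itself:

```go
package main

import (
	"context"
	"fmt"
	"log"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Illustrative address: LocalAI assigns backend ports dynamically.
	conn, err := grpc.Dial("127.0.0.1:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)
	res, err := client.GetMetrics(context.Background(), &pb.MetricsRequest{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d generated, %d prompt tokens)\n",
		res.SlotId, res.TokensPerSecond, res.TokensGenerated, res.PromptTokensProcessed)
}
```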
35 changes: 35 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -495,6 +495,16 @@ struct llama_server_context
}
}

llama_client_slot* get_active_slot() {
for (llama_client_slot& slot : slots) {
// Check if the slot is currently processing
if (slot.is_processing()) {
return &slot; // Return the active slot
}
}
return nullptr; // No active slot found
}

void initialize() {
// create slots
all_slots_are_idle = true;
@@ -2420,6 +2430,31 @@ class BackendServiceImpl final : public backend::Backend::Service {

return grpc::Status::OK;
}

grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

if (active_slot != nullptr) {
// t_token_generation is tracked in milliseconds (and assumed non-zero here),
// so tokens/s = n_decoded / (t_token_generation / 1000)
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;

// Populate the response with metrics
response->set_slot_id(active_slot->id);
response->set_prompt_json_for_slot(active_slot->prompt.dump());
response->set_tokens_per_second(tokens_per_second);
response->set_tokens_generated(active_slot->n_decoded);
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
} else {
// Handle case when no active slot exists
response->set_slot_id(0);
response->set_prompt_json_for_slot("");
response->set_tokens_per_second(0);
response->set_tokens_generated(0);
response->set_prompt_tokens_processed(0);
}

return grpc::Status::OK;
}
};

void RunServer(const std::string& server_address) {
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements-intel.txt
@@ -18,6 +18,6 @@ python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
-gradio==4.38.1
+gradio==4.44.1
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
44 changes: 44 additions & 0 deletions core/backend/token_metrics.go
@@ -0,0 +1,44 @@
package backend

import (
"context"
"fmt"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
)

func TokenMetrics(
backend,
modelFile string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
bb := backend
if bb == "" {
return nil, fmt.Errorf("backend is required")
}

grpcOpts := GRPCModelOpts(backendConfig)

opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
loadedModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}

if loadedModel == nil {
return nil, fmt.Errorf("could not load model")
}

return loadedModel.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
}
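A sketch of a call site for this helper, assuming a model loader and application config taken from a running instance; the backend name and model file are illustrative:

```go
package metricsexample

import (
	"fmt"

	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

// PrintTokenMetrics queries the active slot of an (illustrative) llama-cpp
// model and prints its generation throughput.
func PrintTokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, bc config.BackendConfig) error {
	res, err := backend.TokenMetrics("llama-cpp", "model.gguf", loader, appConfig, bc)
	if err != nil {
		return err
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d tokens generated)\n",
		res.SlotId, res.TokensPerSecond, res.TokensGenerated)
	return nil
}
```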
38 changes: 36 additions & 2 deletions core/cli/util.go
@@ -15,8 +15,9 @@ import (
)

type UtilCMD struct {
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."`
}

type GGUFInfoCMD struct {
@@ -30,6 +31,11 @@ type HFScanCMD struct {
ToScan []string `arg:""`
}

type UsecaseHeuristicCMD struct {
ConfigName string `help:"The config file to check"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}

func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided")
@@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
return nil
}
}

func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error {
if len(uhcmd.ConfigName) == 0 {
log.Error().Msg("ConfigName is a required parameter")
return fmt.Errorf("config name is a required parameter")
}
if len(uhcmd.ModelsPath) == 0 {
log.Error().Msg("ModelsPath is a required parameter")
return fmt.Errorf("model path is a required parameter")
}
bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath)
err := bcl.LoadBackendConfig(uhcmd.ConfigName)
if err != nil {
log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend")
return err
}
bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName)
if !exists {
log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found")
return fmt.Errorf("config %q not found", uhcmd.ConfigName)
}
for name, uc := range config.GetAllBackendConfigUsecases() {
if bc.HasUsecases(uc) {
// zerolog only emits an event once Msg or Send is called.
log.Info().Str("Usecase", name).Send()
}
}
log.Info().Msg("---")
return nil
}
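An invocation sketch for the new subcommand (assuming kong derives the flag name --config-name from the field above; the config file name is illustrative):

```
local-ai util usecase-heuristic --config-name my-model.yaml --models-path ./models
```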
147 changes: 140 additions & 7 deletions core/config/backend_config.go
@@ -3,11 +3,13 @@ package config
import (
"os"
"regexp"
"slices"
"strings"

"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"gopkg.in/yaml.v3"
)

const (
@@ -27,13 +29,15 @@ type BackendConfig struct {
schema.PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`

F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases"`
KnownUsecases *BackendConfigUsecases `yaml:"-"`

PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
@@ -194,6 +198,17 @@ type TemplateConfig struct {
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
}

func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
type BCAlias BackendConfig
var aux BCAlias
if err := value.Decode(&aux); err != nil {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
return nil
}

func (c *BackendConfig) SetFunctionCallString(s string) {
c.functionCallString = s
}
@@ -410,3 +425,121 @@ func (c *BackendConfig) Validate() bool {
func (c *BackendConfig) HasTemplate() bool {
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
}

type BackendConfigUsecases int

const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000

// Common subsets. These must be OR-ed together; AND-ing disjoint bit flags
// would always yield zero.
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)

func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
return map[string]BackendConfigUsecases{
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
"FLAG_RERANK": FLAG_RERANK,
"FLAG_IMAGE": FLAG_IMAGE,
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_LLM": FLAG_LLM,
}
}

func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
}
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
if exists {
result |= flag
}
}
return &result
}

// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
return true
}
return c.GuessUsecases(u)
}

// GuessUsecases is a **heuristic based** function: the backend in question may not be loaded yet, and the config may not record what it is capable of.
// Ideally this function would check properties of the config, such as templates, rather than the direct backend-name matches in its lower half.
// That would avoid the maintenance burden of updating the list for each new backend, but for some services name checks are currently the best option.
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
if (u & FLAG_CHAT) == FLAG_CHAT {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
return false
}
}
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
if c.TemplateConfig.Completion == "" {
return false
}
}
if (u & FLAG_EDIT) == FLAG_EDIT {
if c.TemplateConfig.Edit == "" {
return false
}
}
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
if c.Embeddings == nil || !*c.Embeddings {
return false
}
}
if (u & FLAG_IMAGE) == FLAG_IMAGE {
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
if !slices.Contains(imageBackends, c.Backend) {
return false
}

if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
return false
}

}
if (u & FLAG_RERANK) == FLAG_RERANK {
if c.Backend != "rerankers" {
return false
}
}
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
if c.Backend != "whisper" {
return false
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
}

if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
if c.Backend != "transformers-musicgen" {
return false
}
}

return true
}
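For reference, a minimal config fragment exercising the new field might look like this (model name, backend, and template name are illustrative; GetUsecasesFromYAML upper-cases each entry and prefixes it with FLAG_):

```yaml
name: my-llm
backend: llama-cpp
known_usecases:
  - chat
  - completion
template:
  chat: my-chat-template
```

When known_usecases is present, HasUsecases first answers from the declared flags and only falls back to the GuessUsecases heuristic otherwise.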
35 changes: 35 additions & 0 deletions core/config/backend_config_filter.go
@@ -0,0 +1,35 @@
package config

import "regexp"

type BackendConfigFilterFn func(string, *BackendConfig) bool

func NoFilterFn(_ string, _ *BackendConfig) bool { return true }

func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) {
if filter == "" {
return NoFilterFn, nil
}
rxp, err := regexp.Compile(filter)
if err != nil {
return nil, err
}
return func(name string, config *BackendConfig) bool {
if config != nil {
return rxp.MatchString(config.Name)
}
return rxp.MatchString(name)
}, nil
}

func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn {
if usecases == FLAG_ANY {
return NoFilterFn
}
return func(name string, config *BackendConfig) bool {
if config == nil {
return false // TODO: Potentially make this a param, for now, no known usecase to include
}
return config.HasUsecases(usecases)
}
}
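A sketch of composing the two filters over a set of loaded configs; the configs map and the regular expression are illustrative, and in LocalAI the config loader applies such filters when listing models:

```go
package main

import (
	"fmt"
	"log"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	// Illustrative config set; real configs come from the BackendConfigLoader.
	configs := map[string]*config.BackendConfig{}

	nameFilter, err := config.BuildNameFilterFn("^llama")
	if err != nil {
		log.Fatal(err) // the filter is a regular expression and may fail to compile
	}
	// Require both chat and completion support: HasUsecases only matches when
	// every requested flag is present.
	usecaseFilter := config.BuildUsecaseFilterFn(config.FLAG_CHAT | config.FLAG_COMPLETION)

	for name, cfg := range configs {
		if nameFilter(name, cfg) && usecaseFilter(name, cfg) {
			fmt.Println(name)
		}
	}
}
```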