Merge branch 'master' into tokenization
mudler authored Oct 1, 2024
2 parents 79d2324 + 88b99d3 commit 8d6f623
Showing 25 changed files with 575 additions and 88 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -85,6 +85,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

Hot topics (looking for contributors):

- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
13 changes: 13 additions & 0 deletions backend/backend.proto
@@ -26,6 +26,19 @@ service Backend {
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

rpc Rerank(RerankRequest) returns (RerankResult) {}

rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
}

// MetricsRequest is intentionally empty: metrics are reported for the currently active slot.
message MetricsRequest {}

message MetricsResponse {
int32 slot_id = 1;
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
float tokens_per_second = 3;
int32 tokens_generated = 4;
int32 prompt_tokens_processed = 5;
}

message RerankRequest {
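A minimal Go client sketch for the new RPC, assuming stubs generated from this proto into `pkg/grpc/proto` (the import path used by the Go code later in this commit); the address is illustrative, since LocalAI normally spawns backend processes and wires up the connection itself:

```go
package main

import (
	"context"
	"fmt"
	"log"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Illustrative address: LocalAI assigns backend ports dynamically.
	conn, err := grpc.Dial("127.0.0.1:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)
	res, err := client.GetMetrics(context.Background(), &pb.MetricsRequest{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d generated, %d prompt tokens)\n",
		res.SlotId, res.TokensPerSecond, res.TokensGenerated, res.PromptTokensProcessed)
}
```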
35 changes: 35 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -495,6 +495,16 @@ struct llama_server_context
}
}

llama_client_slot* get_active_slot() {
for (llama_client_slot& slot : slots) {
// Check if the slot is currently processing
if (slot.is_processing()) {
return &slot; // Return the active slot
}
}
return nullptr; // No active slot found
}

void initialize() {
// create slots
all_slots_are_idle = true;
@@ -2420,6 +2430,31 @@ class BackendServiceImpl final : public backend::Backend::Service {

return grpc::Status::OK;
}

grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

if (active_slot != nullptr) {
// t_token_generation is tracked in milliseconds (and assumed non-zero here),
// so tokens/s = n_decoded / (t_token_generation / 1000)
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;

// Populate the response with metrics
response->set_slot_id(active_slot->id);
response->set_prompt_json_for_slot(active_slot->prompt.dump());
response->set_tokens_per_second(tokens_per_second);
response->set_tokens_generated(active_slot->n_decoded);
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
} else {
// Handle case when no active slot exists
response->set_slot_id(0);
response->set_prompt_json_for_slot("");
response->set_tokens_per_second(0);
response->set_tokens_generated(0);
response->set_prompt_tokens_processed(0);
}

return grpc::Status::OK;
}
};

void RunServer(const std::string& server_address) {
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements-intel.txt
@@ -18,6 +18,6 @@ python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
-gradio==4.38.1
+gradio==4.44.1
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
44 changes: 44 additions & 0 deletions core/backend/token_metrics.go
@@ -0,0 +1,44 @@
package backend

import (
"context"
"fmt"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
)

func TokenMetrics(
backend,
modelFile string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
bb := backend
if bb == "" {
return nil, fmt.Errorf("backend is required")
}

grpcOpts := GRPCModelOpts(backendConfig)

opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
loadedModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}

if loadedModel == nil {
return nil, fmt.Errorf("could not load model")
}

return loadedModel.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
}
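A sketch of a call site for this helper, assuming a model loader and application config taken from a running instance; the backend name and model file are illustrative:

```go
package metricsexample

import (
	"fmt"

	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

// PrintTokenMetrics queries the active slot of an (illustrative) llama-cpp
// model and prints its generation throughput.
func PrintTokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, bc config.BackendConfig) error {
	res, err := backend.TokenMetrics("llama-cpp", "model.gguf", loader, appConfig, bc)
	if err != nil {
		return err
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d tokens generated)\n",
		res.SlotId, res.TokensPerSecond, res.TokensGenerated)
	return nil
}
```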
38 changes: 36 additions & 2 deletions core/cli/util.go
@@ -15,8 +15,9 @@ import (
)

type UtilCMD struct {
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."`
}

type GGUFInfoCMD struct {
@@ -30,6 +31,11 @@ type HFScanCMD struct {
ToScan []string `arg:""`
}

type UsecaseHeuristicCMD struct {
ConfigName string `help:"The config file to check"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}

func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided")
@@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
return nil
}
}

func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error {
if len(uhcmd.ConfigName) == 0 {
log.Error().Msg("ConfigName is a required parameter")
return fmt.Errorf("config name is a required parameter")
}
if len(uhcmd.ModelsPath) == 0 {
log.Error().Msg("ModelsPath is a required parameter")
return fmt.Errorf("model path is a required parameter")
}
bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath)
err := bcl.LoadBackendConfig(uhcmd.ConfigName)
if err != nil {
log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend")
return err
}
bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName)
if !exists {
log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found")
return fmt.Errorf("config %q not found", uhcmd.ConfigName)
}
for name, uc := range config.GetAllBackendConfigUsecases() {
if bc.HasUsecases(uc) {
// zerolog only emits an event once Msg or Send is called.
log.Info().Str("Usecase", name).Send()
}
}
log.Info().Msg("---")
return nil
}
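An invocation sketch for the new subcommand (assuming kong derives the flag name --config-name from the field above; the config file name is illustrative):

```
local-ai util usecase-heuristic --config-name my-model.yaml --models-path ./models
```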
147 changes: 140 additions & 7 deletions core/config/backend_config.go
@@ -3,11 +3,13 @@ package config
import (
"os"
"regexp"
"slices"
"strings"

"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"gopkg.in/yaml.v3"
)

const (
@@ -27,13 +29,15 @@ type BackendConfig struct {
schema.PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`

F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases"`
KnownUsecases *BackendConfigUsecases `yaml:"-"`

PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
@@ -194,6 +198,17 @@ type TemplateConfig struct {
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
}

func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
type BCAlias BackendConfig
var aux BCAlias
if err := value.Decode(&aux); err != nil {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
return nil
}

func (c *BackendConfig) SetFunctionCallString(s string) {
c.functionCallString = s
}
@@ -410,3 +425,121 @@ func (c *BackendConfig) Validate() bool {
func (c *BackendConfig) HasTemplate() bool {
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
}

type BackendConfigUsecases int

const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000

// Common subsets. These must be OR-ed together; AND-ing disjoint bit flags
// would always yield zero.
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)

func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
return map[string]BackendConfigUsecases{
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
"FLAG_RERANK": FLAG_RERANK,
"FLAG_IMAGE": FLAG_IMAGE,
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_LLM": FLAG_LLM,
}
}

func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
}
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
if exists {
result |= flag
}
}
return &result
}

// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
return true
}
return c.GuessUsecases(u)
}

// GuessUsecases is a **heuristic based** function: the backend in question may not be loaded yet, and the config may not record what it is capable of.
// Ideally this function would check properties of the config, such as templates, rather than the direct backend-name matches in its lower half.
// That would avoid the maintenance burden of updating the list for each new backend, but for some services name checks are currently the best option.
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
if (u & FLAG_CHAT) == FLAG_CHAT {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
return false
}
}
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
if c.TemplateConfig.Completion == "" {
return false
}
}
if (u & FLAG_EDIT) == FLAG_EDIT {
if c.TemplateConfig.Edit == "" {
return false
}
}
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
if c.Embeddings == nil || !*c.Embeddings {
return false
}
}
if (u & FLAG_IMAGE) == FLAG_IMAGE {
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
if !slices.Contains(imageBackends, c.Backend) {
return false
}

if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
return false
}

}
if (u & FLAG_RERANK) == FLAG_RERANK {
if c.Backend != "rerankers" {
return false
}
}
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
if c.Backend != "whisper" {
return false
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
}

if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
if c.Backend != "transformers-musicgen" {
return false
}
}

return true
}
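For reference, a minimal config fragment exercising the new field might look like this (model name, backend, and template name are illustrative; GetUsecasesFromYAML upper-cases each entry and prefixes it with FLAG_):

```yaml
name: my-llm
backend: llama-cpp
known_usecases:
  - chat
  - completion
template:
  chat: my-chat-template
```

When known_usecases is present, HasUsecases first answers from the declared flags and only falls back to the GuessUsecases heuristic otherwise.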
35 changes: 35 additions & 0 deletions core/config/backend_config_filter.go
@@ -0,0 +1,35 @@
package config

import "regexp"

type BackendConfigFilterFn func(string, *BackendConfig) bool

func NoFilterFn(_ string, _ *BackendConfig) bool { return true }

func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) {
if filter == "" {
return NoFilterFn, nil
}
rxp, err := regexp.Compile(filter)
if err != nil {
return nil, err
}
return func(name string, config *BackendConfig) bool {
if config != nil {
return rxp.MatchString(config.Name)
}
return rxp.MatchString(name)
}, nil
}

func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn {
if usecases == FLAG_ANY {
return NoFilterFn
}
return func(name string, config *BackendConfig) bool {
if config == nil {
return false // TODO: Potentially make this a param, for now, no known usecase to include
}
return config.HasUsecases(usecases)
}
}
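A sketch of composing the two filters over a set of loaded configs; the configs map and the regular expression are illustrative, and in LocalAI the config loader applies such filters when listing models:

```go
package main

import (
	"fmt"
	"log"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	// Illustrative config set; real configs come from the BackendConfigLoader.
	configs := map[string]*config.BackendConfig{}

	nameFilter, err := config.BuildNameFilterFn("^llama")
	if err != nil {
		log.Fatal(err) // the filter is a regular expression and may fail to compile
	}
	// Require both chat and completion support: HasUsecases only matches when
	// every requested flag is present.
	usecaseFilter := config.BuildUsecaseFilterFn(config.FLAG_CHAT | config.FLAG_COMPLETION)

	for name, cfg := range configs {
		if nameFilter(name, cfg) && usecaseFilter(name, cfg) {
			fmt.Println(name)
		}
	}
}
```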