Commit ca462c8

Merge branch 'master' into gw-list-model-filter-upgrade

dave-gray101 authored Jul 15, 2024
2 parents 46b651b + 6564e7e commit ca462c8

Showing 15 changed files with 1,189 additions and 94 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/bump_deps.yaml
```diff
@@ -27,9 +27,6 @@ jobs:
       - repository: "go-skynet/bloomz.cpp"
         variable: "BLOOMZ_VERSION"
         branch: "main"
-      - repository: "nomic-ai/gpt4all"
-        variable: "GPT4ALL_VERSION"
-        branch: "main"
       - repository: "mudler/go-ggllm.cpp"
         variable: "GOGGLLM_VERSION"
         branch: "master"
```
4 changes: 2 additions & 2 deletions Makefile
```diff
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=17eb6aa8a992cda37ee65cf848d9289bd6cad860
+CPPLLAMA_VERSION?=aaab2419eaa17ab3aa38f4ba49c7eea406999e99
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -384,7 +384,7 @@ endif
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
 build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
 
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
```
6 changes: 3 additions & 3 deletions backend/python/openvoice/requirements-intel.txt
```diff
@@ -4,13 +4,13 @@ torch
 optimum[openvino]
 grpcio==1.64.1
 protobuf
-librosa==0.10.2.post1
+librosa==0.9.1
 faster-whisper==1.0.3
 pydub==0.25.1
 wavmark==0.0.3
-numpy==2.0.0
+numpy==1.26.4
 eng_to_ipa==0.0.2
-inflect==7.3.1
+inflect==7.0.0
 unidecode==1.3.7
 whisper-timestamped==1.15.4
 openai
```
5 changes: 5 additions & 0 deletions core/http/endpoints/jina/rerank.go
```diff
@@ -12,6 +12,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
+// @Summary Reranks a list of phrases by relevance to a given text query.
+// @Param request body schema.JINARerankRequest true "query params"
+// @Success 200 {object} schema.JINARerankResponse "Response"
+// @Router /v1/rerank [post]
 func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		req := new(schema.JINARerankRequest)
```
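For context, a minimal client sketch for the route documented above, assuming a LocalAI instance on `localhost:8080`. The request fields (`model`, `query`, `documents`, `top_n`) follow the Jina reranker API that this endpoint emulates and are assumptions, not copied from `schema.JINARerankRequest`:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Jina-style rerank payload; field names are assumed from the Jina API.
	body, _ := json.Marshal(map[string]any{
		"model":     "cross-encoder", // hypothetical model name
		"query":     "What backends does LocalAI support?",
		"documents": []string{"LocalAI supports llama.cpp.", "Bananas are yellow."},
		"top_n":     1,
	})
	resp, err := http.Post("http://localhost:8080/v1/rerank", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out) // ranked results, per schema.JINARerankResponse
}
```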
45 changes: 36 additions & 9 deletions core/http/endpoints/localai/gallery.go
```diff
@@ -9,6 +9,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
 )
@@ -49,6 +50,11 @@ func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx
 	}
 }
 
+// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
+// @Summary Install models to LocalAI.
+// @Param request body GalleryModel true "query params"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/apply [post]
 func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(GalleryModel)
@@ -68,13 +74,15 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 			Galleries: mgs.galleries,
 			ConfigURL: input.ConfigURL,
 		}
-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }
 
+// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
+// @Summary delete models to LocalAI.
+// @Param name path string true "Model name"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/delete/{name} [post]
 func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		modelName := c.Params("name")
@@ -89,13 +97,14 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 			return err
 		}
 
-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }
 
+// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
+// @Summary List installable models.
+// @Success 200 {object} []gallery.GalleryModel "Response"
+// @Router /models/available [get]
 func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
@@ -116,6 +125,10 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
 	}
 }
 
+// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
+// @Summary List all Galleries
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
 func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
@@ -128,6 +141,11 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
 	}
 }
 
+// AddModelGalleryEndpoint adds a gallery in LocalAI
+// @Summary Adds a gallery in LocalAI
+// @Param request body config.Gallery true "Gallery details"
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [post]
 func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(config.Gallery)
@@ -150,6 +168,11 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
 	}
 }
 
+// RemoveModelGalleryEndpoint remove a gallery in LocalAI
+// @Summary removes a gallery from LocalAI
+// @Param request body config.Gallery true "Gallery details"
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [delete]
 func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(config.Gallery)
@@ -165,6 +188,10 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
 		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		})
-		return c.Send(nil)
+		dat, err := json.Marshal(mgs.galleries)
+		if err != nil {
+			return err
+		}
+		return c.Send(dat)
 	}
 }
```
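The apply and delete endpoints above now share `schema.GalleryResponse` instead of duplicating an anonymous struct. A hedged consumer sketch, assuming an instance on `localhost:8080`; the `id` payload field and model name are illustrative only:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// galleryResponse mirrors schema.GalleryResponse from the diff above.
type galleryResponse struct {
	ID        string `json:"uuid"`
	StatusURL string `json:"status"`
}

func main() {
	// Hypothetical install request; gallery and model names are illustrative.
	payload := bytes.NewBufferString(`{"id": "model-gallery@example-model"}`)
	resp, err := http.Post("http://localhost:8080/models/apply", "application/json", payload)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var gr galleryResponse
	if err := json.NewDecoder(resp.Body).Decode(&gr); err != nil {
		panic(err)
	}
	// StatusURL points at /models/jobs/{uuid} and can be polled until
	// the install job reports completion.
	fmt.Printf("job %s started, poll %s\n", gr.ID, gr.StatusURL)
}
```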
28 changes: 28 additions & 0 deletions core/http/endpoints/localai/p2p.go
```diff
@@ -0,0 +1,28 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/schema"
+)
+
+// ShowP2PNodes returns the P2P Nodes
+// @Summary Returns available P2P nodes
+// @Success 200 {object} []schema.P2PNodesResponse "Response"
+// @Router /api/p2p [get]
+func ShowP2PNodes(c *fiber.Ctx) error {
+	// Render index
+	return c.JSON(schema.P2PNodesResponse{
+		Nodes:          p2p.GetAvailableNodes(""),
+		FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
+	})
+}
+
+// ShowP2PToken returns the P2P token
+// @Summary Show the P2P token
+// @Success 200 {string} string "Response"
+// @Router /api/p2p/token [get]
+func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
+}
```
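A sketch of how a client might read the new nodes route, assuming P2P is enabled and the API is reachable on `localhost:8080`; the local struct mirrors the JSON tags of `schema.P2PNodesResponse`, and node entries are decoded as raw maps since `p2p.NodeData` is not shown in this diff:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// nodesResponse mirrors the JSON tags of schema.P2PNodesResponse; node
// entries are decoded as raw maps because p2p.NodeData is not shown here.
type nodesResponse struct {
	Nodes          []map[string]any `json:"nodes"`
	FederatedNodes []map[string]any `json:"federated_nodes"`
}

func main() {
	resp, err := http.Get("http://localhost:8080/api/p2p")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var nr nodesResponse
	if err := json.NewDecoder(resp.Body).Decode(&nr); err != nil {
		panic(err)
	}
	fmt.Printf("%d worker node(s), %d federated node(s)\n", len(nr.Nodes), len(nr.FederatedNodes))
}
```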
12 changes: 2 additions & 10 deletions core/http/routes/localai.go
```diff
@@ -59,16 +59,8 @@ func RegisterLocalAIRoutes(app *fiber.App,
 
 	// p2p
 	if p2p.IsP2PEnabled() {
-		app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
-			// Render index
-			return c.JSON(map[string]interface{}{
-				"Nodes":          p2p.GetAvailableNodes(""),
-				"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
-			})
-		})
-		app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
-			return c.Send([]byte(appConfig.P2PToken))
-		})
+		app.Get("/api/p2p", auth, localai.ShowP2PNodes)
+		app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
 	}
 
 	app.Get("/version", auth, func(c *fiber.Ctx) error {
```
11 changes: 11 additions & 0 deletions core/schema/localai.go
```diff
@@ -1,6 +1,7 @@
 package schema
 
 import (
+	"github.com/mudler/LocalAI/core/p2p"
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )
 
@@ -14,6 +15,11 @@ type BackendMonitorResponse struct {
 	CPUPercent float64
 }
 
+type GalleryResponse struct {
+	ID        string `json:"uuid"`
+	StatusURL string `json:"status"`
+}
+
 // @Description TTS request body
 type TTSRequest struct {
 	Model string `json:"model" yaml:"model"` // model name or full path
@@ -59,3 +65,8 @@ type StoresFindResponse struct {
 	Values       []string  `json:"values" yaml:"values"`
 	Similarities []float32 `json:"similarities" yaml:"similarities"`
 }
+
+type P2PNodesResponse struct {
+	Nodes          []p2p.NodeData `json:"nodes" yaml:"nodes"`
+	FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
+}
```
4 changes: 4 additions & 0 deletions docs/content/docs/faq.md
```diff
@@ -16,6 +16,10 @@ Here are answers to some of the most common questions.
 
 Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.
 
+### Benchmarking LocalAI and llama.cpp shows different results!
+
+LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780).
+
 ### What's the difference with Serge, or XXX?
 
 LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.
```
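For reference, a minimal model-config sketch for the setting the new FAQ entry describes; the file name and surrounding fields are illustrative, assuming `mirostat` sits at the top level of the model YAML as in the advanced-usage docs:

```yaml
# models/my-model.yaml (hypothetical model configuration)
name: my-model
parameters:
  model: my-model.Q4_K_M.gguf
mirostat: 0  # disable mirostat sampling to match plain llama.cpp defaults
```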
(Diff truncated: the remaining 6 of 15 changed files are not shown here.)