Merge branch 'master' into rf-service-wiring-1c
dave-gray101 authored Jun 19, 2024
2 parents 6f5d46e + 8142bdc commit 563a416
Showing 13 changed files with 140 additions and 18 deletions.
4 changes: 2 additions & 2 deletions .github/ci/modelslist.go
@@ -114,12 +114,12 @@ var modelPageTemplate string = `
<h2 class="text-center text-3xl font-semibold text-gray-100">
- 🖼️ Available {{.AvailableModels}} models</i> repositories <a href="https://localai.io/models/" target="_blank" >
+ 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h2>
<h3>
- Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.
+ Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
You can install models with the CLI command <code>local-ai models install <model-name></code>, or by using the WebUI.
</h3>
6 changes: 4 additions & 2 deletions .github/workflows/release.yaml
@@ -102,8 +102,9 @@ jobs:
export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+ sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
- BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0" \
+ BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
@@ -212,8 +213,9 @@ jobs:
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
+ sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
GO_TAGS=p2p \
- BACKEND_LIBS="/usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
+ BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
4 changes: 2 additions & 2 deletions Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
- CPPLLAMA_VERSION?=0c7b3595b9e5ad2355818e259f06b0dc3f0065b3
+ CPPLLAMA_VERSION?=37bef8943312d91183ff06d8f1214082a17344a5

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -315,7 +315,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
- cp -r $(BACKEND_LIBS) backend-assets/lib/
+ cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

13 changes: 12 additions & 1 deletion backend/python/diffusers/backend.py
@@ -17,7 +17,7 @@

import grpc

- from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+ from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image,export_to_video
@@ -225,6 +225,17 @@ def LoadModel(self, request, context):
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
+ elif request.PipelineType == "StableDiffusion3Pipeline":
+     if fromSingleFile:
+         self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
+             torch_dtype=torchType,
+             use_safetensors=True)
+     else:
+         self.pipe = StableDiffusion3Pipeline.from_pretrained(
+             request.Model,
+             torch_dtype=torchType,
+             use_safetensors=True,
+             variant=variant)

if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
3 changes: 2 additions & 1 deletion backend/python/diffusers/requirements.txt
@@ -5,6 +5,7 @@ grpcio==1.64.0
opencv-python
pillow
protobuf
+ sentencepiece
torch
transformers
- certifi
+ certifi
1 change: 1 addition & 0 deletions core/cli/models.go
@@ -57,6 +57,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
}
+
for _, modelName := range mi.ModelArgs {

progressBar := progressbar.NewOptions(
6 changes: 5 additions & 1 deletion core/http/views/talk.html
@@ -50,6 +50,10 @@
</div>
<div id="loader" class="my-2 loader" style="display: none;"></div>
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
+ <!-- Note for recording box -->
+ <div class="text-sm mb-4 text-white-500">
+     <strong>Note:</strong> You need an LLM, an audio-transcription (whisper) model, and a TTS model installed for this to work. Select the appropriate models from the toolbox, then click the 'Talk' button to start recording. The recording continues until you click 'Stop recording'. Make sure your microphone is set up and enabled.
+ </div>
<div class="mb-4" >
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
<select id="modelSelect"
@@ -95,7 +99,7 @@
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
<a id="resetButton"
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-gray-200"
href="#"
>Reset conversation</a>
<audio id="audioPlayback" controls hidden></audio>
2 changes: 1 addition & 1 deletion docs/data/version.json
@@ -1,3 +1,3 @@
{
"version": "v2.16.0"
"version": "v2.17.0"
}
45 changes: 45 additions & 0 deletions gallery/index.yaml
@@ -55,6 +55,20 @@
- filename: dolphin-2.9.2-qwen2-7b-Q4_K_M.gguf
sha256: a15b5db4df6be4f4bfb3632b2009147332ef4c57875527f246b4718cb0d3af1f
uri: huggingface://cognitivecomputations/dolphin-2.9.2-qwen2-7b-gguf/dolphin-2.9.2-qwen2-7b-Q4_K_M.gguf
+ - !!merge <<: *qwen2
+   name: "samantha-qwen-2-7B"
+   description: |
+     Samantha based on qwen2
+   urls:
+     - https://huggingface.co/bartowski/Samantha-Qwen-2-7B-GGUF
+     - https://huggingface.co/macadeliccc/Samantha-Qwen2-7B
+   overrides:
+     parameters:
+       model: Samantha-Qwen-2-7B-Q4_K_M.gguf
+   files:
+     - filename: Samantha-Qwen-2-7B-Q4_K_M.gguf
+       sha256: 5d1cf1c35a7a46c536a96ba0417d08b9f9e09c24a4e25976f72ad55d4904f6fe
+       uri: huggingface://bartowski/Samantha-Qwen-2-7B-GGUF/Samantha-Qwen-2-7B-Q4_K_M.gguf
## START Mistral
- &mistral03
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -224,6 +238,22 @@
- filename: firefly-gemma-7b-Q4_K_S-imatrix.gguf
sha256: 622e0b8e4f12203cc40c7f87915abf99498c2e0582203415ca236ea37643e428
uri: huggingface://Lewdiculous/firefly-gemma-7b-GGUF-IQ-Imatrix/firefly-gemma-7b-Q4_K_S-imatrix.gguf
+ - !!merge <<: *gemma
+   name: "gemma-1.1-7b-it"
+   urls:
+     - https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF
+     - https://huggingface.co/google/gemma-1.1-7b-it
+   description: |
+     This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
+     Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
+   overrides:
+     parameters:
+       model: gemma-1.1-7b-it-Q4_K_M.gguf
+   files:
+     - filename: gemma-1.1-7b-it-Q4_K_M.gguf
+       sha256: 47821da72ee9e80b6fd43c6190ad751b485fb61fa5664590f7a73246bcd8332e
+       uri: huggingface://bartowski/gemma-1.1-7b-it-GGUF/gemma-1.1-7b-it-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -2550,6 +2580,21 @@
- filename: DreamShaper_8_pruned.safetensors
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
+ - name: stable-diffusion-3-medium
+   icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
+   license: other
+   description: |
+     Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
+   urls:
+     - https://huggingface.co/stabilityai/stable-diffusion-3-medium
+     - https://huggingface.co/leo009/stable-diffusion-3-medium
+   tags:
+     - text-to-image
+     - stablediffusion
+     - python
+     - sd-3
+     - gpu
+   url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
- &whisper
## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
14 changes: 14 additions & 0 deletions gallery/stablediffusion3.yaml
@@ -0,0 +1,14 @@
---
name: "stable-diffusion-3-medium"

config_file: |
  backend: diffusers
  diffusers:
    cuda: true
    enable_parameters: negative_prompt,num_inference_steps
    pipeline_type: StableDiffusion3Pipeline
  f16: false
  name: sd3
  parameters:
    model: v2ray/stable-diffusion-3-medium-diffusers
  step: 25
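Once installed from the gallery, the model is addressed by its gallery name through LocalAI's OpenAI-compatible API. Below is a minimal sketch, assuming a LocalAI instance on localhost:8080 with stable-diffusion-3-medium installed and an OpenAI-style /v1/images/generations endpoint; the exact response shape is an assumption.

```go
// Minimal sketch: generate an image with the stable-diffusion-3-medium
// gallery model via LocalAI's OpenAI-compatible images endpoint.
// Assumes LocalAI listens on localhost:8080 and the model is installed.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]string{
		"model":  "stable-diffusion-3-medium",
		"prompt": "a photograph of an astronaut riding a horse",
		"size":   "1024x1024",
	})
	resp, err := http.Post("http://localhost:8080/v1/images/generations",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Assumed OpenAI-style response: {"data":[{"url":"..."}]}
	var out struct {
		Data []struct {
			URL string `json:"url"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	for _, d := range out.Data {
		fmt.Println(d.URL) // location of the generated image
	}
}
```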
46 changes: 42 additions & 4 deletions pkg/library/dynaload.go
@@ -5,11 +5,20 @@ import (
"os"
"path/filepath"
"runtime"
+
+     "github.com/rs/zerolog/log"
)

/*
This file contains functions to load libraries from the asset directory to keep the business logic clean.
*/

+ // skipLibraryPath checks if LOCALAI_SKIP_LIBRARY_PATH is set
+ var skipLibraryPath = os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != ""

// LoadExtractedLibs loads the extracted libraries from the asset dir
func LoadExtractedLibs(dir string) {
// Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != "" {
+ if skipLibraryPath {
return
}

@@ -18,9 +27,38 @@ func LoadExtractedLibs(dir string) {
}
}

+ // LoadLDSO checks if there is a ld.so in the asset dir and if so, prefixes the grpc process with it.
+ // On Linux, if we find a ld.so in the asset dir we prefix it to run with the libs exposed in
+ // LD_LIBRARY_PATH for more compatibility.
+ // If we don't do this, we might run into stack smashing.
+ // See also: https://stackoverflow.com/questions/847179/multiple-glibc-libraries-on-a-single-host/851229#851229
+ // In this case, we expect a ld.so in the lib asset dir.
+ // If that's present, we use it to run the grpc backends, which are presumably built against
+ // that specific version of ld.so.
+ func LoadLDSO(assetDir string, args []string, grpcProcess string) ([]string, string) {
+     if skipLibraryPath {
+         return args, grpcProcess
+     }
+
+     if runtime.GOOS != "linux" {
+         return args, grpcProcess
+     }
+
+     // Check if there is a ld.so file in the assetDir; if so, we need to run the grpc process with it
+     ldPath := filepath.Join(assetDir, "backend-assets", "lib", "ld.so")
+     if _, err := os.Stat(ldPath); err == nil {
+         log.Debug().Msgf("ld.so found")
+         // We need to run the grpc process with the ld.so
+         args = append(args, grpcProcess)
+         grpcProcess = ldPath
+     }
+
+     return args, grpcProcess
+ }

// LoadExternal sets the LD_LIBRARY_PATH to include the given directory
func LoadExternal(dir string) {
// Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != "" {
+ if skipLibraryPath {
return
}

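To make the new helper concrete, here is a minimal sketch of how LoadLDSO rewrites an invocation; the asset dir and backend path are hypothetical, and the behavior shown (the bundled loader becomes the process to exec, the original backend is demoted to its first argument) follows the function above.

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/library"
)

func main() {
	// Hypothetical asset dir and backend binary, for illustration only.
	assetDir := "/tmp/localai"
	grpcProcess := "/tmp/localai/backend-assets/grpc/llama-cpp"
	args := []string{}

	// On Linux, if /tmp/localai/backend-assets/lib/ld.so exists, LoadLDSO
	// returns the bundled loader as the process to exec and pushes the
	// original backend onto the argument list; otherwise it is a no-op.
	args, grpcProcess = library.LoadLDSO(assetDir, args, grpcProcess)

	fmt.Println("exec:", grpcProcess, "args:", args)
}
```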
10 changes: 8 additions & 2 deletions pkg/model/initializers.go
@@ -11,6 +11,7 @@ import (
"time"

grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/go-skynet/LocalAI/pkg/library"
"github.com/go-skynet/LocalAI/pkg/xsysinfo"
"github.com/klauspost/cpuid/v2"
"github.com/phayes/freeport"
@@ -326,8 +327,13 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
}

- // Make sure the process is executable
- if err := ml.startProcess(grpcProcess, o.model, serverAddress); err != nil {
+ args := []string{}
+
+ // Load the ld.so if it exists
+ args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
+
+ // Make sure the process is executable in any circumstance
+ if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
return "", err
}

4 changes: 2 additions & 2 deletions pkg/model/process.go
@@ -69,7 +69,7 @@ func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
return strconv.Atoi(p.PID)
}

- func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error {
+ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) error {
// Make sure the process is executable
if err := os.Chmod(grpcProcess, 0700); err != nil {
return err
@@ -82,7 +82,7 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
grpcControlProcess := process.New(
process.WithTemporaryStateDir(),
process.WithName(grpcProcess),
process.WithArgs("--addr", serverAddress),
process.WithArgs(append(args, []string{"--addr", serverAddress}...)...),
process.WithEnvironment(os.Environ()...),
)

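Tying the two changes together: startProcess now threads the extra arguments in ahead of --addr, so when ld.so is bundled the spawned command line reads loader first, real backend second, server address last. A small sketch of the resulting ordering, with hypothetical paths, mirroring the append passed to process.WithArgs:

```go
package main

import "fmt"

func main() {
	// Values as they would arrive from grpcModel after library.LoadLDSO
	// rewrote the process and argument list (paths are illustrative).
	grpcProcess := "/tmp/localai/backend-assets/lib/ld.so"
	args := []string{"/tmp/localai/backend-assets/grpc/llama-cpp"}
	serverAddress := "127.0.0.1:40123"

	// Mirrors process.WithArgs(append(args, []string{"--addr", serverAddress}...)...)
	finalArgs := append(args, []string{"--addr", serverAddress}...)

	// The backend therefore runs as:
	//   ld.so <backend binary> --addr <address>
	fmt.Println(grpcProcess, finalArgs)
}
```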
