feat: track internally started models by ID (#3693)
* chore(refactor): track internally started models by ID

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Just extend options, no need to copy

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Improve debugging for reranker failures

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Simplify model loading with rerankers

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Be more consistent when generating model options

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Uncommitted code

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Make deleteProcess more idiomatic

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Adapt CLI for sound generation

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Fixup threads definition

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Handle corner case where c.Seed is nil

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Consistently use ModelOptions

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Adapt new code to refactoring

Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
Co-authored-by: Dave <[email protected]>
mudler and dave-gray101 authored Oct 2, 2024
1 parent db70419 commit 0965c6c
Showing 20 changed files with 169 additions and 185 deletions.
11 changes: 1 addition & 10 deletions core/backend/embeddings.go
@@ -10,20 +10,11 @@ import (
 )
 
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
-	modelFile := backendConfig.Model
-
-	grpcOpts := GRPCModelOpts(backendConfig)
-
 	var inferenceModel interface{}
 	var err error
 
-	opts := modelOpts(backendConfig, appConfig, []model.Option{
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(*backendConfig.Threads)),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithModel(modelFile),
-		model.WithContext(appConfig.Context),
-	})
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})
 
 	if backendConfig.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(opts...)
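For readers unfamiliar with the pattern this refactor leans on: model.Option is a functional option, so a single helper can build the default option list and each caller appends only what differs. A minimal, self-contained sketch of the idea with simplified stand-in types (not the actual LocalAI definitions), assuming the usual apply-in-order semantics:

package main

import "fmt"

// Simplified stand-ins for model.Option and the settings it mutates.
type settings struct {
	backend string
	model   string
}

type Option func(*settings)

func WithBackend(b string) Option { return func(s *settings) { s.backend = b } }
func WithModel(m string) Option   { return func(s *settings) { s.model = m } }

// apply runs options in order, so later options override earlier ones.
func apply(opts ...Option) settings {
	s := settings{}
	for _, o := range opts {
		o(&s)
	}
	return s
}

func main() {
	defaults := []Option{WithBackend("default-backend"), WithModel("default.gguf")}
	// Caller overrides are appended after the defaults, so they win.
	merged := append(defaults, WithModel("custom.gguf"))
	fmt.Printf("%+v\n", apply(merged...)) // {backend:default-backend model:custom.gguf}
}

Because caller options land after the defaults, the one-line ModelOptions(...) call above is a safe replacement for the hand-built lists it deletes.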
15 changes: 2 additions & 13 deletions core/backend/image.go
@@ -8,19 +8,8 @@ import (
 )
 
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-	threads := backendConfig.Threads
-	if *threads == 0 && appConfig.Threads != 0 {
-		threads = &appConfig.Threads
-	}
-	gRPCOpts := GRPCModelOpts(backendConfig)
-	opts := modelOpts(backendConfig, appConfig, []model.Option{
-		model.WithBackendString(backendConfig.Backend),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithThreads(uint32(*threads)),
-		model.WithContext(appConfig.Context),
-		model.WithModel(backendConfig.Model),
-		model.WithLoadGRPCLoadModelOpts(gRPCOpts),
-	})
+
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})
 
 	inferenceModel, err := loader.BackendLoader(
 		opts...,
13 changes: 1 addition & 12 deletions core/backend/llm.go
@@ -33,22 +33,11 @@ type TokenUsage struct {
 
 func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
-	threads := c.Threads
-	if *threads == 0 && o.Threads != 0 {
-		threads = &o.Threads
-	}
-	grpcOpts := GRPCModelOpts(c)
 
 	var inferenceModel grpc.Backend
 	var err error
 
-	opts := modelOpts(c, o, []model.Option{
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithModel(modelFile),
-		model.WithContext(o.Context),
-	})
+	opts := ModelOptions(c, o, []model.Option{})
 
 	if c.Backend != "" {
 		opts = append(opts, model.WithBackendString(c.Backend))
101 changes: 85 additions & 16 deletions core/backend/options.go
@@ -11,59 +11,128 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+	name := c.Name
+	if name == "" {
+		name = c.Model
+	}
+
+	defOpts := []model.Option{
+		model.WithBackendString(c.Backend),
+		model.WithModel(c.Model),
+		model.WithAssetDir(so.AssetsDestination),
+		model.WithContext(so.Context),
+		model.WithModelID(name),
+	}
+
+	threads := 1
+
+	if c.Threads != nil {
+		threads = *c.Threads
+	}
+
+	if so.Threads != 0 {
+		threads = so.Threads
+	}
+
+	c.Threads = &threads
+
+	grpcOpts := grpcModelOpts(c)
+	defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
+
 	if so.SingleBackend {
-		opts = append(opts, model.WithSingleActiveBackend())
+		defOpts = append(defOpts, model.WithSingleActiveBackend())
 	}
 
 	if so.ParallelBackendRequests {
-		opts = append(opts, model.EnableParallelRequests)
+		defOpts = append(defOpts, model.EnableParallelRequests)
 	}
 
 	if c.GRPC.Attempts != 0 {
-		opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
+		defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
 	}
 
 	if c.GRPC.AttemptsSleepTime != 0 {
-		opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
+		defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
 	}
 
 	for k, v := range so.ExternalGRPCBackends {
-		opts = append(opts, model.WithExternalBackend(k, v))
+		defOpts = append(defOpts, model.WithExternalBackend(k, v))
 	}
 
-	return opts
+	return append(defOpts, opts...)
 }
 
 func getSeed(c config.BackendConfig) int32 {
-	seed := int32(*c.Seed)
+	var seed int32 = config.RAND_SEED
+
+	if c.Seed != nil {
+		seed = int32(*c.Seed)
+	}
+
 	if seed == config.RAND_SEED {
 		seed = rand.Int31()
 	}
 
 	return seed
 }
 
-func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
+func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
 	}
 
+	f16 := false
+	if c.F16 != nil {
+		f16 = *c.F16
+	}
+
+	embeddings := false
+	if c.Embeddings != nil {
+		embeddings = *c.Embeddings
+	}
+
+	lowVRAM := false
+	if c.LowVRAM != nil {
+		lowVRAM = *c.LowVRAM
+	}
+
+	mmap := false
+	if c.MMap != nil {
+		mmap = *c.MMap
+	}
+
+	ctxSize := 1024
+	if c.ContextSize != nil {
+		ctxSize = *c.ContextSize
+	}
+
+	mmlock := false
+	if c.MMlock != nil {
+		mmlock = *c.MMlock
+	}
+
+	nGPULayers := 9999999
+	if c.NGPULayers != nil {
+		nGPULayers = *c.NGPULayers
+	}
+
 	return &pb.ModelOptions{
 		CUDA:          c.CUDA || c.Diffusers.CUDA,
 		SchedulerType: c.Diffusers.SchedulerType,
 		PipelineType:  c.Diffusers.PipelineType,
 		CFGScale:      c.Diffusers.CFGScale,
 		LoraAdapter:   c.LoraAdapter,
 		LoraScale:     c.LoraScale,
-		F16Memory:     *c.F16,
+		F16Memory:     f16,
 		LoraBase:      c.LoraBase,
 		IMG2IMG:       c.Diffusers.IMG2IMG,
 		CLIPModel:     c.Diffusers.ClipModel,
 		CLIPSubfolder: c.Diffusers.ClipSubFolder,
 		CLIPSkip:      int32(c.Diffusers.ClipSkip),
 		ControlNet:    c.Diffusers.ControlNet,
-		ContextSize:   int32(*c.ContextSize),
+		ContextSize:   int32(ctxSize),
 		Seed:          getSeed(c),
 		NBatch:        int32(b),
 		NoMulMatQ:     c.NoMulMatQ,
@@ -85,16 +154,16 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		YarnBetaSlow:  c.YarnBetaSlow,
 		NGQA:          c.NGQA,
 		RMSNormEps:    c.RMSNormEps,
-		MLock:         *c.MMlock,
+		MLock:         mmlock,
 		RopeFreqBase:  c.RopeFreqBase,
 		RopeScaling:   c.RopeScaling,
 		Type:          c.ModelType,
 		RopeFreqScale: c.RopeFreqScale,
 		NUMA:          c.NUMA,
-		Embeddings:    *c.Embeddings,
-		LowVRAM:       *c.LowVRAM,
-		NGPULayers:    int32(*c.NGPULayers),
-		MMap:          *c.MMap,
+		Embeddings:    embeddings,
+		LowVRAM:       lowVRAM,
+		NGPULayers:    int32(nGPULayers),
+		MMap:          mmap,
 		MainGPU:       c.MainGPU,
 		Threads:       int32(*c.Threads),
 		TensorSplit:   c.TensorSplit,
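The new nil guards in grpcModelOpts all share one shape: dereference the config pointer when it is set, otherwise fall back to a default. With Go 1.18+ generics the same logic could be a tiny helper; a hypothetical sketch for illustration only (the PR keeps the explicit per-field checks, which stay grep-able field by field):

package main

import "fmt"

// derefOr returns *p when p is non-nil, otherwise def.
// Hypothetical helper, not part of this PR.
func derefOr[T any](p *T, def T) T {
	if p == nil {
		return def
	}
	return *p
}

func main() {
	var f16 *bool   // field left unset in the model YAML
	ctx := new(int) // field set explicitly
	*ctx = 4096

	fmt.Println(derefOr(f16, false)) // false: safe default instead of a nil-deref panic
	fmt.Println(derefOr(ctx, 1024))  // 4096: the explicit value wins over the default
}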
16 changes: 2 additions & 14 deletions core/backend/rerank.go
@@ -9,21 +9,9 @@ import (
 	model "github.com/mudler/LocalAI/pkg/model"
 )
 
-func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
-	bb := backend
-	if bb == "" {
-		return nil, fmt.Errorf("backend is required")
-	}
-
-	grpcOpts := GRPCModelOpts(backendConfig)
-
-	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
-		model.WithBackendString(bb),
-		model.WithModel(modelFile),
-		model.WithContext(appConfig.Context),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-	})
+func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
+
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
 	rerankModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return nil, err
13 changes: 1 addition & 12 deletions core/backend/soundgeneration.go
@@ -13,7 +13,6 @@ import (
 )
 
 func SoundGeneration(
-	backend string,
 	modelFile string,
 	text string,
 	duration *float32,
@@ -25,18 +24,8 @@ func SoundGeneration(
 	appConfig *config.ApplicationConfig,
 	backendConfig config.BackendConfig,
 ) (string, *proto.Result, error) {
-	if backend == "" {
-		return "", nil, fmt.Errorf("backend is a required parameter")
-	}
-
-	grpcOpts := GRPCModelOpts(backendConfig)
-	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
-		model.WithBackendString(backend),
-		model.WithModel(modelFile),
-		model.WithContext(appConfig.Context),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-	})
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
 
 	soundGenModel, err := loader.BackendLoader(opts...)
 	if err != nil {
13 changes: 1 addition & 12 deletions core/backend/token_metrics.go
@@ -10,24 +10,13 @@ import (
 )
 
 func TokenMetrics(
-	backend,
 	modelFile string,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
 	backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
-	bb := backend
-	if bb == "" {
-		return nil, fmt.Errorf("backend is required")
-	}
-
-	grpcOpts := GRPCModelOpts(backendConfig)
-
-	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
-		model.WithBackendString(bb),
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{
 		model.WithModel(modelFile),
-		model.WithContext(appConfig.Context),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	model, err := loader.BackendLoader(opts...)
 	if err != nil {
12 changes: 5 additions & 7 deletions core/backend/transcript.go
@@ -14,13 +14,11 @@ import (
 
 func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
 
-	opts := modelOpts(backendConfig, appConfig, []model.Option{
-		model.WithBackendString(model.WhisperBackend),
-		model.WithModel(backendConfig.Model),
-		model.WithContext(appConfig.Context),
-		model.WithThreads(uint32(*backendConfig.Threads)),
-		model.WithAssetDir(appConfig.AssetsDestination),
-	})
+	if backendConfig.Backend == "" {
+		backendConfig.Backend = model.WhisperBackend
+	}
+
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})
 
 	transcriptionModel, err := ml.BackendLoader(opts...)
 	if err != nil {
7 changes: 1 addition & 6 deletions core/backend/tts.go
@@ -28,14 +28,9 @@ func ModelTTS(
 		bb = model.PiperBackend
 	}
 
-	grpcOpts := GRPCModelOpts(backendConfig)
-
-	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
+	opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
-		model.WithContext(appConfig.Context),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	ttsModel, err := loader.BackendLoader(opts...)
 	if err != nil {
3 changes: 2 additions & 1 deletion core/cli/soundgeneration.go
@@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
 
 	options := config.BackendConfig{}
 	options.SetDefaults()
+	options.Backend = t.Backend
 
 	var inputFile *string
 	if t.InputFile != "" {
 		inputFile = &t.InputFile
 	}
 
-	filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
+	filePath, _, err := backend.SoundGeneration(t.Model, text,
 		parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
 		inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
 
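This CLI hunk is the template for every migrated call site: the backend name stops being a separate positional argument and instead rides on the BackendConfig that already flows through. A rough, runnable sketch of the new shape, using simplified stand-in types and a hypothetical backend name rather than LocalAI's real ones:

package main

import "fmt"

// Stand-in for config.BackendConfig; the real struct has many more fields.
type BackendConfig struct {
	Backend string
	Model   string
}

// Old shape (for contrast): soundGeneration(backendName, modelFile string, ...).
// New shape: the backend travels inside the config that is passed anyway.
func soundGeneration(modelFile string, cfg BackendConfig) {
	fmt.Printf("loading %s via backend %q\n", modelFile, cfg.Backend)
}

func main() {
	cfg := BackendConfig{}
	cfg.Backend = "my-sound-backend" // previously a positional argument (t.Backend)
	soundGeneration("voice.bin", cfg)
}

Dropping the parameter shrinks every signature and removes the duplicated "backend is required" checks, since the backend is now validated where the config is built.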
2 changes: 1 addition & 1 deletion core/http/endpoints/elevenlabs/soundgeneration.go
@@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 	}
 
 	// TODO: Support uploading files?
-	filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
+	filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
 	if err != nil {
 		return err
 	}
4 changes: 2 additions & 2 deletions core/http/endpoints/jina/rerank.go
@@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		config.LoadOptionContextSize(appConfig.ContextSize),
 		config.LoadOptionF16(appConfig.F16),
 	)
-
 	if err != nil {
 		modelFile = input.Model
 		log.Warn().Msgf("Model not found in context: %s", input.Model)
 	} else {
 		modelFile = cfg.Model
 	}
+
 	log.Debug().Msgf("Request for model: %s", modelFile)
 
 	if input.Backend != "" {
@@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		Documents: req.Documents,
 	}
 
-	results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
+	results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
 	if err != nil {
 		return err
 	}
