From 50a3b54e3474fd552b352222a90f70c8ab624ceb Mon Sep 17 00:00:00 2001
From: siddimore
Date: Sat, 28 Sep 2024 08:23:56 -0700
Subject: [PATCH] feat(api): add correlationID to Track Chat requests (#3668)

* Add CorrelationID to chat request

Signed-off-by: Siddharth More

* remove get_token_metrics

Signed-off-by: Siddharth More

* Add CorrelationID to proto

Signed-off-by: Siddharth More

* fix correlation method name

Signed-off-by: Siddharth More

* Update core/http/endpoints/openai/chat.go

Co-authored-by: Ettore Di Giacinto
Signed-off-by: Siddharth More

* Update core/http/endpoints/openai/chat.go

Signed-off-by: Ettore Di Giacinto
Signed-off-by: Siddharth More

---------

Signed-off-by: Siddharth More
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
 backend/backend.proto                    |  1 +
 backend/cpp/llama/grpc-server.cpp        | 14 ++++++++++++++
 core/http/endpoints/openai/chat.go       |  7 +++++++
 core/http/endpoints/openai/completion.go |  2 ++
 core/http/endpoints/openai/request.go    | 13 ++++++++++++-
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/backend/backend.proto b/backend/backend.proto
index 31bd63e5086..b2d4518e133 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -136,6 +136,7 @@ message PredictOptions {
   repeated Message Messages = 44;
   repeated string Videos = 45;
   repeated string Audios = 46;
+  string CorrelationId = 47;
 }
 
 // The response message containing the result
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 56d59d217a7..791612dbcc9 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["ignore_eos"] = predict->ignoreeos();
     data["embeddings"] = predict->embeddings();
 
+    // Add the correlationid to json data
+    data["correlation_id"] = predict->correlationid();
+
     // for each image in the request, add the image data
     //
     for (int i = 0; i < predict->images_size(); i++) {
@@ -2344,6 +2347,11 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
                 reply.set_prompt_tokens(tokens_evaluated);
 
+                // Log Request Correlation Id
+                LOG_VERBOSE("correlation:", {
+                    { "id", data["correlation_id"] }
+                });
+
                 // Send the reply
                 writer->Write(reply);
 
@@ -2367,6 +2375,12 @@ class BackendServiceImpl final : public backend::Backend::Service {
         std::string completion_text;
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
+
+            // Log Request Correlation Id
+            LOG_VERBOSE("correlation:", {
+                { "id", data["correlation_id"] }
+            });
+
             completion_text = result.result_json.value("content", "");
             int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index b937120a333..1ac1387eed3 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		textContentToReturn = ""
 		id = uuid.New().String()
 		created = int(time.Now().Unix())
+		// Set CorrelationID
+		correlationID := c.Get("X-Correlation-ID")
+		if len(strings.TrimSpace(correlationID)) == 0 {
+			correlationID = id
+		}
+		c.Set("X-Correlation-ID", correlationID)
 
 		modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
 		if err != nil {
@@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		c.Set("Cache-Control", "no-cache")
 		c.Set("Connection", "keep-alive")
 		c.Set("Transfer-Encoding", "chunked")
+		c.Set("X-Correlation-ID", id)
 
 		responses := make(chan schema.OpenAIResponse)
 
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index b087cc5f8d3..e5de1b3f029 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 	}
 
 	return func(c *fiber.Ctx) error {
+		// Add Correlation
+		c.Set("X-Correlation-ID", id)
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index e24dd28f2e4..d6182a391fe 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 
 	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
 	"github.com/mudler/LocalAI/core/schema"
@@ -15,6 +16,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+type correlationIDKeyType string
+
+// CorrelationIDKey to track request across process boundary
+const CorrelationIDKey correlationIDKeyType = "correlationID"
+
 func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
 	input := new(schema.OpenAIRequest)
 
@@ -24,9 +30,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
 	}
 
 	received, _ := json.Marshal(input)
+	// Extract or generate the correlation ID
+	correlationID := c.Get("X-Correlation-ID", uuid.New().String())
 
 	ctx, cancel := context.WithCancel(o.Context)
-	input.Context = ctx
+	// Add the correlation ID to the new context
+	ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
+
+	input.Context = ctxWithCorrelationID
 	input.Cancel = cancel
 
 	log.Debug().Msgf("Request received: %s", string(received))
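
Editor's note (not part of the commit): the patch threads a correlation ID end to end. readRequest reads the X-Correlation-ID request header (or generates a UUID), stores the value in the request context under CorrelationIDKey, the chat endpoint echoes it back as a response header, the new PredictOptions.CorrelationId proto field can carry it to the backend, and grpc-server.cpp logs it via LOG_VERBOSE. The Go sketch below only illustrates the typed-context-key pattern the patch introduces; the helper name correlationIDFromContext is hypothetical and does not exist in the repository.

    package main

    import (
    	"context"
    	"fmt"
    )

    // Mirror of the typed context key added in core/http/endpoints/openai/request.go.
    type correlationIDKeyType string

    const CorrelationIDKey correlationIDKeyType = "correlationID"

    // correlationIDFromContext is a hypothetical helper (not in this patch): it reads
    // back the ID that readRequest attaches to the context with context.WithValue.
    func correlationIDFromContext(ctx context.Context) (string, bool) {
    	id, ok := ctx.Value(CorrelationIDKey).(string)
    	return id, ok
    }

    func main() {
    	// Equivalent of what readRequest now does with the incoming X-Correlation-ID header.
    	ctx := context.WithValue(context.Background(), CorrelationIDKey, "3f2c9b1e-example")

    	if id, ok := correlationIDFromContext(ctx); ok {
    		fmt.Println("correlation id:", id) // e.g. forward it to backend calls or logs
    	}
    }

Because CorrelationIDKey has its own unexported type, a plain string key set by another package cannot collide with it, which is the usual motivation for the correlationIDKeyType wrapper.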