Merge pull request kubeagi#877 from nkwangleiGIT/main

Fix conversation chat with doc for 1st time and update ray to 2.9.3(vllm 0.3.3)
nkwangleiGIT · Mar 18, 2024 · d1aed4d · d1aed4d
2 parents 9cfb0e2 + 15a61da
commit d1aed4d
Show file tree

Hide file tree

Showing 4 changed files with 27 additions and 12 deletions.
diff --git a/apiserver/service/chat.go b/apiserver/service/chat.go
@@ -41,7 +41,7 @@ import (
 
 const (
 	// Time interval to check if the chat stream should be closed if no more message arrives
-	WaitTimeoutForChatStreaming = 30
+	WaitTimeoutForChatStreaming = 120
 	// default prompt starter
 	PromptLimit = 4
 )
@@ -232,7 +232,7 @@ func (cs *ChatService) ChatFile() gin.HandlerFunc {
 
 		messageID := string(uuid.NewUUID())
 		// Upload the file to specific dst.
-		resp, err := cs.server.ReceiveConversationFile(c, messageID, req, file)
+		resp, err := cs.server.ReceiveConversationFile(c.Request.Context(), messageID, req, file)
 		if err != nil {
 			klog.FromContext(c.Request.Context()).Error(err, "error receive conversational file")
 			c.JSON(http.StatusInternalServerError, chat.ErrorResp{Err: err.Error()})

diff --git a/deploy/llms/Dockerfile.fastchat-worker-vllm b/deploy/llms/Dockerfile.fastchat-worker-vllm
@@ -4,6 +4,7 @@ FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}
 # Official: https://pypi.org/simple 
 ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
 # Install requirements for vllm worker
+# Targets Ray v2.9.3 and vllm v0.3.3 (note: the vllm install below is not version-pinned)
 RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL}
 
 # Allow to use environment variable to set ray & python version to pass the version check

diff --git a/deploy/llms/utils.py b/deploy/llms/utils.py
@@ -1,5 +1,7 @@
+# Copied from Ray v2.9.3, with KubeAGI changes added
 import asyncio
 import binascii
+from collections import defaultdict
 import contextlib
 import errno
 import functools
@@ -17,22 +19,33 @@
 import tempfile
 import threading
 import time
+from urllib.parse import urlencode, unquote, urlparse, parse_qsl, urlunparse
 import warnings
-from collections import defaultdict
 from inspect import signature
 from pathlib import Path
 from subprocess import list2cmdline
-from typing import (TYPE_CHECKING, Any, Coroutine, Dict, List, Mapping,
-                    Optional, Sequence, Tuple, Union)
-from urllib.parse import parse_qsl, unquote, urlencode, urlparse, urlunparse
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+    Coroutine,
+    List,
+    Mapping,
+)
 
 # Import psutil after ray so the packaged version is used.
 import psutil
+from google.protobuf import json_format
+
 import ray
 import ray._private.ray_constants as ray_constants
-from google.protobuf import json_format
-from ray.core.generated.runtime_env_common_pb2 import \
-    RuntimeEnvInfo as ProtoRuntimeEnvInfo
+from ray.core.generated.runtime_env_common_pb2 import (
+    RuntimeEnvInfo as ProtoRuntimeEnvInfo,
+)
 
 if TYPE_CHECKING:
     from ray.runtime_env import RuntimeEnv
@@ -269,8 +282,9 @@ def get_visible_accelerator_ids() -> Mapping[str, Optional[List[str]]]:
     to the visible ids."""
 
     from ray._private.accelerators import (
+        get_all_accelerator_resource_names,
         get_accelerator_manager_for_resource,
-        get_all_accelerator_resource_names)
+    )
 
     return {
         accelerator_resource_name: get_accelerator_manager_for_resource(

diff --git a/deploy/ray/Dockerfile b/deploy/ray/Dockerfile
@@ -1,6 +1,6 @@
-# Use ray 2.9.0 built using python 3.9.x
+# Use ray 2.9.3 and vllm 0.3.3
 # Add vllm and tiktoken package
-FROM rayproject/ray-ml:2.9.0-py39
+FROM rayproject/ray-ml:2.9.3-py39
 # Official: https://pypi.org/simple
 ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"