Skip to content

Commit

Permalink
Merge pull request kubeagi#877 from nkwangleiGIT/main
Browse files Browse the repository at this point in the history
Fix conversation chat with doc for 1st time and update ray to 2.9.3(vllm 0.3.3)
  • Loading branch information
nkwangleiGIT authored Mar 18, 2024
2 parents 9cfb0e2 + 15a61da commit d1aed4d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 12 deletions.
4 changes: 2 additions & 2 deletions apiserver/service/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import (

const (
// Time interval to check whether the chat stream should be closed when no more messages arrive
WaitTimeoutForChatStreaming = 30
WaitTimeoutForChatStreaming = 120
// default prompt starter
PromptLimit = 4
)
Expand Down Expand Up @@ -232,7 +232,7 @@ func (cs *ChatService) ChatFile() gin.HandlerFunc {

messageID := string(uuid.NewUUID())
// Upload the file to the specified destination.
resp, err := cs.server.ReceiveConversationFile(c, messageID, req, file)
resp, err := cs.server.ReceiveConversationFile(c.Request.Context(), messageID, req, file)
if err != nil {
klog.FromContext(c.Request.Context()).Error(err, "error receive conversational file")
c.JSON(http.StatusInternalServerError, chat.ErrorResp{Err: err.Error()})
Expand Down
1 change: 1 addition & 0 deletions deploy/llms/Dockerfile.fastchat-worker-vllm
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}
# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
# Install requirements for vllm worker
# Ray v2.9.3 and vllm v0.3.3
RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL}

# Allow environment variables to set the ray & python versions so that the version check passes
Expand Down
30 changes: 22 additions & 8 deletions deploy/llms/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Copied from Ray v2.9.3, with KubeAGI changes added
import asyncio
import binascii
from collections import defaultdict
import contextlib
import errno
import functools
Expand All @@ -17,22 +19,33 @@
import tempfile
import threading
import time
from urllib.parse import urlencode, unquote, urlparse, parse_qsl, urlunparse
import warnings
from collections import defaultdict
from inspect import signature
from pathlib import Path
from subprocess import list2cmdline
from typing import (TYPE_CHECKING, Any, Coroutine, Dict, List, Mapping,
Optional, Sequence, Tuple, Union)
from urllib.parse import parse_qsl, unquote, urlencode, urlparse, urlunparse
from typing import (
TYPE_CHECKING,
Any,
Dict,
Optional,
Sequence,
Tuple,
Union,
Coroutine,
List,
Mapping,
)

# Import psutil after ray so the packaged version is used.
import psutil
from google.protobuf import json_format

import ray
import ray._private.ray_constants as ray_constants
from google.protobuf import json_format
from ray.core.generated.runtime_env_common_pb2 import \
RuntimeEnvInfo as ProtoRuntimeEnvInfo
from ray.core.generated.runtime_env_common_pb2 import (
RuntimeEnvInfo as ProtoRuntimeEnvInfo,
)

if TYPE_CHECKING:
from ray.runtime_env import RuntimeEnv
Expand Down Expand Up @@ -269,8 +282,9 @@ def get_visible_accelerator_ids() -> Mapping[str, Optional[List[str]]]:
to the visible ids."""

from ray._private.accelerators import (
get_all_accelerator_resource_names,
get_accelerator_manager_for_resource,
get_all_accelerator_resource_names)
)

return {
accelerator_resource_name: get_accelerator_manager_for_resource(
Expand Down
4 changes: 2 additions & 2 deletions deploy/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Use ray 2.9.0 built using python 3.9.x
# Use ray 2.9.3 and vllm 0.3.3
# Add vllm and tiktoken packages
FROM rayproject/ray-ml:2.9.0-py39
FROM rayproject/ray-ml:2.9.3-py39
# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"

Expand Down

0 comments on commit d1aed4d

Please sign in to comment.