Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dependents #71

Merged
merged 5 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ pip install chattool --upgrade

```bash
export OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
export OPENAI_BASE_URL="https://api.example.com"
export OPENAI_API_BASE="https://api.example.com/v1"
export OPENAI_API_BASE_URL="https://api.example.com" # 可选
```

Win 在系统中设置环境变量。
Expand Down Expand Up @@ -63,6 +64,7 @@ def msg2chat(msg):
chat = Chat()
chat.system("你是一个熟练的数字翻译家。")
chat.user(f"请将该数字翻译为罗马数字:{msg}")
# 注意,在函数内获取返回
chat.getresponse()
return chat

Expand All @@ -77,11 +79,16 @@ continue_chats = process_chats(msgs, msg2chat, checkpoint)
示例3,批量处理数据(异步并行),用不同语言打印 hello,并使用两个协程:

```python
from chattool import async_chat_completion, load_chats
from chattool import async_chat_completion, load_chats, Chat

langs = ["python", "java", "Julia", "C++"]
chatlogs = ["请用语言 %s 打印 hello world" % lang for lang in langs]
async_chat_completion(chatlogs, chkpoint="async_chat.jsonl", nproc=2)
def data2chat(msg):
chat = Chat()
chat.user("请用语言 %s 打印 hello world" % msg)
# 注意,这里不需要 getresponse 而交给异步处理
return chat

async_chat_completion(langs, chkpoint="async_chat.jsonl", nproc=2, data2chat=data2chat)
chats = load_chats("async_chat.jsonl")
```

Expand Down
10 changes: 8 additions & 2 deletions README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def msg2chat(msg):
chat = Chat()
chat.system("You are a helpful translator for numbers.")
chat.user(f"Please translate the digit to Roman numerals: {msg}")
# We need to call `getresponse` here to get the response
chat.getresponse()
return chat

Expand All @@ -77,8 +78,13 @@ Example 3, process data in batch (asynchronous), print hello using different lan
from chattool import async_chat_completion, load_chats, Chat

langs = ["python", "java", "Julia", "C++"]
chatlogs = ["print hello using %s" % lang for lang in langs]
async_chat_completion(chatlogs, chkpoint="async_chat.jsonl", nproc=2)
def data2chat(msg):
chat = Chat()
chat.user("Please print hello world using %s" % msg)
# Note: do not call `getresponse` here; the request is left to the asynchronous processing
return chat

async_chat_completion(langs, chkpoint="async_chat.jsonl", nproc=2, data2chat=data2chat)
chats = load_chats("async_chat.jsonl")
```

Expand Down
52 changes: 23 additions & 29 deletions chattool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,35 @@

__author__ = """Rex Wang"""
__email__ = '[email protected]'
__version__ = '3.0.1'
__version__ = '3.1.0'

import os, sys, requests
from .chattool import Chat, Resp
from .checkpoint import load_chats, process_chats
from .proxy import proxy_on, proxy_off, proxy_status
from . import request
from .tokencalc import num_tokens_from_messages, model_cost_perktoken, findcost
from .tokencalc import model_cost_perktoken, findcost
from .asynctool import async_chat_completion
from .functioncall import generate_json_schema, exec_python_code
from typing import Union
import dotenv

def load_envs(env:Union[None, str, dict]=None):
    """Read the environment variables for the API call

    Updates the module-level `api_key`, `base_url`, `api_base` and `model`.

    Args:
        env (Union[None, str, dict], optional): path of a dotenv file to load
            (overriding existing variables), a dict of variables to export to
            `os.environ`, or None to read the current environment as is.

    Returns:
        bool: always True
    """
    global api_key, base_url, api_base, model
    # update the environment variables
    if isinstance(env, str):
        # load the environment file
        dotenv.load_dotenv(env, override=True)
    elif isinstance(env, dict):
        for key, value in env.items():
            os.environ[key] = value
    # else: load from environment variables
    api_key = os.getenv('OPENAI_API_KEY')
    # Use `or` rather than a getenv default so that a variable that is set
    # but empty also falls back to the default value.
    base_url = os.getenv('OPENAI_API_BASE_URL') or "https://api.openai.com"
    # Join URL segments with '/' explicitly: os.path.join would insert the
    # OS path separator, which is a backslash on Windows.
    api_base = os.getenv('OPENAI_API_BASE') or base_url.rstrip('/') + '/v1'
    base_url = request.normalize_url(base_url)
    api_base = request.normalize_url(api_base)
    model = os.getenv('OPENAI_API_MODEL') or "gpt-3.5-turbo"
    return True

def save_envs(env_file:str):
Expand All @@ -43,6 +39,7 @@ def save_envs(env_file:str):
with open(env_file, "w") as f:
f.write(f"OPENAI_API_KEY={api_key}\n")
f.write(f"OPENAI_API_BASE_URL={base_url}\n")
f.write(f"OPENAI_API_BASE={api_base}\n")
f.write(f"OPENAI_API_MODEL={model}\n")
return True

Expand All @@ -58,14 +55,6 @@ def save_envs(env_file:str):
elif platform.startswith("darwin"):
platform = "macos"

def show_apikey():
    """Print the module-level API key, or a notice when it is not set.

    Returns:
        bool: True if `api_key` is not None, False otherwise
    """
    if api_key is None:
        print("API key is not set!")
        return False
    print(f"API key:\t{api_key}")
    return True

def default_prompt(msg:str):
"""Default prompt message for the API call

Expand All @@ -77,9 +66,13 @@ def default_prompt(msg:str):
"""
return [{"role": "user", "content": msg},]

def show_base_url():
"""Show the base url of the API call"""
print(f"Base url:\t{base_url}")
def get_valid_models(api_key:Union[None, str]=None, base_url:Union[None, str]=None):
    """Get valid models

    Args:
        api_key (Union[None, str], optional): API key. When None, the
            module-level `api_key` is looked up at call time.
        base_url (Union[None, str], optional): base url. When None, the
            module-level `base_url` is looked up at call time.

    Returns:
        List[str]: list of valid models, as returned by `request.valid_models`
    """
    # Resolve the defaults at call time. Binding the module globals in the
    # signature would freeze the values present when the function was
    # defined, so a later `load_envs` call would have no effect here.
    module_vars = globals()
    if api_key is None:
        api_key = module_vars.get('api_key')
    if base_url is None:
        base_url = module_vars.get('base_url')
    return request.valid_models(api_key, base_url)

def debug_log( net_url:str="https://www.baidu.com"
, timeout:int=5
Expand Down Expand Up @@ -111,14 +104,14 @@ def debug_log( net_url:str="https://www.baidu.com"

## Base url
print("\nCheck your base url:")
show_base_url()
print(base_url)
print("\nCheck the OpenAI Base url:")
print(os.environ.get("OPENAI_API_BASE"))
print(api_base)

## Please check your API key
if test_apikey:
print("\nPlease verify your API key:")
show_apikey()
print(api_key)

# Get model list
if test_model:
Expand All @@ -133,4 +126,5 @@ def debug_log( net_url:str="https://www.baidu.com"
chat.print_log()

print("\nDebug is finished.")
return True
return True

33 changes: 13 additions & 20 deletions chattool/chattool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from typing import List, Dict, Union
import chattool
from .response import Resp
from .tokencalc import num_tokens_from_messages
from .request import chat_completion, valid_models
import time, random, json, warnings
import aiohttp
Expand All @@ -16,6 +15,7 @@ def __init__( self
, api_key:Union[None, str]=None
, chat_url:Union[None, str]=None
, base_url:Union[None, str]=None
, api_base:Union[None, str]=None
, model:Union[None, str]=None
, functions:Union[None, List[Dict]]=None
, function_call:Union[None, str]=None
Expand All @@ -35,20 +35,24 @@ def __init__( self
Raises:
ValueError: msg should be a list of dict, a string or None
"""
# initial message
if msg is None:
self._chat_log = []
elif isinstance(msg, str):
if chattool.default_prompt is None:
self._chat_log = [{"role": "user", "content": msg}]
else:
self._chat_log = chattool.default_prompt(msg)
self._chat_log = chattool.default_prompt(msg)
elif isinstance(msg, list):
assert all(isinstance(m, dict) for m in msg), "msg should be a list of dict"
self._chat_log = msg.copy() # avoid changing the original list
else:
raise ValueError("msg should be a list of dict, a string or None")
self._api_key = api_key or chattool.api_key
self._base_url = base_url or chattool.base_url
self._chat_url = chat_url or self._base_url.rstrip('/') + '/v1/chat/completions'
# try: api_base => base_url => chattool.api_base => chattool.base_url
if api_base is None:
api_base = os.path.join(base_url, 'v1') if base_url is not None else chattool.api_base
base_url = base_url or chattool.base_url
self._base_url = base_url
self._api_base = api_base or os.path.join(base_url, "v1")
self._chat_url = chat_url or self._api_base.rstrip('/') + '/chat/completions'
self._model = model or chattool.model
if functions is not None:
assert isinstance(functions, list), "functions should be a list of dict"
Expand Down Expand Up @@ -191,7 +195,7 @@ def getresponse( self
Resp: API response
"""
# initialize data
api_key, model = self.api_key, self.model
api_key, model, chat_url = self.api_key, self.model, self.chat_url
funcs = options.get('functions', self.functions)
func_call = options.get('function_call', self.function_call)
if api_key is None: warnings.warn("API key is not set!")
Expand All @@ -207,7 +211,7 @@ def getresponse( self
if func_call is not None: options['function_call'] = func_call
response = chat_completion(
api_key=api_key, messages=msg, model=model,
chat_url=self.chat_url, timeout=timeout, **options)
chat_url=chat_url, timeout=timeout, **options)
resp = Resp(response)
assert resp.is_valid(), resp.error_message
break
Expand Down Expand Up @@ -350,17 +354,6 @@ def get_valid_models(self, gpt_only:bool=True)->List[str]:
List[str]: valid models
"""
return valid_models(self.api_key, self.base_url, gpt_only=gpt_only)

def prompt_token(self, model:str="gpt-3.5-turbo-0613"):
"""Get the prompt token for the model

Args:
model (str): model to use

Returns:
str: prompt token
"""
return num_tokens_from_messages(self.chat_log, model=model)

# Part5: properties and setters
@property
Expand Down
42 changes: 0 additions & 42 deletions chattool/tokencalc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import tiktoken

# model cost($ per 1K tokens)
## Reference: https://openai.com/pricing
## model | input | output
Expand Down Expand Up @@ -42,43 +40,3 @@ def findcost(model:str, prompt_tokens:int, completion_tokens:int=0):
else:
inprice, outprice = model_cost_perktoken['gpt-4']
return (inprice * prompt_tokens + outprice * completion_tokens) / 1000

def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages.

    Args:
        messages: iterable of message dicts; every value of each dict is
            passed to the tokenizer's `encode`.
        model (str): model name used to pick the encoding and the
            per-message overhead.

    Returns:
        int: token count, including the fixed reply-priming overhead

    Raises:
        NotImplementedError: if `model` matches none of the known families
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    pinned_models = {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }
    if model in pinned_models:
        tokens_per_message, tokens_per_name = 3, 1
    elif model == "gpt-3.5-turbo-0301":
        # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_message = 4
        # if there's a name, the role is omitted
        tokens_per_name = -1
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )

    total = 0
    for message in messages:
        total += tokens_per_message
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                total += tokens_per_name
    # every reply is primed with <|start|>assistant<|message|>
    return total + 3
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
with open('README.md') as readme_file:
readme = readme_file.read()

VERSION = '3.0.1'
VERSION = '3.1.0'

requirements = [
'Click>=7.0', 'requests>=2.20', "responses>=0.23", 'aiohttp>=3.8',
'tqdm>=4.60', 'tiktoken>=0.4.0', 'docstring_parser>=0.10', "python-dotenv>=0.17.0"]
'tqdm>=4.60', 'docstring_parser>=0.10', "python-dotenv>=0.17.0"]
test_requirements = ['pytest>=3', 'unittest']

setup(
Expand Down
8 changes: 1 addition & 7 deletions tests/test_async.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import chattool, time, os
from chattool import Chat, process_chats, num_tokens_from_messages
from chattool import Chat, process_chats
from chattool.asynctool import async_chat_completion
import asyncio, pytest

Expand Down Expand Up @@ -78,9 +78,3 @@ def data2chat(data):
process_chats(chatlogs, data2chat, chkpoint, clearfile=True)
print(f"Time elapsed: {time.time() - t:.2f}s")

def test_tokencounter():
message = [{"role": "user", "content": "hello world!"}]
prompttoken = num_tokens_from_messages(message)
chat = Chat(message)
resp = chat.getresponse()
assert resp.prompt_tokens == prompttoken
20 changes: 6 additions & 14 deletions tests/test_chattool.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,7 @@ def test_with_template():
assert chat.chat_log == [
{"role": "system", "content": "I am a system message"},
{"role": "user", "content": "hello!"}]
chattool.default_prompt = None
chat = Chat("hello!")
assert chat.chat_log == [{"role": "user", "content": "hello!"}]
chattool.default_prompt = lambda msg: [{"role": "user", "content": msg}]

def test_error_message():
resp = Resp(response=err_api_key_resp)
Expand Down Expand Up @@ -236,16 +234,10 @@ def test_show():
assert repr(resp) == "<Resp with finished reason: stop>"

def test_token():
chat = Chat()
chat.user("hello!")
chat.assistant("Hello, how can I assist you today?")
print(f'gpt-3.5-cost: {findcost(chat.model, chat.prompt_token())}')
chat.model = "gpt-3.5-turbo-16k"
print(f'gpt-3.5 16k cost: {findcost(chat.model, chat.prompt_token())}')
chat.model = "gpt-4"
print(f'gpt-4 cost: {findcost(chat.model, chat.prompt_token())}')
chat.model = "gpt-4-32k"
print(f'gpt-4 32k cost: {findcost(chat.model, chat.prompt_token())}')
chat.model = "ft:gpt-3.5-turbo-0613:personal:recipe-ner:819klqSI"
models = ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k",
"ft:gpt-3.5-turbo-0613:personal:recipe-ner:819klqSI"]
ntokens = 1000
for model in models:
print(f"model: {model}", "ntokens:", ntokens, "cost:", findcost(model, ntokens))
with pytest.raises(AssertionError):
findcost("test-model", 100)
2 changes: 1 addition & 1 deletion tests/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_add_and_mult():
# test multichoice
chat.clear()
chat.user("find the value of 23723 + 12312, and 23723 * 12312")
chat.autoresponse(max_tries=3, timeinterval=2)
# chat.autoresponse(max_tries=3, timeinterval=2)

def test_mock_resp():
chat = Chat("find the sum of 1235 and 3423")
Expand Down
3 changes: 0 additions & 3 deletions tests/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ def test_debug_log():
"""Test the debug log"""
assert debug_log(net_url="https://www.baidu.com") or debug_log(net_url="https://www.google.com")
assert not debug_log(net_url="https://baidu123.com") # invalid url
chattool.api_key = None
chattool.show_apikey()
chattool.api_key = api_key

# normalize base url
def test_is_valid_url():
Expand Down
Loading