Skip to content

Commit

Permalink
[Feat] minor update agent related (#839)
Browse files Browse the repository at this point in the history
* [Feat] update cibench

* [Feat] Support CIBench

* [Feat] Support CIBench

* [Feat] Support CIBench

* [Feat] Support CIBench
  • Loading branch information
yingfhu authored Jan 26, 2024
1 parent 77be07d commit 4aa7456
Show file tree
Hide file tree
Showing 6 changed files with 286 additions and 11 deletions.
96 changes: 96 additions & 0 deletions configs/eval_chat_cibench_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from mmengine.config import read_base

from opencompass.lagent.actions.ipython_interpreter import IPythonInterpreter
from opencompass.lagent.agents.react import CIReAct, ReActProtocol
from opencompass.models.lagent import CodeAgent
from opencompass.models.openai_api import OpenAI
from opencompass.partitioners import SizePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
from .datasets.CIBench.CIBench_template_gen_e6b12a import \
cibench_datasets as datasets

# Prompt handed to the protocol as `force_stop` (see `models` below); it is the
# message used when the agent is forced to answer on its last allowed turn.
FORCE_STOP_PROMPT_EN = """You should directly give results based on history information."""

# Instruction prompt for the code agent, passed as `call_protocol` to the
# ReActProtocol in `models` below. The `{tool_description}`, `{action_names}`,
# `{thought}`, `{action}`, `{action_input}` and `{response}` fields are filled
# in with `str.format`, so any literal brace added to this text must be doubled.
# NOTE(review): the few-shot step asks for a DataFrame named `df`, but the
# example code assigns the result to `data` - confirm whether that is intended.
FEWSHOT_INSTRUCTION = """\
You are an assistant who can utilize external tools.
{tool_description}
To use a tool, please response with the following format:
```
{thought} Think what you need to solve, do you need to use tools?
{action} The tool name, should be one of [{action_names}].
{action_input} The input to the tool that you want to use.
```
The tool will give you response after your response using the following format:
```
{response} the results after call the tool.
```
Therefore DO NOT generate tool response by yourself.
Also please follow the guidelines:
1. Always use code interpreter to solve the problem.
2. The generated codes should always in a markdown code block format.
3. The generated codes will be executed in an ipython manner and the results will be cached.
4. Your responded code should always be simple and only solves the problem in current step.
For example:
File url: `xxxx`
### Step 1. Load the dataset from the url into a pandas DataFrame named `df`.
{thought} We should use `pandas` to solve this step.
{action} IPythonInterpreter
{action_input} ```python
import pandas as pd
url = "xxxx"
data = pd.read_csv(url)
```
{response} The code is succeed without any outputs.
Let us begin from here!
"""

# Tool description given to the IPythonInterpreter action through its
# `description` argument in `models` below.
IPYTHON_INTERPRETER_DESCRIPTION = '''\
It can run Python code in a manner as jupyter notebook. The code must be a valid code that contains only python method.'''

# Models under evaluation: one CodeAgent wrapping a CIReAct agent that talks to
# `gpt-3.5-turbo` and executes code through the IPythonInterpreter action.
models = [
dict(
abbr='gpt-3.5-code',
type=CodeAgent,
agent_type=CIReAct,
# upper bound on the number of agent turns for a single query
max_turn=3,
# language model backing the agent
llm=dict(
type=OpenAI,
path='gpt-3.5-turbo',
# presumably resolves the API key from the environment - TODO confirm
key='ENV',
query_per_second=1,
max_seq_len=4096,
),
# tools the agent may call
actions=[
dict(type=IPythonInterpreter,
description=IPYTHON_INTERPRETER_DESCRIPTION,
user_data_dir='./data/cibench_dataset/datasources')
],
# prompt protocol: instruction prompt, force-stop prompt and finish marker
protocol=dict(
type=ReActProtocol,
call_protocol=FEWSHOT_INSTRUCTION,
force_stop=FORCE_STOP_PROMPT_EN,
finish=dict(role='FINISH', begin='Final Answer:', end='\n'),
),
batch_size=1,
use_system_role=False, # emit agent-injected messages with the `user` role instead of `system`
first_system_role=False, # also send the leading instruction prompt with the `user` role
merge_adjacent_role=True, # concatenate consecutive messages that share the same role
),
]


# Inference settings: split the work with SizePartitioner and execute
# OpenICLInferTask tasks through LocalRunner (`max_num_workers=16`).
infer = dict(
partitioner=dict(type=SizePartitioner, max_task_size=1000),
runner=dict(
type=LocalRunner,
max_num_workers=16,
task=dict(type=OpenICLInferTask)),
)
30 changes: 23 additions & 7 deletions opencompass/datasets/cibench.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,11 @@ def load_experiment_template(file: str) -> dict:
for _output in cell['outputs']:
if _output['output_type'] == 'display_data':
assert not output_flag
output_flag = True
tags.append('vis')
outputs.append(_output['data']['image/png'])
for _output in cell['outputs']:
if 'image/png' in _output['data']:
output_flag = True
tags.append('vis')
outputs.append(_output['data']['image/png'])
for _output in cell['outputs'][::-1]:
if output_flag:
break
if _output['output_type'] == 'stream' and _output[
Expand Down Expand Up @@ -290,11 +291,26 @@ def correct_step(step, target):
if action['result']:
try:
pred = action['result']['text']
match = re.search('execute_result:\n\n```\n(.*?)\n```',
pred, re.DOTALL)
match_exec = re.search(
'execute_result:\n\n```\n(.*?)\n```', pred,
re.DOTALL)
match_stdout = re.search('stdout:\n\n```\n(.*?)\n```',
pred, re.DOTALL)
# get pred result from execute_result by default
# else stdout
if match_exec and match_stdout:
match = match_exec
elif match_exec:
match = match_exec
elif match_stdout:
match = match_stdout
else:
match = None
if match:
out = match.group(1)
return out.strip() == target.strip()
score = (out.strip() == target.strip()
or target.strip() in out.strip())
return score
except Exception:
return False
# Fall back to False
Expand Down
138 changes: 135 additions & 3 deletions opencompass/lagent/agents/react.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,136 @@
from lagent.agents.react import ReAct
import copy
from typing import Dict, List

from lagent.actions import ActionExecutor
from lagent.agents.react import ReAct as _ReAct
from lagent.agents.react import ReActProtocol as _ReActProtocol
from lagent.schema import ActionReturn, ActionStatusCode, AgentReturn


class ReActProtocol(_ReActProtocol):
    """ReAct prompt protocol with configurable roles for injected messages.

    On top of the parent protocol this class carries three attributes that
    the owning agent may overwrite after construction:

    * ``system_role``: role used for the force-stop message.
    * ``first_system_role``: role used for the leading instruction prompt.
    * ``merge_adjacent_role``: when True, consecutive messages sharing the
      same role are concatenated into one message.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        # defaults to system
        self.system_role = 'system'
        self.first_system_role = 'system'
        self.merge_adjacent_role = False

    def format(self,
               chat_history: List[Dict],
               inner_step: List[Dict],
               action_executor: ActionExecutor,
               force_stop: bool = False) -> list:
        """Generate the ReAct format prompt.

        The message dicts in ``chat_history`` and ``inner_step`` are never
        modified, even when adjacent roles are merged.

        Args:
            chat_history (List[Dict]): The history log in previous runs.
            inner_step (List[Dict]): The log in the current run.
            action_executor (ActionExecutor): the action manager to
                execute actions.
            force_stop (boolean): whether force the agent to give responses
                under pre-defined turns.

        Returns:
            List[Dict]: ReAct format prompt.
        """

        call_protocol = self.call_protocol.format(
            tool_description=action_executor.get_actions_info(),
            action_names=action_executor.action_names(),
            thought=self.thought['begin'],
            action=self.action['begin'],
            action_input=self.action_input['begin'],
            response=self.response['begin'],
            finish=self.finish['begin'],
        )
        formatted = [
            dict(role=self.first_system_role, content=call_protocol)
        ]
        formatted += chat_history
        formatted += inner_step
        if force_stop:
            formatted.append(
                dict(role=self.system_role, content=self.force_stop))

        if self.merge_adjacent_role and formatted:
            # Merge into shallow copies: the entries of `formatted` are the
            # very dicts stored in the caller's history, so concatenating in
            # place would grow the stored messages on every call.
            merged = [dict(formatted[0])]
            for message in formatted[1:]:
                if message['role'] == merged[-1]['role']:
                    # Same role as the previous message: append its content
                    # (no separator is inserted).
                    merged[-1]['content'] += message['content']
                else:
                    merged.append(dict(message))
            return merged

        return formatted


class ReAct(_ReAct):
    """ReAct agent whose injected message roles can be configured.

    Args:
        use_system_role (bool): When True the agent's ``system_role`` is
            ``'system'``, otherwise ``'user'``. This role is used for tool
            responses and is copied onto the protocol.
        first_system_role (bool): Only matters when ``use_system_role`` is
            False; chooses ``'system'`` (True) or ``'user'`` (False) for the
            protocol's ``first_system_role``.
        merge_adjacent_role (bool): Copied onto the protocol's
            ``merge_adjacent_role`` attribute.
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(self,
                 use_system_role: bool = True,
                 first_system_role: bool = True,
                 merge_adjacent_role: bool = False,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.system_role = 'system' if use_system_role else 'user'
        leading_role = ('system' if
                        (use_system_role or first_system_role) else 'user')
        self._protocol.first_system_role = leading_role
        self._protocol.system_role = self.system_role
        self._protocol.merge_adjacent_role = merge_adjacent_role

    def chat(self, message: str) -> AgentReturn:
        """Answer ``message`` by running the think/act loop.

        Args:
            message (str): The user query.

        Returns:
            AgentReturn: The executed actions, a deep copy of the inner
            steps and the final response (a fixed apology when no finish
            action was produced within ``max_turn`` turns).
        """
        # Stored history may still carry the literal `system` role; rewrite
        # it to the role this agent is configured to use.
        for past in self._session_history:
            if past['role'] == 'system':
                past['role'] = self.system_role

        self._inner_history = [dict(role='user', content=message)]
        agent_return = AgentReturn()
        # Used unless some turn ends with the finish action.
        final_answer = 'Sorry that I cannot answer your question.'
        for turn in range(self.max_turn):
            prompt = self._protocol.format(
                chat_history=self.session_history,
                inner_step=self._inner_history,
                action_executor=self._action_executor,
                force_stop=(turn == self.max_turn - 1))
            response = self._llm.generate_from_template(prompt, 512)
            self._inner_history.append(
                dict(role='assistant', content=response))
            thought, action, action_input = self._protocol.parse(
                response, self._action_executor)
            action_return: ActionReturn = self._action_executor(
                action, action_input)
            action_return.thought = thought
            agent_return.actions.append(action_return)
            if action_return.type == self._action_executor.finish_action.name:
                final_answer = action_return.result['text']
                break
            # Feed the tool output back to the model for the next turn.
            self._inner_history.append(
                dict(role=self.system_role,
                     content=self._protocol.format_response(action_return)))
        agent_return.response = final_answer
        agent_return.inner_steps = copy.deepcopy(self._inner_history)
        # only append the user and final response
        self._session_history.extend([
            dict(role='user', content=message),
            dict(role='assistant', content=agent_return.response),
        ])
        return agent_return


class CIReAct(ReAct):
"""Code Interpreter version of ReAct. The success state is different from
ReAct.
Expand All @@ -27,6 +156,9 @@ def reset(self):
b.reset()

def chat(self, message: str) -> AgentReturn:
for hist in self._session_history:
if hist['role'] == 'system':
hist['role'] = self.system_role
self._inner_history = []
# append the user message for session history
self._session_history.append(dict(role='user', content=message))
Expand Down Expand Up @@ -54,14 +186,14 @@ def chat(self, message: str) -> AgentReturn:
dict(role='assistant', content=response))
self._session_history.append(
dict(
role='system',
role=self.system_role,
content=self._protocol.format_response(action_return)))
agent_return.response = action_return.result['text']
return agent_return
elif action_return.type == self._action_executor.invalid_action.name: # noqa
action_return.errmsg = 'The action is invalid, please check the action name.' # noqa
self._inner_history.append(
dict(role='system',
dict(role=self.system_role,
content=self._protocol.format_response(action_return)))
if turn == self.max_turn - 1:
force_stop = True
Expand Down
20 changes: 20 additions & 0 deletions opencompass/models/lagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@ def reset(self):
def set_history(self, history):
self.agent._session_history = deepcopy(history)

def gt_response(self, prompt):
    """Convert a ground-truth answer into history messages.

    For a ``CIReAct`` agent the answer is treated as gold code: it is wrapped
    in a python code fence, executed through the ``IPythonInterpreter``
    action, and returned as an assistant message followed by a ``system``
    message holding the formatted execution result. For any other agent the
    answer is returned as a single assistant message.

    Args:
        prompt: The ground-truth answer text.

    Returns:
        list: One or two message dicts with ``role`` and ``content`` keys.
    """
    if 'CIReAct' not in str(self.agent.__class__):
        return [dict(role='assistant', content=prompt)]

    gold = prompt
    protocol = self.agent._protocol
    prompt = f"""{protocol.action['begin']} IPythonInterpreter
{protocol.action_input['begin']} ```python\n{gold}\n```\n"""  # noqa
    # Run the gold code so that the interpreter state matches the history.
    executed = self.agent._action_executor(
        'IPythonInterpreter',
        dict(
            command=f"""```python\n{gold}\n```\n""",
            timeout=120,
        ))
    return [
        dict(role='assistant', content=prompt),
        dict(role='system', content=protocol.format_response(executed)),
    ]

@property
def template_parser(self):
return self.agent._llm.template_parser
Expand Down
11 changes: 10 additions & 1 deletion opencompass/openicl/icl_inferencer/icl_agent_inferencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,15 @@ def infer_every_with_gt(self, chat: List[dict], index: int,
i for i, item in enumerate(chat) if item['role'] == 'assistant'
]

history = chat[:assistant_indices[0] - 1]
prev_idx = 0
for i in assistant_indices:
self.model.set_history(chat[:i - 1])
for j in range(prev_idx, i - 1):
if chat[j]['role'] == 'assistant':
history += self.model.gt_response(chat[j]['content'])
elif chat[j]['role'] == 'user':
history += [chat[j]]
self.model.set_history(history)
answer, steps, _ = self.model.chat(chat[i - 1]['content'])
output_handler.save_multiround_results(
origin_prompt=chat[i - 1]['content'],
Expand All @@ -134,4 +141,6 @@ def infer_every_with_gt(self, chat: List[dict], index: int,
idx=index,
gold=chat[i]['content'],
)
history += [chat[i - 1]]
prev_idx = i
self.model.reset()
2 changes: 2 additions & 0 deletions requirements/agent.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ jupyter
jupyter_client
jupytext
lagent
lightgbm==4.1.0
networkx
scikit-image
sympy==1.12
tensorflow==2.14.0

0 comments on commit 4aa7456

Please sign in to comment.