From 1e8a5b1d0f81d352a7ccd4aa7259cf10b5e76a34 Mon Sep 17 00:00:00 2001
From: Evan Griffiths <56087052+evangriffiths@users.noreply.github.com>
Date: Thu, 29 Aug 2024 16:13:25 +0100
Subject: [PATCH] Add GoalManager, and make a DeployableMicrochainAgent with
 this feature (#413)

---
 poetry.lock                                   |   6 +-
 .../agents/goal_manager.py                    | 307 ++++++++++++++++++
 .../agents/microchain_agent/deploy.py         |  52 +++
 .../agents/microchain_agent/memory.py         |   6 +
 .../microchain_agent/microchain_agent.py      |   6 +
 .../agents/microchain_agent/prompts.py        |  23 +-
 prediction_market_agent/agents/utils.py       |   1 +
 .../db/evaluated_goal_table_handler.py        |  43 +++
 prediction_market_agent/db/models.py          |  19 ++
 prediction_market_agent/run_agent.py          |   3 +
 pyproject.toml                                |   2 +-
 scripts/delete_agent_db_entries.py            |  12 +
 tests/agents/test_goal_manager.py             | 284 ++++++++++++++++
 tests/db/test_evaluated_goal_table_handler.py | 114 +++++++
 tests/test_chat_history.py                    |  14 +
 15 files changed, 887 insertions(+), 5 deletions(-)
 create mode 100644 prediction_market_agent/agents/goal_manager.py
 create mode 100644 prediction_market_agent/db/evaluated_goal_table_handler.py
 create mode 100644 tests/agents/test_goal_manager.py
 create mode 100644 tests/db/test_evaluated_goal_table_handler.py

diff --git a/poetry.lock b/poetry.lock
index ebcb21da..19d09a07 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@@ -5154,7 +5154,7 @@ termcolor = "2.4.0"

 [package.source]
 type = "git"
-url = "https://github.com/galatolofederico/microchain"
+url = "https://github.com/galatolofederico/microchain.git"
 reference = "98e601f6b7413ea48fb0b099309d686c4b10ff5c"
 resolved_reference = "98e601f6b7413ea48fb0b099309d686c4b10ff5c"
@@ -10742,4 +10742,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.10.0"
-content-hash = "8a4c0170bad71dc0af9bf36244bab6990cf4f2afd1379aebdfcfcb2c5877c3ae"
+content-hash = "39328798ec6e388bcf06c3f8d66cf5d68c6448294d9106f8082b949d64321142"
diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
new file mode 100644
index 00000000..b9039f35
--- /dev/null
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -0,0 +1,307 @@
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from prediction_market_agent_tooling.tools.langfuse_ import (
+    get_langfuse_langchain_config,
+    observe,
+)
+from prediction_market_agent_tooling.tools.utils import utcnow
+from pydantic import BaseModel, Field
+
+from prediction_market_agent.agents.microchain_agent.memory import ChatHistory
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL, APIKeys
+
+GENERATE_GOAL_PROMPT_TEMPLATE = """
+Generate a specific goal for an open-ended, autonomous agent that has a high-level description and a number of specific capabilities.
+If applicable, use the agent's previous evaluated goals when considering its new goal.
+
+The goal should satisfy the following:
+- have a narrow focus
+- be completable immediately, within a single session
+- be realistically achievable given the agent's specific capabilities
+- have a clear motivation and completion criteria
+- advance the aims of the agent
+- balance the need for exploration and exploitation
+- not be contingent on external factors that are out of the agent's control
+
+[HIGH LEVEL DESCRIPTION]
+{high_level_description}
+
+[AGENT CAPABILITIES]
+{agent_capabilities}
+
+{previous_evaluated_goals}
+{format_instructions}
+"""
+
+EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE = """
+An agent and user are working together to achieve a well-defined goal.
+Given their chat history and the goal definition, evaluate whether the goal has been completed.
+
+[GOAL]
+{goal_prompt}
+
+[CHAT HISTORY]
+{chat_history}
+
+{format_instructions}
+"""
+
+
+class Goal(BaseModel):
+    goal: str = Field(..., description="A clear description of the goal")
+    motivation: str = Field(..., description="The reason for the goal")
+    completion_criteria: str = Field(
+        ...,
+        description="The criteria that will be used to evaluate whether the goal has been completed",
+    )
+
+    def to_prompt(self) -> str:
+        return (
+            f"# Goal:\n"
+            f"{self.goal}\n\n"
+            f"## Motivation:\n{self.motivation}\n\n"
+            f"## Completion Criteria:\n{self.completion_criteria}"
+        )
+
+
+class GoalEvaluation(BaseModel):
+    reasoning: str = Field(
+        ..., description="An explanation of why the goal is deemed completed or not"
+    )
+    is_complete: bool = Field(..., description="Whether the goal is complete")
+    output: str | None = Field(
+        ...,
+        description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output",
+    )
+
+    def __str__(self) -> str:
+        return (
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
+
+class EvaluatedGoal(Goal):
+    reasoning: str
+    is_complete: bool
+    output: str | None
+
+    def __str__(self) -> str:
+        return (
+            f"Goal: {self.goal}\n"
+            f"Motivation: {self.motivation}\n"
+            f"Completion Criteria: {self.completion_criteria}\n"
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
+    @classmethod
+    def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
+        return EvaluatedGoal(
+            goal=model.goal,
+            motivation=model.motivation,
+            completion_criteria=model.completion_criteria,
+            is_complete=model.is_complete,
+            reasoning=model.reasoning,
+            output=model.output,
+        )
+
+    def to_model(self, agent_id: str) -> EvaluatedGoalModel:
+        return EvaluatedGoalModel(
+            goal=self.goal,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+            is_complete=self.is_complete,
+            reasoning=self.reasoning,
+            output=self.output,
+            agent_id=agent_id,
+            datetime_=utcnow(),
+        )
+
+    def to_goal(self) -> Goal:
+        return Goal(
+            goal=self.goal,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+        )
+
+
+class GoalManager:
+    def __init__(
+        self,
+        agent_id: str,
+        high_level_description: str,
+        agent_capabilities: str,
+        retry_limit: int = 3,
+        model: str = DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url: str | None = None,
+    ):
+        self.agent_id = agent_id
+        self.high_level_description = high_level_description
+        self.agent_capabilities = agent_capabilities
+        self.retry_limit = retry_limit
+        self.model = model
+        self.table_handler = EvaluatedGoalTableHandler(
+            agent_id=agent_id,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
+
+    def get_latest_evaluated_goals_from_memory(
+        self, limit: int
+    ) -> list[EvaluatedGoal]:
+        evaluated_goal_models = self.table_handler.get_latest_evaluated_goals(
+            limit=limit
+        )
+        return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models]
+
+    @observe()
+    def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
+        """
+        Generate a new goal based on the high-level description and the latest
+        evaluated goals.
+
+        TODO support generation of long-horizon goals with a specified
+        completion date, until which the goal's status is 'pending'.
+        """
+        parser = PydanticOutputParser(pydantic_object=Goal)
+        prompt = PromptTemplate(
+            template=GENERATE_GOAL_PROMPT_TEMPLATE,
+            input_variables=[
+                "high_level_description",
+                "agent_capabilities",
+                "previous_evaluated_goals",
+            ],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        latest_evaluated_goals_str = self.evaluated_goals_to_str(latest_evaluated_goals)
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal: Goal = chain.invoke(
+            {
+                "high_level_description": self.high_level_description,
+                "agent_capabilities": self.agent_capabilities,
+                "previous_evaluated_goals": latest_evaluated_goals_str,
+            },
+            config=get_langfuse_langchain_config(),
+        )
+        return goal
+
+    def have_reached_retry_limit(
+        self, latest_evaluated_goals: list[EvaluatedGoal]
+    ) -> bool:
+        if self.retry_limit == 0:
+            return True
+
+        if len(latest_evaluated_goals) < self.retry_limit + 1:
+            return False
+
+        latest_goal = latest_evaluated_goals[0].to_goal()
+        return all(
+            g.to_goal() == latest_goal
+            for g in latest_evaluated_goals[: self.retry_limit + 1]
+        )
+
+    def get_goal(self) -> Goal:
+        """
+        Manage the fetching of goals from memory, and decide when to generate
+        a new goal vs. retry an incomplete one.
+
+        TODO add the ability to continue from a previous session if the goal
+        is not complete.
+        """
+        latest_evaluated_goals = self.get_latest_evaluated_goals_from_memory(
+            limit=self.retry_limit
+        )
+        if latest_evaluated_goals:
+            # Previous goals have been retrieved from memory. Generate a new
+            # goal based on these, or retry the last one if it did not complete.
+            latest_evaluated_goal = latest_evaluated_goals[0]
+
+            if latest_evaluated_goal.is_complete:
+                # Generate a new goal
+                return self.generate_goal(latest_evaluated_goals)
+            else:
+                # Try again, unless we've reached the retry limit
+                if self.have_reached_retry_limit(latest_evaluated_goals):
+                    return self.generate_goal(latest_evaluated_goals)
+                else:
+                    return latest_evaluated_goal.to_goal()
+
+        # No evaluated goals in memory. Generate a new goal from scratch
+        return self.generate_goal(latest_evaluated_goals=[])
+
+    @classmethod
+    def get_chat_history_after_goal_prompt(
+        cls, goal: Goal, chat_history: ChatHistory
+    ) -> ChatHistory:
+        """
+        Return the chat history after the goal prompt. Raises a ValueError if
+        the goal prompt is not found in the chat history.
+        """
+        for i, chat_message in enumerate(chat_history.chat_messages):
+            if chat_message.content == goal.to_prompt():
+                return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :])
+        raise ValueError("Goal prompt not found in chat history")
+
+    @observe()
+    def evaluate_goal_progress(
+        self,
+        goal: Goal,
+        chat_history: ChatHistory,
+    ) -> GoalEvaluation:
+        relevant_chat_history = self.get_chat_history_after_goal_prompt(
+            goal=goal,
+            chat_history=chat_history,
+        )
+        parser = PydanticOutputParser(pydantic_object=GoalEvaluation)
+        prompt = PromptTemplate(
+            template=EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE,
+            input_variables=["goal_prompt", "chat_history"],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal_evaluation: GoalEvaluation = chain.invoke(
+            {
+                "goal_prompt": goal.to_prompt(),
+                "chat_history": str(relevant_chat_history),
+            },
+            config=get_langfuse_langchain_config(),
+        )
+        return goal_evaluation
+
+    def save_evaluated_goal(self, goal: Goal, evaluation: GoalEvaluation) -> None:
+        evaluated_goal = EvaluatedGoal(
+            goal=goal.goal,
+            motivation=goal.motivation,
+            completion_criteria=goal.completion_criteria,
+            is_complete=evaluation.is_complete,
+            reasoning=evaluation.reasoning,
+            output=evaluation.output,
+        )
+        model = evaluated_goal.to_model(agent_id=self.agent_id)
+        self.table_handler.save_evaluated_goal(model)
+
+    @staticmethod
+    def evaluated_goals_to_str(evaluated_goals: list[EvaluatedGoal]) -> str:
+        goals_str = ""
+        for i, goal in enumerate(evaluated_goals):
+            goals_str += f"## Goal {i+1}:\n{goal}\n"
+            if i < len(evaluated_goals) - 1:
+                goals_str += "\n"
+        return goals_str
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index eebe5fbe..89cf0664 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -2,11 +2,18 @@
 from prediction_market_agent_tooling.deploy.agent import DeployableAgent
 from prediction_market_agent_tooling.loggers import logger
 from prediction_market_agent_tooling.markets.markets import MarketType
+from prediction_market_agent_tooling.tools.utils import check_not_none

+from prediction_market_agent.agents.goal_manager import GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
 from prediction_market_agent.agents.microchain_agent.microchain_agent import (
     SupportedModel,
     build_agent,
     get_editable_prompt_from_agent,
+    get_functions_summary_list,
     get_unformatted_system_prompt,
     save_agent_history,
 )
@@ -32,6 +39,12 @@ class DeployableMicrochainAgent(DeployableAgent):
     system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN

+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager | None:
+        return None
+
     def run(
         self,
         market_type: MarketType,
@@ -54,6 +67,7 @@ def run(
                 prompt_handler if self.load_historical_prompt else None
             ),
         )
+
         agent: Agent = build_agent(
             market_type=market_type,
             model=self.model,
@@ -67,6 +81,10 @@ def run(
             enable_langfuse=self.enable_langfuse,
         )

+        if goal_manager := self.build_goal_manager(agent=agent):
+            goal = goal_manager.get_goal()
+            agent.prompt = goal.to_prompt()
+
         # Save formatted system prompt
         initial_formatted_system_prompt = agent.system_prompt
@@ -76,6 +94,23 @@ def run(
             logger.error(e)
             raise e
         finally:
+            if goal_manager:
+                goal = check_not_none(goal)
+                goal_evaluation = goal_manager.evaluate_goal_progress(
+                    goal=goal,
+                    chat_history=ChatHistory.from_list_of_dicts(agent.history),
+                )
+                goal_manager.save_evaluated_goal(
+                    goal=goal,
+                    evaluation=goal_evaluation,
+                )
+                agent.history.append(
+                    ChatMessage(
+                        role="user",
+                        content=f"# Goal evaluation\n{goal_evaluation}",
+                    ).model_dump()
+                )
+
             save_agent_history(
                 agent=agent,
                 long_term_memory=long_term_memory,
@@ -116,3 +151,20 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 ):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3
     model = SupportedModel.llama_31_instruct
+
+
+class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
+    task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
+    model = SupportedModel.gpt_4o
+    system_prompt_choice = SystemPromptChoice.TRADING_AGENT_MINIMAL
+
+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager:
+        return GoalManager(
+            agent_id=self.task_description,
+            high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
+            agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine)}",
+            retry_limit=3,
+        )
diff --git a/prediction_market_agent/agents/microchain_agent/memory.py b/prediction_market_agent/agents/microchain_agent/memory.py
index 27788466..53f667bb 100644
--- a/prediction_market_agent/agents/microchain_agent/memory.py
+++ b/prediction_market_agent/agents/microchain_agent/memory.py
@@ -23,6 +23,9 @@ class ChatMessage(BaseModel):
     def is_system_message(self) -> bool:
         return self.role == "system"

+    def __str__(self) -> str:
+        return f"{self.role}: {self.content}"
+

 class DatedChatMessage(ChatMessage):
     datetime_: datetime
@@ -98,6 +101,9 @@ def iterations(self) -> int:
         else:
             return (self.num_messages - 1) // 2

+    def __str__(self) -> str:
+        return "\n".join(str(m) for m in self.chat_messages)
+

 class DatedChatHistory(ChatHistory):
     chat_messages: Sequence[DatedChatMessage]
diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index 9669fd5c..064efb94 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -247,3 +247,9 @@ def save_agent_history(

 def get_editable_prompt_from_agent(agent: Agent) -> str:
     return extract_updatable_system_prompt(str(agent.system_prompt))
+
+
+def get_functions_summary_list(engine: Engine) -> str:
+    return "\n".join(
+        [f"- {fname}: {f.description}" for fname, f in engine.functions.items()]
+    )
diff --git a/prediction_market_agent/agents/microchain_agent/prompts.py b/prediction_market_agent/agents/microchain_agent/prompts.py
index 4cb04a90..2bc71593 100644
--- a/prediction_market_agent/agents/microchain_agent/prompts.py
+++ b/prediction_market_agent/agents/microchain_agent/prompts.py
@@ -33,6 +33,22 @@
 Only output a single function call per message.
 Make 'Reasoning' calls frequently - at least every other call.
 """
+
+# This is similar to the TRADING_AGENT_SYSTEM_PROMPT, except that it doesn't
+# contain any specific instructions on what to do. This is appropriate to use
+# for an agent when combined with a user prompt containing the instructions for
+# the session.
+TRADING_AGENT_SYSTEM_PROMPT_MINIMAL = f"""You are a helpful assistant, who specializes as an expert trader agent in prediction markets.
+
+{NON_UPDATABLE_DIVIDOR}
+
+{{engine_help}}
+
+Only output valid Python function calls, without code formatting characters, without any other text. i.e. it should run if passed to Python's `eval` function.
+Only output a single function call per message.
+Make 'Reasoning' calls frequently - at least every other call. You need to reason step by step.
+"""
+
 # Experimental system prompt for task-solving agent.
 TASK_AGENT_SYSTEM_PROMPT = f"""Act as a task-solving agents that picks up available tasks and solves them for getting rewards.
@@ -74,6 +90,7 @@ def build_full_unformatted_system_prompt(system_prompt: str) -> str:
 class SystemPromptChoice(str, Enum):
     JUST_BORN = "just_born"
     TRADING_AGENT = "trading_agent"
+    TRADING_AGENT_MINIMAL = "trading_agent_minimal"
     TASK_AGENT = "task_agent"

@@ -98,7 +115,10 @@ def from_system_prompt_choice(
         include_agent_functions = True
         include_trading_functions = True

-    elif system_prompt_choice == SystemPromptChoice.TRADING_AGENT:
+    elif system_prompt_choice in [
+        SystemPromptChoice.TRADING_AGENT,
+        SystemPromptChoice.TRADING_AGENT_MINIMAL,
+    ]:
         include_trading_functions = True

     elif system_prompt_choice == SystemPromptChoice.TASK_AGENT:
@@ -118,4 +138,5 @@ def from_system_prompt_choice(
     SystemPromptChoice.JUST_BORN: SYSTEM_PROMPT,
     SystemPromptChoice.TRADING_AGENT: TRADING_AGENT_SYSTEM_PROMPT,
     SystemPromptChoice.TASK_AGENT: TASK_AGENT_SYSTEM_PROMPT,
+    SystemPromptChoice.TRADING_AGENT_MINIMAL: TRADING_AGENT_SYSTEM_PROMPT_MINIMAL,
 }
diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py
index 9b5910ff..4e903850 100644
--- a/prediction_market_agent/agents/utils.py
+++ b/prediction_market_agent/agents/utils.py
@@ -31,6 +31,7 @@ class AgentIdentifier(str, Enum):
     MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2"
     MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3"
     MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app"
+    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "trader-agent-0-with-goal-manager"

 MEMORIES_TO_LEARNINGS_TEMPLATE = """
diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py
new file mode 100644
index 00000000..e2f41abc
--- /dev/null
+++ b/prediction_market_agent/db/evaluated_goal_table_handler.py
@@ -0,0 +1,43 @@
+import typing as t
+
+from sqlmodel import col
+
+from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.db.sql_handler import SQLHandler
+
+
+class EvaluatedGoalTableHandler:
+    def __init__(
+        self,
+        agent_id: str,
+        sqlalchemy_db_url: str | None = None,
+    ):
+        self.agent_id = agent_id
+        self.sql_handler = SQLHandler(
+            model=EvaluatedGoalModel,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
+
+    def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None:
+        self.sql_handler.save_multiple([model])
+
+    def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]:
+        column_to_order: str = EvaluatedGoalModel.datetime_.key  # type: ignore
+        items: t.Sequence[
+            EvaluatedGoalModel
+        ] = self.sql_handler.get_with_filter_and_order(
+            query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id],
+            order_by_column_name=column_to_order,
+            order_desc=True,
+            limit=limit,
+        )
+        return list(items)
+
+    def delete_all_evaluated_goals(self) -> None:
+        """
+        Delete all evaluated goals saved under this handler's `agent_id`.
+        """
+        self.sql_handler.delete_all_entries(
+            col_name=EvaluatedGoalModel.agent_id.key,  # type: ignore
+            col_value=self.agent_id,
+        )
diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py
index 0302a9da..94b5baaf 100644
--- a/prediction_market_agent/db/models.py
+++ b/prediction_market_agent/db/models.py
@@ -29,3 +29,22 @@ class Prompt(SQLModel, table=True):
     # user (or app) should be persisted.
     session_identifier: str
     datetime_: datetime
+
+
+class EvaluatedGoalModel(SQLModel, table=True):
+    """
+    Checkpoint for a general agent's goals. Used to store the agent's progress
+    towards a goal, and to restore it in future sessions.
+    """
+
+    __tablename__ = "evaluated_goals"
+    __table_args__ = {"extend_existing": True}
+    id: Optional[int] = Field(default=None, primary_key=True)
+    agent_id: str  # Per-agent identifier
+    goal: str
+    motivation: str
+    completion_criteria: str
+    is_complete: bool
+    reasoning: str
+    output: str | None
+    datetime_: datetime
diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py
index 92d520c2..4d19e418 100644
--- a/prediction_market_agent/run_agent.py
+++ b/prediction_market_agent/run_agent.py
@@ -25,6 +25,7 @@
     DeployableMicrochainModifiableSystemPromptAgent1,
     DeployableMicrochainModifiableSystemPromptAgent2,
     DeployableMicrochainModifiableSystemPromptAgent3,
+    DeployableMicrochainWithGoalManagerAgent0,
 )
 from prediction_market_agent.agents.omen_cleaner_agent.deploy import OmenCleanerAgent
 from prediction_market_agent.agents.prophet_agent.deploy import (
@@ -59,6 +60,7 @@ class RunnableAgent(str, Enum):
     microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1"
     microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2"
     microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3"
+    microchain_with_goal_manager_agent_0 = "microchain_with_goal_manager_agent_0"
     metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent"
     prophet_gpt4o = "prophet_gpt4o"
     prophet_gpt4 = "prophet_gpt4"
@@ -82,6 +84,7 @@ class RunnableAgent(str, Enum):
     RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1,
     RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2,
     RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3,
+    RunnableAgent.microchain_with_goal_manager_agent_0: DeployableMicrochainWithGoalManagerAgent0,
     RunnableAgent.social_media: DeployableSocialMediaAgent,
     RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent,
     RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent,
diff --git a/pyproject.toml b/pyproject.toml
index 3975e158..b32a0b8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ autoflake = "^2.2.1"
 isort = "^5.13.2"
 markdownify = "^0.11.6"
 tavily-python = "^0.3.9"
-microchain-python = { git = "https://github.com/galatolofederico/microchain", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" }
+microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" }
 pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"}
 psycopg2-binary = "^2.9.9"
 sqlmodel = "^0.0.21"
diff --git a/scripts/delete_agent_db_entries.py b/scripts/delete_agent_db_entries.py
index fa58f0d0..3c14dcc5 100644
--- a/scripts/delete_agent_db_entries.py
+++ b/scripts/delete_agent_db_entries.py
@@ -1,6 +1,9 @@
 import typer

 from prediction_market_agent.agents.utils import AgentIdentifier
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
 from prediction_market_agent.db.long_term_memory_table_handler import (
     LongTermMemoryTableHandler,
 )
@@ -11,7 +14,8 @@ def main(
     session_id: AgentIdentifier,
     delete_memories: bool = True,
     delete_prompts: bool = True,
+    delete_goals: bool = True,
 ) -> None:
     """
-    Delete all memories and prompts for a given agent, defined by the session_id.
+    Delete all memories, prompts, and evaluated goals for a given agent, defined by the session_id.
     """
@@ -31,6 +35,14 @@ def main(
     else:
         print("Memory entries successfully deleted.")

+    if delete_goals:
+        evaluated_goal_table_handler = EvaluatedGoalTableHandler(agent_id=session_id)
+        evaluated_goal_table_handler.delete_all_evaluated_goals()
+        if len(evaluated_goal_table_handler.get_latest_evaluated_goals(limit=1)) != 0:
+            raise Exception("Evaluated goal entries were not deleted.")
+        else:
+            print("Evaluated goal entries successfully deleted.")
+

 if __name__ == "__main__":
     typer.run(main)
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
new file mode 100644
index 00000000..7db1ee3c
--- /dev/null
+++ b/tests/agents/test_goal_manager.py
@@ -0,0 +1,284 @@
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal, Goal, GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL
+from tests.utils import RUN_PAID_TESTS
+
+SQLITE_DB_URL = "sqlite://"
+
+
+def test_have_reached_retry_limit() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="foo",
+        agent_capabilities="bar",
+        retry_limit=0,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+
+    g0 = EvaluatedGoal(
+        goal="goal0",
+        motivation="motivation",
+        completion_criteria="completion_criteria",
+        is_complete=False,
+        reasoning="reasoning",
+        output=None,
+    )
+    g1 = g0.model_copy()
+    g1.goal = "goal1"
+
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is True
+
+    goal_manager.retry_limit = 1
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is True
+    )
+
+    goal_manager.retry_limit = 2
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0])
+        is True
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g1])
+        is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1])
+        is True
+    )
+
+
+def test_evaluated_goals_to_str() -> None:
+    gs = [
+        EvaluatedGoal(
+            goal="foo0",
+            motivation="bar0",
+            completion_criteria="baz0",
+            is_complete=False,
+            reasoning="qux0",
+            output=None,
+        ),
+        EvaluatedGoal(
+            goal="foo1",
+            motivation="bar1",
+            completion_criteria="baz1",
+            is_complete=True,
+            reasoning="qux1",
+            output="output",
+        ),
+    ]
+    goals_str = GoalManager.evaluated_goals_to_str(gs)
+    assert goals_str == (
+        "## Goal 1:\n"
+        "Goal: foo0\n"
+        "Motivation: bar0\n"
+        "Completion Criteria: baz0\n"
+        "Is Complete: False\n"
+        "Reasoning: qux0\n"
+        "Output: None\n"
+        "\n"
+        "## Goal 2:\n"
+        "Goal: foo1\n"
+        "Motivation: bar1\n"
+        "Completion Criteria: baz1\n"
+        "Is Complete: True\n"
+        "Reasoning: qux1\n"
+        "Output: output\n"
+    )
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_generate_goal() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.",
+        agent_capabilities=(
+            "- Web search\n"
+            "- Web scraping\n"
+            "- Accurate predictions of the probability of yes/no outcomes for a given event."
+        ),
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal0 = goal_manager.generate_goal(latest_evaluated_goals=[])
+
+    evaluated_goal = EvaluatedGoal(
+        goal="Investigate the top 5 contenders for the Tour de France, make predictions on their chances of overall victory, and compare these against the market odds.",
+        motivation="The Tour de France is a popular race, so markets are likely to have the highest liquidity",
+        completion_criteria="5 contenders identified, predictions made, and compared against market odds",
+        is_complete=False,
+        reasoning="The Tour de France is cancelled this year.",
+        output=None,
+    )
+    goal2 = goal_manager.generate_goal(latest_evaluated_goals=[evaluated_goal])
+
+    # Generates a goal related to the Tour de France
+    assert "Tour de France" in goal0.goal
+
+    # Does not generate a goal related to the Tour de France, based on the
+    # reasoning of the previous evaluated goal
+    assert "Tour de France" not in goal2.goal
+
+
+def test_get_chat_history_after_goal_prompt() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    assistant_message = ChatMessage(role="assistant", content="The answer is 42.")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            assistant_message,
+        ]
+    )
+    assert GoalManager.get_chat_history_after_goal_prompt(
+        goal=goal, chat_history=chat_history
+    ) == ChatHistory(chat_messages=[assistant_message])
+
+
+def test_get_chat_history_after_goal_prompt_error() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+        ]
+    )
+    with pytest.raises(ValueError) as e:
+        GoalManager.get_chat_history_after_goal_prompt(
+            goal=goal, chat_history=chat_history
+        )
+    assert str(e.value) == "Goal prompt not found in chat history"
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_0() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a 'None' output.
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, place a small bet on them.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="If the winner is competing, place a small bet, otherwise do nothing.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing.",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+            ChatMessage(
+                role="assistant",
+                content="The market id is '0x123' for the TdF winner. Placing bet of 0.01 USD on Tadej Pogacar",
+            ),
+            ChatMessage(role="user", content="Bet successfully placed."),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output is None
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_1() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a non-'None' output.
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing. His winning probability: p_yes=0.27",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output is not None
+    assert "Tadej Pogacar" in goal_evaluation.output
+    assert "0.27" in goal_evaluation.output
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_2() -> None:
+    """
+    Test for the case where the evaluated goal is not completed
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Uhoh, I've hit some exception and need to quit",
+            ),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is False
+    assert goal_evaluation.output is None
diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py
new file mode 100644
index 00000000..b1731816
--- /dev/null
+++ b/tests/db/test_evaluated_goal_table_handler.py
@@ -0,0 +1,114 @@
+from typing import Generator
+
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+
+SQLITE_DB_URL = "sqlite://"
+TEST_AGENT_ID = "test_agent_id"
+
+
+@pytest.fixture(scope="function")
+def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]:
+    """Creates an in-memory SQLite DB for testing"""
+    table_handler = EvaluatedGoalTableHandler(
+        sqlalchemy_db_url=SQLITE_DB_URL,
+        agent_id=TEST_AGENT_ID,
+    )
+    yield table_handler
+
+
+def test_save_load_evaluated_goal_0(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal = EvaluatedGoal(
+        goal="abc",
+        motivation="def",
+        completion_criteria="ghi",
+        is_complete=True,
+        reasoning="jkl",
+        output="mno",
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal.to_model(agent_id=TEST_AGENT_ID)
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal
+
+
+def test_save_load_evaluated_goal_1(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
+
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID)
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal1
+
+    for limit in [2, 3]:
+        loaded_models = table_handler.get_latest_evaluated_goals(limit=limit)
+        assert len(loaded_models) == 2
+        # Check LIFO order
+        assert loaded_models[0].datetime_ > loaded_models[1].datetime_
+        assert [EvaluatedGoal.from_model(model) for model in loaded_models] == [
+            evaluated_goal1,
+            evaluated_goal0,
+        ]
+
+
+def test_save_load_evaluated_goal_multiple_agents(
+    table_handler: EvaluatedGoalTableHandler,
+) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
+
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID + "1")
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal0
diff --git a/tests/test_chat_history.py b/tests/test_chat_history.py
index 103861cd..85c40822 100644
--- a/tests/test_chat_history.py
+++ b/tests/test_chat_history.py
@@ -5,6 +5,8 @@
 from prediction_market_agent_tooling.tools.utils import utcnow

 from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
     DatedChatHistory,
     DatedChatMessage,
 )
@@ -73,3 +75,15 @@ def test_save_to_and_load_from_memory(
         new_chat_history.to_undated_chat_history()
         == chat_history.to_undated_chat_history()
     )
+
+
+def test_stringified_chat_history() -> None:
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content="What is the weather like today?"),
+        ]
+    )
+    assert str(chat_history) == (
+        "system: You are a helpful assistant.\nuser: What is the weather like today?"
+    )
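Usage sketch (illustrative only, not part of the patch): the snippet below exercises the GoalManager flow added above, mirroring the order used in DeployableMicrochainAgent.run. The agent_id, description, and capability strings are hypothetical placeholders, and the DB URL is the in-memory "sqlite://" used by the tests; goal generation and evaluation call an OpenAI model via APIKeys, so an OpenAI API key must be configured.

# Illustrative sketch of the GoalManager flow from this patch.
from prediction_market_agent.agents.goal_manager import GoalManager
from prediction_market_agent.agents.microchain_agent.memory import (
    ChatHistory,
    ChatMessage,
)

goal_manager = GoalManager(
    agent_id="example-agent",  # hypothetical placeholder
    high_level_description="You are a trader agent in prediction markets.",
    agent_capabilities="- Web search\n- Place bets on binary markets",
    retry_limit=3,
    sqlalchemy_db_url="sqlite://",  # in-memory DB, as in the tests above
)

# Returns a fresh goal, or retries the most recent incomplete goal until
# retry_limit identical attempts have accumulated in the evaluated-goals table.
goal = goal_manager.get_goal()

# The goal prompt must appear verbatim in the transcript:
# evaluate_goal_progress() only considers the messages after it, and raises
# ValueError if it is missing. The assistant message here is hand-written
# for illustration; in deployment it is the agent's session history.
chat_history = ChatHistory(
    chat_messages=[
        ChatMessage(role="user", content=goal.to_prompt()),
        ChatMessage(role="assistant", content="Reasoning about the goal..."),
    ]
)

# Judge completion against the transcript, then persist the result so the
# next session's get_goal() call can build on it.
evaluation = goal_manager.evaluate_goal_progress(goal=goal, chat_history=chat_history)
goal_manager.save_evaluated_goal(goal=goal, evaluation=evaluation)

Because the evaluation is persisted per agent_id, separate deployments that share a database stay isolated, which is the same property the multiple-agents test above checks.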