From 1e8a5b1d0f81d352a7ccd4aa7259cf10b5e76a34 Mon Sep 17 00:00:00 2001
From: Evan Griffiths <56087052+evangriffiths@users.noreply.github.com>
Date: Thu, 29 Aug 2024 16:13:25 +0100
Subject: [PATCH] Add GoalManager, and make a DeployableMicrochainAgent with
 this feature (#413)

---
 poetry.lock                                   |   6 +-
 .../agents/goal_manager.py                    | 307 ++++++++++++++++++
 .../agents/microchain_agent/deploy.py         |  52 +++
 .../agents/microchain_agent/memory.py         |   6 +
 .../microchain_agent/microchain_agent.py      |   6 +
 .../agents/microchain_agent/prompts.py        |  23 +-
 prediction_market_agent/agents/utils.py       |   1 +
 .../db/evaluated_goal_table_handler.py        |  43 +++
 prediction_market_agent/db/models.py          |  19 ++
 prediction_market_agent/run_agent.py          |   3 +
 pyproject.toml                                |   2 +-
 scripts/delete_agent_db_entries.py            |  12 +
 tests/agents/test_goal_manager.py             | 284 ++++++++++++++++
 tests/db/test_evaluated_goal_table_handler.py | 114 +++++++
 tests/test_chat_history.py                    |  14 +
 15 files changed, 887 insertions(+), 5 deletions(-)
 create mode 100644 prediction_market_agent/agents/goal_manager.py
 create mode 100644 prediction_market_agent/db/evaluated_goal_table_handler.py
 create mode 100644 tests/agents/test_goal_manager.py
 create mode 100644 tests/db/test_evaluated_goal_table_handler.py

diff --git a/poetry.lock b/poetry.lock
index ebcb21da..19d09a07 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@@ -5154,7 +5154,7 @@ termcolor = "2.4.0"

 [package.source]
 type = "git"
-url = "https://github.com/galatolofederico/microchain"
+url = "https://github.com/galatolofederico/microchain.git"
 reference = "98e601f6b7413ea48fb0b099309d686c4b10ff5c"
 resolved_reference = "98e601f6b7413ea48fb0b099309d686c4b10ff5c"
@@ -10742,4 +10742,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.10.0"
-content-hash = "8a4c0170bad71dc0af9bf36244bab6990cf4f2afd1379aebdfcfcb2c5877c3ae"
+content-hash = "39328798ec6e388bcf06c3f8d66cf5d68c6448294d9106f8082b949d64321142"
diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
new file mode 100644
index 00000000..b9039f35
--- /dev/null
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -0,0 +1,307 @@
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from prediction_market_agent_tooling.tools.langfuse_ import (
+    get_langfuse_langchain_config,
+    observe,
+)
+from prediction_market_agent_tooling.tools.utils import utcnow
+from pydantic import BaseModel, Field
+
+from prediction_market_agent.agents.microchain_agent.memory import ChatHistory
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL, APIKeys
+
+GENERATE_GOAL_PROMPT_TEMPLATE = """
+Generate a specific goal for an open-ended, autonomous agent that has a high-level description and a number of specific capabilities.
+If applicable, use the agent's previous evaluated goals when considering its new goal.
+
+The goal should satisfy the following:
+- have a narrow focus
+- be completable immediately, within a single session
+- be realistically achievable given the agent's specific capabilities
+- have a clear motivation and completion criteria
+- advance the aims of the agent
+- balance the need for exploration and exploitation
+- not be contingent on external factors that are out of the agent's control
+
+[HIGH LEVEL DESCRIPTION]
+{high_level_description}
+
+[AGENT CAPABILITIES]
+{agent_capabilities}
+
+{previous_evaluated_goals}
+{format_instructions}
+"""
+
+EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE = """
+An agent and user are working together to achieve a well-defined goal.
+Given their chat history and the goal definition, evaluate whether the goal has been completed.
+
+[GOAL]
+{goal_prompt}
+
+[CHAT HISTORY]
+{chat_history}
+
+{format_instructions}
+"""
+
+
+class Goal(BaseModel):
+    goal: str = Field(..., description="A clear description of the goal")
+    motivation: str = Field(..., description="The reason for the goal")
+    completion_criteria: str = Field(
+        ...,
+        description="The criteria that will be used to evaluate whether the goal has been completed",
+    )
+
+    def to_prompt(self) -> str:
+        return (
+            f"# Goal:\n"
+            f"{self.goal}\n\n"
+            f"## Motivation:\n{self.motivation}\n\n"
+            f"## Completion Criteria:\n{self.completion_criteria}"
+        )
+
+
+class GoalEvaluation(BaseModel):
+    reasoning: str = Field(
+        ..., description="An explanation of why the goal is deemed completed or not"
+    )
+    is_complete: bool = Field(..., description="Whether the goal is complete")
+    output: str | None = Field(
+        ...,
+        description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output",
+    )
+
+    def __str__(self) -> str:
+        return (
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
+
+class EvaluatedGoal(Goal):
+    reasoning: str
+    is_complete: bool
+    output: str | None
+
+    def __str__(self) -> str:
+        return (
+            f"Goal: {self.goal}\n"
+            f"Motivation: {self.motivation}\n"
+            f"Completion Criteria: {self.completion_criteria}\n"
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
+    @classmethod
+    def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
+        return EvaluatedGoal(
+            goal=model.goal,
+            motivation=model.motivation,
+            completion_criteria=model.completion_criteria,
+            is_complete=model.is_complete,
+            reasoning=model.reasoning,
+            output=model.output,
+        )
+
+    def to_model(self, agent_id: str) -> EvaluatedGoalModel:
+        return EvaluatedGoalModel(
+            goal=self.goal,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+            is_complete=self.is_complete,
+            reasoning=self.reasoning,
+            output=self.output,
+            agent_id=agent_id,
+            datetime_=utcnow(),
+        )
+
+    def to_goal(self) -> Goal:
+        return Goal(
+            goal=self.goal,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+        )
+
+
+class GoalManager:
+    def __init__(
+        self,
+        agent_id: str,
+        high_level_description: str,
+        agent_capabilities: str,
+        retry_limit: int = 3,
+        model: str = DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url: str | None = None,
+    ):
+        self.agent_id = agent_id
+        self.high_level_description = high_level_description
+        self.agent_capabilities = agent_capabilities
+        self.retry_limit = retry_limit
+        self.model = model
+        self.table_handler = EvaluatedGoalTableHandler(
+            agent_id=agent_id,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
+
+    def get_latest_evaluated_goals_from_memory(
+        self, limit: int
+    ) -> list[EvaluatedGoal]:
+        evaluated_goal_models = self.table_handler.get_latest_evaluated_goals(
+            limit=limit
+        )
+        return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models]
+
+    @observe()
+    def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
+        """
+        Generate a new goal based on the high-level description and the latest
+        evaluated goals.
+
+        TODO support generation of long-horizon goals with a specified
+        completion date, until which the goal's status is 'pending'.
+        """
+        parser = PydanticOutputParser(pydantic_object=Goal)
+        prompt = PromptTemplate(
+            template=GENERATE_GOAL_PROMPT_TEMPLATE,
+            input_variables=[
+                "high_level_description",
+                "agent_capabilities",
+                "previous_evaluated_goals",
+            ],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        latest_evaluated_goals_str = self.evaluated_goals_to_str(latest_evaluated_goals)
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal: Goal = chain.invoke(
+            {
+                "high_level_description": self.high_level_description,
+                "agent_capabilities": self.agent_capabilities,
+                "previous_evaluated_goals": latest_evaluated_goals_str,
+            },
+            config=get_langfuse_langchain_config(),
+        )
+        return goal
+
+    def have_reached_retry_limit(
+        self, latest_evaluated_goals: list[EvaluatedGoal]
+    ) -> bool:
+        if self.retry_limit == 0:
+            return True
+
+        if len(latest_evaluated_goals) < self.retry_limit + 1:
+            return False
+
+        latest_goal = latest_evaluated_goals[0].to_goal()
+        return all(
+            g.to_goal() == latest_goal
+            for g in latest_evaluated_goals[: self.retry_limit + 1]
+        )
+
+    def get_goal(self) -> Goal:
+        """
+        Manage the fetching of goals from memory, and decide when to generate
+        a new goal vs. retry an incomplete one.
+
+        TODO add the ability to continue from a previous session if the goal
+        is not complete.
+        """
+        latest_evaluated_goals = self.get_latest_evaluated_goals_from_memory(
+            limit=self.retry_limit
+        )
+        if latest_evaluated_goals:
+            # Previous goals have been retrieved from memory. Generate a new
+            # goal based on these, or retry the last one if it did not complete.
+            latest_evaluated_goal = latest_evaluated_goals[0]
+
+            if latest_evaluated_goal.is_complete:
+                # Generate a new goal
+                return self.generate_goal(latest_evaluated_goals)
+            else:
+                # Try again, unless we've reached the retry limit
+                if self.have_reached_retry_limit(latest_evaluated_goals):
+                    return self.generate_goal(latest_evaluated_goals)
+                else:
+                    return latest_evaluated_goal.to_goal()
+
+        # No evaluated goals in memory. Generate a new goal from scratch
+        return self.generate_goal(latest_evaluated_goals=[])
+
+    @classmethod
+    def get_chat_history_after_goal_prompt(
+        cls, goal: Goal, chat_history: ChatHistory
+    ) -> ChatHistory:
+        """
+        Return the chat history after the goal prompt. Raises a ValueError if
+        the goal prompt is not found in the chat history.
+        """
+        for i, chat_message in enumerate(chat_history.chat_messages):
+            if chat_message.content == goal.to_prompt():
+                return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :])
+        raise ValueError("Goal prompt not found in chat history")
+
+    @observe()
+    def evaluate_goal_progress(
+        self,
+        goal: Goal,
+        chat_history: ChatHistory,
+    ) -> GoalEvaluation:
+        relevant_chat_history = self.get_chat_history_after_goal_prompt(
+            goal=goal,
+            chat_history=chat_history,
+        )
+        parser = PydanticOutputParser(pydantic_object=GoalEvaluation)
+        prompt = PromptTemplate(
+            template=EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE,
+            input_variables=["goal_prompt", "chat_history"],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal_evaluation: GoalEvaluation = chain.invoke(
+            {
+                "goal_prompt": goal.to_prompt(),
+                "chat_history": str(relevant_chat_history),
+            },
+            config=get_langfuse_langchain_config(),
+        )
+        return goal_evaluation
+
+    def save_evaluated_goal(self, goal: Goal, evaluation: GoalEvaluation) -> None:
+        evaluated_goal = EvaluatedGoal(
+            goal=goal.goal,
+            motivation=goal.motivation,
+            completion_criteria=goal.completion_criteria,
+            is_complete=evaluation.is_complete,
+            reasoning=evaluation.reasoning,
+            output=evaluation.output,
+        )
+        model = evaluated_goal.to_model(agent_id=self.agent_id)
+        self.table_handler.save_evaluated_goal(model)
+
+    @staticmethod
+    def evaluated_goals_to_str(evaluated_goals: list[EvaluatedGoal]) -> str:
+        goals_str = ""
+        for i, goal in enumerate(evaluated_goals):
+            goals_str += f"## Goal {i+1}:\n{goal}\n"
+            if i < len(evaluated_goals) - 1:
+                goals_str += "\n"
+        return goals_str
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index eebe5fbe..89cf0664 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -2,11 +2,18 @@
 from prediction_market_agent_tooling.deploy.agent import DeployableAgent
 from prediction_market_agent_tooling.loggers import logger
 from prediction_market_agent_tooling.markets.markets import MarketType
+from prediction_market_agent_tooling.tools.utils import check_not_none

+from prediction_market_agent.agents.goal_manager import GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
 from prediction_market_agent.agents.microchain_agent.microchain_agent import (
     SupportedModel,
     build_agent,
     get_editable_prompt_from_agent,
+    get_functions_summary_list,
     get_unformatted_system_prompt,
     save_agent_history,
 )
@@ -32,6 +39,12 @@ class DeployableMicrochainAgent(DeployableAgent):
     system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN

+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager | None:
+        return None
+
     def run(
         self,
         market_type: MarketType,
@@ -54,6 +67,7 @@ def run(
                 prompt_handler if self.load_historical_prompt else None
             ),
         )
+
         agent: Agent = build_agent(
             market_type=market_type,
             model=self.model,
@@ -67,6 +81,10 @@ def run(
             enable_langfuse=self.enable_langfuse,
         )

+        if goal_manager := self.build_goal_manager(agent=agent):
+            goal = goal_manager.get_goal()
+            agent.prompt = goal.to_prompt()
+
         # Save formatted system prompt
         initial_formatted_system_prompt = agent.system_prompt
@@ -76,6 +94,23 @@ def run(
             logger.error(e)
             raise e
         finally:
+            if goal_manager:
+                goal = check_not_none(goal)
+                goal_evaluation = goal_manager.evaluate_goal_progress(
+                    goal=goal,
+                    chat_history=ChatHistory.from_list_of_dicts(agent.history),
+                )
+                goal_manager.save_evaluated_goal(
+                    goal=goal,
+                    evaluation=goal_evaluation,
+                )
+                agent.history.append(
+                    ChatMessage(
+                        role="user",
+                        content=f"# Goal evaluation\n{goal_evaluation}",
+                    ).model_dump()
+                )
+
             save_agent_history(
                 agent=agent,
                 long_term_memory=long_term_memory,
@@ -116,3 +151,20 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 ):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3
     model = SupportedModel.llama_31_instruct
+
+
+class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
+    task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
+    model = SupportedModel.gpt_4o
+    system_prompt_choice = SystemPromptChoice.TRADING_AGENT_MINIMAL
+
+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager:
+        return GoalManager(
+            agent_id=self.task_description,
+            high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
+            agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine)}",
+            retry_limit=3,
+        )
diff --git a/prediction_market_agent/agents/microchain_agent/memory.py b/prediction_market_agent/agents/microchain_agent/memory.py
index 27788466..53f667bb 100644
--- a/prediction_market_agent/agents/microchain_agent/memory.py
+++ b/prediction_market_agent/agents/microchain_agent/memory.py
@@ -23,6 +23,9 @@ class ChatMessage(BaseModel):
     def is_system_message(self) -> bool:
         return self.role == "system"

+    def __str__(self) -> str:
+        return f"{self.role}: {self.content}"
+

 class DatedChatMessage(ChatMessage):
     datetime_: datetime
@@ -98,6 +101,9 @@ def iterations(self) -> int:
         else:
             return (self.num_messages - 1) // 2

+    def __str__(self) -> str:
+        return "\n".join(str(m) for m in self.chat_messages)
+

 class DatedChatHistory(ChatHistory):
     chat_messages: Sequence[DatedChatMessage]
diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index 9669fd5c..064efb94 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -247,3 +247,9 @@ def save_agent_history(

 def get_editable_prompt_from_agent(agent: Agent) -> str:
     return extract_updatable_system_prompt(str(agent.system_prompt))
+
+
+def get_functions_summary_list(engine: Engine) -> str:
+    return "\n".join(
+        [f"- {fname}: {f.description}" for fname, f in engine.functions.items()]
+    )
diff --git a/prediction_market_agent/agents/microchain_agent/prompts.py b/prediction_market_agent/agents/microchain_agent/prompts.py
index 4cb04a90..2bc71593 100644
--- a/prediction_market_agent/agents/microchain_agent/prompts.py
+++ b/prediction_market_agent/agents/microchain_agent/prompts.py
@@ -33,6 +33,22 @@
 Only output a single function call per message.
 Make 'Reasoning' calls frequently - at least every other call.
 """
+
+# This is similar to the TRADING_AGENT_SYSTEM_PROMPT, except that it doesn't
+# contain any specific instructions on what to do. This is appropriate to use
+# for an agent when combined with a user prompt containing the instructions for
+# the session.
+TRADING_AGENT_SYSTEM_PROMPT_MINIMAL = f"""You are a helpful assistant, who specializes as an expert trader agent in prediction markets.
+
+{NON_UPDATABLE_DIVIDOR}
+
+{{engine_help}}
+
+Only output valid Python function calls, without code formatting characters, without any other text. i.e. it should run if passed to Python's `eval` function.
+Only output a single function call per message.
+Make 'Reasoning' calls frequently - at least every other call. You need to reason step by step.
+"""
+
 # Experimental system prompt for task-solving agent.
 TASK_AGENT_SYSTEM_PROMPT = f"""Act as a task-solving agents that picks up available tasks and solves them for getting rewards.
@@ -74,6 +90,7 @@ def build_full_unformatted_system_prompt(system_prompt: str) -> str:
 class SystemPromptChoice(str, Enum):
     JUST_BORN = "just_born"
     TRADING_AGENT = "trading_agent"
+    TRADING_AGENT_MINIMAL = "trading_agent_minimal"
     TASK_AGENT = "task_agent"

@@ -98,7 +115,10 @@ def from_system_prompt_choice(
         include_agent_functions = True
         include_trading_functions = True

-    elif system_prompt_choice == SystemPromptChoice.TRADING_AGENT:
+    elif system_prompt_choice in [
+        SystemPromptChoice.TRADING_AGENT,
+        SystemPromptChoice.TRADING_AGENT_MINIMAL,
+    ]:
         include_trading_functions = True

     elif system_prompt_choice == SystemPromptChoice.TASK_AGENT:
@@ -118,4 +138,5 @@ def from_system_prompt_choice(
     SystemPromptChoice.JUST_BORN: SYSTEM_PROMPT,
     SystemPromptChoice.TRADING_AGENT: TRADING_AGENT_SYSTEM_PROMPT,
     SystemPromptChoice.TASK_AGENT: TASK_AGENT_SYSTEM_PROMPT,
+    SystemPromptChoice.TRADING_AGENT_MINIMAL: TRADING_AGENT_SYSTEM_PROMPT_MINIMAL,
 }
diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py
index 9b5910ff..4e903850 100644
--- a/prediction_market_agent/agents/utils.py
+++ b/prediction_market_agent/agents/utils.py
@@ -31,6 +31,7 @@ class AgentIdentifier(str, Enum):
     MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2"
     MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3"
     MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app"
+    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "trader-agent-0-with-goal-manager"

 MEMORIES_TO_LEARNINGS_TEMPLATE = """
diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py
new file mode 100644
index 00000000..e2f41abc
--- /dev/null
+++ b/prediction_market_agent/db/evaluated_goal_table_handler.py
@@ -0,0 +1,43 @@
+import typing as t
+
+from sqlmodel import col
+
+from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.db.sql_handler import SQLHandler
+
+
+class EvaluatedGoalTableHandler:
+    def __init__(
+        self,
+        agent_id: str,
+        sqlalchemy_db_url: str | None = None,
+    ):
+        self.agent_id = agent_id
+        self.sql_handler = SQLHandler(
+            model=EvaluatedGoalModel,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
+
+    def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None:
+        self.sql_handler.save_multiple([model])
+
+    def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]:
+        column_to_order: str = EvaluatedGoalModel.datetime_.key  # type: ignore
+        items: t.Sequence[
+            EvaluatedGoalModel
+        ] = self.sql_handler.get_with_filter_and_order(
+            query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id],
+            order_by_column_name=column_to_order,
+            order_desc=True,
+            limit=limit,
+        )
+        return list(items)
+
+    def delete_all_evaluated_goals(self) -> None:
+        """
+        Delete all evaluated goals saved under this handler's `agent_id`.
+        """
+        self.sql_handler.delete_all_entries(
+            col_name=EvaluatedGoalModel.agent_id.key,  # type: ignore
+            col_value=self.agent_id,
+        )
diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py
index 0302a9da..94b5baaf 100644
--- a/prediction_market_agent/db/models.py
+++ b/prediction_market_agent/db/models.py
@@ -29,3 +29,22 @@ class Prompt(SQLModel, table=True):
     # user (or app) should be persisted.
     session_identifier: str
     datetime_: datetime
+
+
+class EvaluatedGoalModel(SQLModel, table=True):
+    """
+    Checkpoint for a general agent's goals. Used to store the agent's progress
+    towards a goal, and to restore it in future sessions.
+    """
+
+    __tablename__ = "evaluated_goals"
+    __table_args__ = {"extend_existing": True}
+    id: Optional[int] = Field(default=None, primary_key=True)
+    agent_id: str  # Per-agent identifier
+    goal: str
+    motivation: str
+    completion_criteria: str
+    is_complete: bool
+    reasoning: str
+    output: str | None
+    datetime_: datetime
diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py
index 92d520c2..4d19e418 100644
--- a/prediction_market_agent/run_agent.py
+++ b/prediction_market_agent/run_agent.py
@@ -25,6 +25,7 @@
     DeployableMicrochainModifiableSystemPromptAgent1,
     DeployableMicrochainModifiableSystemPromptAgent2,
     DeployableMicrochainModifiableSystemPromptAgent3,
+    DeployableMicrochainWithGoalManagerAgent0,
 )
 from prediction_market_agent.agents.omen_cleaner_agent.deploy import OmenCleanerAgent
 from prediction_market_agent.agents.prophet_agent.deploy import (
@@ -59,6 +60,7 @@ class RunnableAgent(str, Enum):
     microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1"
     microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2"
     microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3"
+    microchain_with_goal_manager_agent_0 = "microchain_with_goal_manager_agent_0"
     metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent"
     prophet_gpt4o = "prophet_gpt4o"
     prophet_gpt4 = "prophet_gpt4"
@@ -82,6 +84,7 @@ class RunnableAgent(str, Enum):
     RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1,
     RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2,
     RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3,
+    RunnableAgent.microchain_with_goal_manager_agent_0: DeployableMicrochainWithGoalManagerAgent0,
     RunnableAgent.social_media: DeployableSocialMediaAgent,
     RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent,
     RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent,
diff --git a/pyproject.toml b/pyproject.toml
index 3975e158..b32a0b8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ autoflake = "^2.2.1"
 isort = "^5.13.2"
 markdownify = "^0.11.6"
 tavily-python = "^0.3.9"
-microchain-python = { git = "https://github.com/galatolofederico/microchain", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" }
+microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" }
 pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"}
 psycopg2-binary = "^2.9.9"
 sqlmodel = "^0.0.21"
diff --git a/scripts/delete_agent_db_entries.py b/scripts/delete_agent_db_entries.py
index fa58f0d0..3c14dcc5 100644
--- a/scripts/delete_agent_db_entries.py
+++ b/scripts/delete_agent_db_entries.py
@@ -1,6 +1,9 @@
 import typer

 from prediction_market_agent.agents.utils import AgentIdentifier
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
 from prediction_market_agent.db.long_term_memory_table_handler import (
     LongTermMemoryTableHandler,
 )
@@ -11,7 +14,8 @@ def main(
     session_id: AgentIdentifier,
     delete_memories: bool = True,
     delete_prompts: bool = True,
+    delete_goals: bool = True,
 ) -> None:
     """
-    Delete all memories and prompts for a given agent, defined by the session_id.
+    Delete all memories, prompts, and evaluated goals for a given agent, defined by the session_id.
     """
@@ -31,6 +35,14 @@ def main(
     else:
         print("Memory entries successfully deleted.")

+    if delete_goals:
+        evaluated_goal_table_handler = EvaluatedGoalTableHandler(agent_id=session_id)
+        evaluated_goal_table_handler.delete_all_evaluated_goals()
+        if len(evaluated_goal_table_handler.get_latest_evaluated_goals(limit=1)) != 0:
+            raise Exception("Evaluated goal entries were not deleted.")
+        else:
+            print("Evaluated goal entries successfully deleted.")
+

 if __name__ == "__main__":
     typer.run(main)
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
new file mode 100644
index 00000000..7db1ee3c
--- /dev/null
+++ b/tests/agents/test_goal_manager.py
@@ -0,0 +1,284 @@
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal, Goal, GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL
+from tests.utils import RUN_PAID_TESTS
+
+SQLITE_DB_URL = "sqlite://"
+
+
+def test_have_reached_retry_limit() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="foo",
+        agent_capabilities="bar",
+        retry_limit=0,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+
+    g0 = EvaluatedGoal(
+        goal="goal0",
+        motivation="motivation",
+        completion_criteria="completion_criteria",
+        is_complete=False,
+        reasoning="reasoning",
+        output=None,
+    )
+    g1 = g0.model_copy()
+    g1.goal = "goal1"
+
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is True
+
+    goal_manager.retry_limit = 1
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is True
+    )
+
+    goal_manager.retry_limit = 2
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0])
+        is True
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g1])
+        is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1])
+        is True
+    )
+
+
+def test_evaluated_goals_to_str() -> None:
+    gs = [
+        EvaluatedGoal(
+            goal="foo0",
+            motivation="bar0",
+            completion_criteria="baz0",
+            is_complete=False,
+            reasoning="qux0",
+            output=None,
+        ),
+        EvaluatedGoal(
+            goal="foo1",
+            motivation="bar1",
+            completion_criteria="baz1",
+            is_complete=True,
+            reasoning="qux1",
+            output="output",
+        ),
+    ]
+    goals_str = GoalManager.evaluated_goals_to_str(gs)
+    assert goals_str == (
+        "## Goal 1:\n"
+        "Goal: foo0\n"
+        "Motivation: bar0\n"
+        "Completion Criteria: baz0\n"
+        "Is Complete: False\n"
+        "Reasoning: qux0\n"
+        "Output: None\n"
+        "\n"
+        "## Goal 2:\n"
+        "Goal: foo1\n"
+        "Motivation: bar1\n"
+        "Completion Criteria: baz1\n"
+        "Is Complete: True\n"
+        "Reasoning: qux1\n"
+        "Output: output\n"
+    )
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_generate_goal() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.",
+        agent_capabilities=(
+            "- Web search\n"
+            "- Web scraping\n"
+            "- Accurate predictions of the probability of yes/no outcomes for a given event."
+        ),
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal0 = goal_manager.generate_goal(latest_evaluated_goals=[])
+
+    evaluated_goal = EvaluatedGoal(
+        goal="Investigate the top 5 contenders for the Tour de France, make predictions on their chances of overall victory, and compare these against the market odds.",
+        motivation="The Tour de France is a popular race, so markets are likely to have the highest liquidity",
+        completion_criteria="5 contenders identified, predictions made, and compared against market odds",
+        is_complete=False,
+        reasoning="The Tour de France is cancelled this year.",
+        output=None,
+    )
+    goal2 = goal_manager.generate_goal(latest_evaluated_goals=[evaluated_goal])
+
+    # Generates a goal related to the Tour de France
+    assert "Tour de France" in goal0.goal
+
+    # Does not generate a goal related to the Tour de France, based on the
+    # reasoning of the previous evaluated goal
+    assert "Tour de France" not in goal2.goal
+
+
+def test_get_chat_history_after_goal_prompt() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    assistant_message = ChatMessage(role="assistant", content="The answer is 42.")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            assistant_message,
+        ]
+    )
+    assert GoalManager.get_chat_history_after_goal_prompt(
+        goal=goal, chat_history=chat_history
+    ) == ChatHistory(chat_messages=[assistant_message])
+
+
+def test_get_chat_history_after_goal_prompt_error() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+        ]
+    )
+    with pytest.raises(ValueError) as e:
+        GoalManager.get_chat_history_after_goal_prompt(
+            goal=goal, chat_history=chat_history
+        )
+    assert str(e.value) == "Goal prompt not found in chat history"
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_0() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a 'None' output.
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, place a small bet on them.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="If the winner is competing, place a small bet, otherwise do nothing.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing.",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+            ChatMessage(
+                role="assistant",
+                content="The market id is '0x123' for the TdF winner. Placing bet of 0.01 USD on Tadej Pogacar",
+            ),
+            ChatMessage(role="user", content="Bet successfully placed."),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output is None
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_1() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a non-'None' output.
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing. His winning probability: p_yes=0.27",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output is not None
+    assert "Tadej Pogacar" in goal_evaluation.output
+    assert "0.27" in goal_evaluation.output
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_2() -> None:
+    """
+    Test for the case where the evaluated goal is not completed
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Uhoh, I've hit some exception and need to quit",
+            ),
+        ]
+    )
+    goal_evaluation = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert goal_evaluation.is_complete is False
+    assert goal_evaluation.output is None
diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py
new file mode 100644
index 00000000..b1731816
--- /dev/null
+++ b/tests/db/test_evaluated_goal_table_handler.py
@@ -0,0 +1,114 @@
+from typing import Generator
+
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+
+SQLITE_DB_URL = "sqlite://"
+TEST_AGENT_ID = "test_agent_id"
+
+
+@pytest.fixture(scope="function")
+def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]:
+    """Creates an in-memory SQLite DB for testing"""
+    table_handler = EvaluatedGoalTableHandler(
+        sqlalchemy_db_url=SQLITE_DB_URL,
+        agent_id=TEST_AGENT_ID,
+    )
+    yield table_handler
+
+
+def test_save_load_evaluated_goal_0(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal = EvaluatedGoal(
+        goal="abc",
+        motivation="def",
+        completion_criteria="ghi",
+        is_complete=True,
+        reasoning="jkl",
+        output="mno",
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal.to_model(agent_id=TEST_AGENT_ID)
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal
+
+
+def test_save_load_evaluated_goal_1(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
+
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID)
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal1
+
+    for limit in [2, 3]:
+        loaded_models = table_handler.get_latest_evaluated_goals(limit=limit)
+        assert len(loaded_models) == 2
+        # Check LIFO order
+        assert loaded_models[0].datetime_ > loaded_models[1].datetime_
+        assert [EvaluatedGoal.from_model(model) for model in loaded_models] == [
+            evaluated_goal1,
+            evaluated_goal0,
+        ]
+
+
+def test_save_load_evaluated_goal_multiple_agents(
+    table_handler: EvaluatedGoalTableHandler,
+) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
+
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID + "1")
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal0
diff --git a/tests/test_chat_history.py b/tests/test_chat_history.py
index 103861cd..85c40822 100644
--- a/tests/test_chat_history.py
+++ b/tests/test_chat_history.py
@@ -5,6 +5,8 @@
 from prediction_market_agent_tooling.tools.utils import utcnow

 from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
     DatedChatHistory,
     DatedChatMessage,
 )
@@ -73,3 +75,15 @@ def test_save_to_and_load_from_memory(
         new_chat_history.to_undated_chat_history()
         == chat_history.to_undated_chat_history()
     )
+
+
+def test_stringified_chat_history() -> None:
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content="What is the weather like today?"),
+        ]
+    )
+    assert str(chat_history) == (
+        "system: You are a helpful assistant.\nuser: What is the weather like today?"
+    )
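Usage sketch (illustrative only, not part of the patch): the snippet below exercises the GoalManager flow added above, mirroring the order used in DeployableMicrochainAgent.run. The agent_id, description, and capability strings are hypothetical placeholders, and the DB URL is the in-memory "sqlite://" used by the tests; goal generation and evaluation call an OpenAI model via APIKeys, so an OpenAI API key must be configured.

# Illustrative sketch of the GoalManager flow from this patch.
from prediction_market_agent.agents.goal_manager import GoalManager
from prediction_market_agent.agents.microchain_agent.memory import (
    ChatHistory,
    ChatMessage,
)

goal_manager = GoalManager(
    agent_id="example-agent",  # hypothetical placeholder
    high_level_description="You are a trader agent in prediction markets.",
    agent_capabilities="- Web search\n- Place bets on binary markets",
    retry_limit=3,
    sqlalchemy_db_url="sqlite://",  # in-memory DB, as in the tests above
)

# Returns a fresh goal, or retries the most recent incomplete goal until
# retry_limit identical attempts have accumulated in the evaluated-goals table.
goal = goal_manager.get_goal()

# The goal prompt must appear verbatim in the transcript:
# evaluate_goal_progress() only considers the messages after it, and raises
# ValueError if it is missing. The assistant message here is hand-written
# for illustration; in deployment it is the agent's session history.
chat_history = ChatHistory(
    chat_messages=[
        ChatMessage(role="user", content=goal.to_prompt()),
        ChatMessage(role="assistant", content="Reasoning about the goal..."),
    ]
)

# Judge completion against the transcript, then persist the result so the
# next session's get_goal() call can build on it.
evaluation = goal_manager.evaluate_goal_progress(goal=goal, chat_history=chat_history)
goal_manager.save_evaluated_goal(goal=goal, evaluation=evaluation)

Because the evaluation is persisted per agent_id, separate deployments that share a database stay isolated, which is the same property the multiple-agents test above checks.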