From 906f1f237166929f5e996f040d2e0a04de2df093 Mon Sep 17 00:00:00 2001
From: Peter Jung <peter@jung.ninja>
Date: Thu, 31 Oct 2024 14:59:01 +0100
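Subject: [PATCH] Bump prediction-market-agent-tooling to 0.56.0.dev113; use APIKeys and db_cache max_age

Squash of 4 commits into one:

* Bump prediction-market-agent-tooling from 0.56.0.dev112 to 0.56.0.dev113
  in pyproject.toml and poetry.lock.
* Read the OpenAI API key via APIKeys().openai_api_key instead of
  secret_str_from_env("OPENAI_API_KEY").
* Pass max_age=timedelta(days=1) to db_cache on search_google.
* Move db_cache from fetch_html to web_scrape_strict, also with
  max_age=timedelta(days=1).
* Move the is_predictable_binary import lower in benchmark/agents.py.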

---
 poetry.lock                                               | 8 ++++----
 prediction_prophet/autonolas/research.py                  | 7 ++++---
 prediction_prophet/benchmark/agents.py                    | 2 +-
 .../functions/create_embeddings_from_results.py           | 4 ++--
 prediction_prophet/functions/debate_prediction.py         | 4 ++--
 prediction_prophet/functions/generate_subqueries.py       | 4 ++--
 prediction_prophet/functions/prepare_report.py            | 6 +++---
 prediction_prophet/functions/rerank_subqueries.py         | 4 ++--
 prediction_prophet/functions/web_scrape.py                | 3 ++-
 pyproject.toml                                            | 2 +-
 10 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 05781d77..898e698c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4643,13 +4643,13 @@ test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)"
 
 [[package]]
 name = "prediction-market-agent-tooling"
-version = "0.56.0.dev112"
+version = "0.56.0.dev113"
 description = "Tools to benchmark, deploy and monitor prediction market agents."
 optional = false
 python-versions = "<3.12,>=3.10"
 files = [
-    {file = "prediction_market_agent_tooling-0.56.0.dev112-py3-none-any.whl", hash = "sha256:41d717d7627e636ad23316a06aca30ec1dcc3dec793ec6cb3fc2c6a30670493b"},
-    {file = "prediction_market_agent_tooling-0.56.0.dev112.tar.gz", hash = "sha256:8c4f301ea9ba49689200a68d2225d85f7ce69cd0d1f60c0a37b1d83a667894c1"},
+    {file = "prediction_market_agent_tooling-0.56.0.dev113-py3-none-any.whl", hash = "sha256:ea1ef18122ee88b04910f9d160fa937d58233e654ff179996ea2ed5ae88a605c"},
+    {file = "prediction_market_agent_tooling-0.56.0.dev113.tar.gz", hash = "sha256:e9e98abc24797a49f2101007b955a6075efbae25080c87473bd3e919a25ff953"},
 ]
 
 [package.dependencies]
@@ -7944,4 +7944,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.10"
-content-hash = "e3d9d2478752560753fcaedb8a563aafa2d31591d25d985c4222b5e6951ea15f"
+content-hash = "a0b8295253376d3824de4e6f70901808dc2f291aa32278eed85f361033da4468"
diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py
index b8a0af66..f36c57ac 100644
--- a/prediction_prophet/autonolas/research.py
+++ b/prediction_prophet/autonolas/research.py
@@ -2,6 +2,7 @@
 import os
 import math
 import tenacity
+from datetime import timedelta
 from sklearn.metrics.pairwise import cosine_similarity
 from typing import Any, Dict, Generator, List, Optional, Tuple, TypedDict
 from datetime import datetime, timezone
@@ -32,7 +33,7 @@
 from dateutil import parser
 from prediction_prophet.functions.utils import check_not_none
 from prediction_market_agent_tooling.gtypes import Probability
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from prediction_market_agent_tooling.tools.caches.db_cache import db_cache
 from prediction_prophet.functions.parallelism import par_map
 from pydantic.types import SecretStr
@@ -358,7 +359,7 @@ def fields_dict_to_bullet_list(fields_dict: Dict[str, str]) -> str:
     return bullet_list
 
 @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_fixed(1), reraise=True)
-@db_cache
+@db_cache(max_age=timedelta(days=1))
 def search_google(query: str, num: int = 3) -> List[str]:
     """Search Google using a custom search engine."""
     service = build("customsearch", "v1", developerKey=os.getenv("GOOGLE_SEARCH_API_KEY"))
@@ -1220,7 +1221,7 @@ def make_prediction(
     api_key: SecretStr | None = None,
 ) -> Prediction:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
     
     current_time_utc = datetime.now(timezone.utc)
     formatted_time_utc = current_time_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-6] + "Z"
diff --git a/prediction_prophet/benchmark/agents.py b/prediction_prophet/benchmark/agents.py
index 8a2bd100..0318f42b 100644
--- a/prediction_prophet/benchmark/agents.py
+++ b/prediction_prophet/benchmark/agents.py
@@ -11,7 +11,6 @@
 from prediction_prophet.autonolas.research import EmbeddingModel
 from prediction_prophet.autonolas.research import make_prediction, get_urls_from_queries
 from prediction_prophet.autonolas.research import research as research_autonolas
-from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary
 from prediction_prophet.functions.rephrase_question import rephrase_question
 from prediction_prophet.functions.research import Research, research as prophet_research
 from prediction_prophet.functions.search import search
@@ -26,6 +25,7 @@
 from pydantic.types import SecretStr
 from prediction_prophet.autonolas.research import Prediction as LLMCompletionPredictionDict
 from prediction_market_agent_tooling.tools.langfuse_ import observe
+from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary
 
 if t.TYPE_CHECKING:
     from loguru import Logger
diff --git a/prediction_prophet/functions/create_embeddings_from_results.py b/prediction_prophet/functions/create_embeddings_from_results.py
index 43962005..fcdd6b07 100644
--- a/prediction_prophet/functions/create_embeddings_from_results.py
+++ b/prediction_prophet/functions/create_embeddings_from_results.py
@@ -13,13 +13,13 @@
 from prediction_prophet.models.WebScrapeResult import WebScrapeResult
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from pydantic.types import SecretStr
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from prediction_market_agent_tooling.gtypes import secretstr_to_v1_secretstr
 
 
 def create_embeddings_from_results(results: list[WebScrapeResult], text_splitter: RecursiveCharacterTextSplitter, api_key: SecretStr | None = None) -> Chroma:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
     
     collection = Chroma(embedding_function=OpenAIEmbeddings(api_key=secretstr_to_v1_secretstr(api_key)))
     texts = []
diff --git a/prediction_prophet/functions/debate_prediction.py b/prediction_prophet/functions/debate_prediction.py
index 43345314..b2eb5599 100644
--- a/prediction_prophet/functions/debate_prediction.py
+++ b/prediction_prophet/functions/debate_prediction.py
@@ -9,7 +9,7 @@
 from langchain.schema.output_parser import StrOutputParser
 from langchain_openai import ChatOpenAI
 from langchain.prompts import ChatPromptTemplate
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from prediction_market_agent_tooling.gtypes import secretstr_to_v1_secretstr
 
 
@@ -85,7 +85,7 @@
     
 def make_debated_prediction(prompt: str, additional_information: str, api_key: SecretStr | None = None) -> Prediction:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
         
     formatted_time_utc = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec='seconds') + "Z"
     
diff --git a/prediction_prophet/functions/generate_subqueries.py b/prediction_prophet/functions/generate_subqueries.py
index d93c345a..956eefa5 100644
--- a/prediction_prophet/functions/generate_subqueries.py
+++ b/prediction_prophet/functions/generate_subqueries.py
@@ -3,7 +3,7 @@
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import ChatPromptTemplate
 from pydantic.types import SecretStr
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from prediction_market_agent_tooling.gtypes import secretstr_to_v1_secretstr
 from prediction_market_agent_tooling.tools.langfuse_ import get_langfuse_langchain_config, observe
 
@@ -22,7 +22,7 @@ def generate_subqueries(query: str, limit: int, model: str, temperature: float,
         return [query]
 
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
             
     subquery_generation_prompt = ChatPromptTemplate.from_template(template=subquery_generation_template)
 
diff --git a/prediction_prophet/functions/prepare_report.py b/prediction_prophet/functions/prepare_report.py
index 06037b01..fceec3b9 100644
--- a/prediction_prophet/functions/prepare_report.py
+++ b/prediction_prophet/functions/prepare_report.py
@@ -5,7 +5,7 @@
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.output_parser import StrOutputParser
 from prediction_prophet.functions.utils import trim_to_n_tokens
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from pydantic.types import SecretStr
 from prediction_market_agent_tooling.gtypes import secretstr_to_v1_secretstr
 from prediction_market_agent_tooling.tools.langfuse_ import get_langfuse_langchain_config, observe
@@ -13,7 +13,7 @@
 @observe()
 def prepare_summary(goal: str, content: str, model: str, api_key: SecretStr | None = None, trim_content_to_tokens: t.Optional[int] = None) -> str:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
     
     prompt_template = """Write comprehensive summary of the following web content, that provides relevant information to answer the question: '{goal}'.
 But cut the fluff and keep it up to the point.
@@ -43,7 +43,7 @@ def prepare_summary(goal: str, content: str, model: str, api_key: SecretStr | No
 @observe()
 def prepare_report(goal: str, scraped: list[str], model: str, temperature: float, api_key: SecretStr | None = None) -> str:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
         
     evaluation_prompt_template = """
     You are a professional researcher. Your goal is to provide a relevant information report
diff --git a/prediction_prophet/functions/rerank_subqueries.py b/prediction_prophet/functions/rerank_subqueries.py
index b240d3ca..57487c67 100644
--- a/prediction_prophet/functions/rerank_subqueries.py
+++ b/prediction_prophet/functions/rerank_subqueries.py
@@ -3,7 +3,7 @@
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.output_parser import StrOutputParser
 from pydantic.types import SecretStr
-from prediction_market_agent_tooling.tools.utils import secret_str_from_env
+from prediction_market_agent_tooling.config import APIKeys
 from prediction_market_agent_tooling.gtypes import secretstr_to_v1_secretstr
 from prediction_market_agent_tooling.tools.langfuse_ import get_langfuse_langchain_config, observe
 
@@ -20,7 +20,7 @@
 @observe()
 def rerank_subqueries(queries: list[str], goal: str, model: str, temperature: float, api_key: SecretStr | None = None) -> list[str]:
     if api_key == None:
-        api_key = secret_str_from_env("OPENAI_API_KEY")
+        api_key = APIKeys().openai_api_key
             
     rerank_results_prompt = ChatPromptTemplate.from_template(template=rerank_queries_template)
 
diff --git a/prediction_prophet/functions/web_scrape.py b/prediction_prophet/functions/web_scrape.py
index b15d5668..97df365f 100644
--- a/prediction_prophet/functions/web_scrape.py
+++ b/prediction_prophet/functions/web_scrape.py
@@ -4,11 +4,11 @@
 from bs4 import BeautifulSoup
 from requests import Response
 import tenacity
+from datetime import timedelta
 from prediction_market_agent_tooling.tools.caches.db_cache import db_cache
 
 
 @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_fixed(1), reraise=True)
-@db_cache
 def fetch_html(url: str, timeout: int) -> Response:
     headers = {
         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:107.0) Gecko/20100101 Firefox/107.0"
@@ -16,6 +16,7 @@ def fetch_html(url: str, timeout: int) -> Response:
     response = requests.get(url, headers=headers, timeout=timeout)
     return response
 
+@db_cache(max_age=timedelta(days=1))
 def web_scrape_strict(url: str, timeout: int = 10) -> str:
     response = fetch_html(url=url, timeout=timeout)
 
diff --git a/pyproject.toml b/pyproject.toml
index 3663976c..5e861a81 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ scikit-learn = "^1.4.0"
 typer = ">=0.9.0,<1.0.0"
 types-requests = "^2.31.0.20240125"
 types-python-dateutil = "^2.9.0"
-prediction-market-agent-tooling = { version = "^0.56.0.dev112", extras = ["langchain", "google"] }
+prediction-market-agent-tooling = { version = "^0.56.0.dev113", extras = ["langchain", "google"] }
 langchain-community = "^0.2.6"
 memory-profiler = "^0.61.0"
 matplotlib = "^3.8.3"