Skip to content

Commit

Permalink
Merge pull request #143 from OthersideAI/fix-config
Browse files (browse the repository at this point in the history)
Fix config issue
  • Loading branch information
joshbickett authored Jan 19, 2024
2 parents d0369f8 + 4c61db4 commit 59d93df
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 17 deletions.
24 changes: 15 additions & 9 deletions operate/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
from dotenv import load_dotenv
from openai import OpenAI
from prompt_toolkit.shortcuts import input_dialog
import google.generativeai as genai


class Config:
Expand All @@ -18,15 +19,19 @@ class Config:
def __init__(self):
load_dotenv()
self.verbose = False
self.openai_api_key = os.getenv("OPENAI_API_KEY", "")
self.google_api_key = os.getenv("GOOGLE_API_KEY", "")

def initialize_openai(self):
client = OpenAI()
client.api_key = self.openai_api_key
client.api_key = os.getenv("OPENAI_API_KEY")
client.base_url = os.getenv("OPENAI_API_BASE_URL", client.base_url)
return client

def initialize_google(self):
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"), transport="rest")
model = genai.GenerativeModel("gemini-pro-vision")

return model

def validation(self, model, voice_mode):
"""
Validate the input parameters for the dialog operation.
Expand All @@ -39,7 +44,13 @@ def validation(self, model, voice_mode):
)

def require_api_key(self, key_name, key_description, is_required):
if is_required and not getattr(self, key_name.lower()):
key_exists = bool(os.environ.get(key_name))
if self.verbose:
print("[Config] require_api_key")
print("[Config] key_name", key_name)
print("[Config] key_description", key_description)
print("[Config] key_exists", key_exists)
if is_required and not key_exists:
self.prompt_and_save_api_key(key_name, key_description)

def prompt_and_save_api_key(self, key_name, key_description):
Expand All @@ -55,11 +66,6 @@ def prompt_and_save_api_key(self, key_name, key_description):
load_dotenv() # Reload environment variables
# Update the instance attribute with the new key

if key_value:
self.save_api_key_to_env(key_name, key_value)
load_dotenv() # Reload environment variables
setattr(self, key_name.lower(), key_value)

@staticmethod
def save_api_key_to_env(key_name, key_value):
with open(".env", "a") as file:
Expand Down
26 changes: 19 additions & 7 deletions operate/models/apis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@

from PIL import Image
from ultralytics import YOLO
import google.generativeai as genai

from operate.config import Config
from operate.exceptions import ModelNotRecognizedException
from operate.utils.screenshot import (
Expand All @@ -35,9 +35,13 @@

# Load configuration
VERBOSE = Config().verbose
config = Config()


async def get_next_action(model, messages, objective, session_id):
if VERBOSE:
print("[Self-Operating Computer][get_next_action]")
print("[Self-Operating Computer][get_next_action] model", model)
if model == "gpt-4":
return call_gpt_4_vision_preview(messages), None
if model == "gpt-4-with-som":
Expand All @@ -52,11 +56,10 @@ async def get_next_action(model, messages, objective, session_id):


def call_gpt_4_vision_preview(messages):
config = Config()
client = config.initialize_openai()
if VERBOSE:
print("[Self Operating Computer][get_next_action][call_gpt_4_v]")
time.sleep(1)
client = config.initialize_openai()
try:
screenshots_dir = "screenshots"
if not os.path.exists(screenshots_dir):
Expand Down Expand Up @@ -137,7 +140,10 @@ def call_gemini_pro_vision(messages, objective):
"""
Get the next action for Self-Operating Computer using Gemini Pro Vision
"""
config = Config()
if VERBOSE:
print(
"[Self Operating Computer][call_gemini_pro_vision]",
)
# sleep for a second
time.sleep(1)
try:
Expand All @@ -152,11 +158,18 @@ def call_gemini_pro_vision(messages, objective):
time.sleep(1)
prompt = get_system_prompt(objective)

model = genai.GenerativeModel("gemini-pro-vision")
model = config.initialize_google()
if VERBOSE:
print("[Self Operating Computer][call_gemini_pro_vision] model", model)

response = model.generate_content([prompt, Image.open(screenshot_filename)])

content = response.text[1:]
if VERBOSE:
print(
"[Self Operating Computer][call_gemini_pro_vision] response", response
)
print("[Self Operating Computer][call_gemini_pro_vision] content", content)

content = json.loads(content)
if VERBOSE:
Expand All @@ -176,9 +189,8 @@ def call_gemini_pro_vision(messages, objective):


async def call_gpt_4_vision_preview_labeled(messages, objective):
config = Config()
client = config.initialize_openai()
time.sleep(1)
client = config.initialize_openai()
try:
yolo_model = YOLO("./operate/models/weights/best.pt") # Load your trained model
screenshots_dir = "screenshots"
Expand Down
2 changes: 1 addition & 1 deletion operate/operate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,6 +45,7 @@ def main(model, terminal_prompt, voice_mode=False):
Returns:
None
"""

mic = None
# Initialize `WhisperMic`, if `voice_mode` is True

Expand Down Expand Up @@ -109,7 +110,6 @@ def main(model, terminal_prompt, voice_mode=False):

while True:
if VERBOSE:
print("[Self Operating Computer]")
print("[Self Operating Computer] loop_count", loop_count)
try:
operations, session_id = asyncio.run(
Expand Down

0 comments on commit 59d93df

Please sign in to comment.