Skip to content

Commit 59d93df

Browse files
authored
Merge pull request #143 from OthersideAI/fix-config
Fix config issue
2 parents d0369f8 + 4c61db4 commit 59d93df

File tree

3 files changed

+35
-17
lines changed

3 files changed

+35
-17
lines changed

operate/config.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from dotenv import load_dotenv
44
from openai import OpenAI
55
from prompt_toolkit.shortcuts import input_dialog
6+
import google.generativeai as genai
67

78

89
class Config:
@@ -18,15 +19,19 @@ class Config:
1819
def __init__(self):
1920
load_dotenv()
2021
self.verbose = False
21-
self.openai_api_key = os.getenv("OPENAI_API_KEY", "")
22-
self.google_api_key = os.getenv("GOOGLE_API_KEY", "")
2322

2423
def initialize_openai(self):
2524
client = OpenAI()
26-
client.api_key = self.openai_api_key
25+
client.api_key = os.getenv("OPENAI_API_KEY")
2726
client.base_url = os.getenv("OPENAI_API_BASE_URL", client.base_url)
2827
return client
2928

29+
def initialize_google(self):
30+
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"), transport="rest")
31+
model = genai.GenerativeModel("gemini-pro-vision")
32+
33+
return model
34+
3035
def validation(self, model, voice_mode):
3136
"""
3237
Validate the input parameters for the dialog operation.
@@ -39,7 +44,13 @@ def validation(self, model, voice_mode):
3944
)
4045

4146
def require_api_key(self, key_name, key_description, is_required):
42-
if is_required and not getattr(self, key_name.lower()):
47+
key_exists = bool(os.environ.get(key_name))
48+
if self.verbose:
49+
print("[Config] require_api_key")
50+
print("[Config] key_name", key_name)
51+
print("[Config] key_description", key_description)
52+
print("[Config] key_exists", key_exists)
53+
if is_required and not key_exists:
4354
self.prompt_and_save_api_key(key_name, key_description)
4455

4556
def prompt_and_save_api_key(self, key_name, key_description):
@@ -55,11 +66,6 @@ def prompt_and_save_api_key(self, key_name, key_description):
5566
load_dotenv() # Reload environment variables
5667
# Update the instance attribute with the new key
5768

58-
if key_value:
59-
self.save_api_key_to_env(key_name, key_value)
60-
load_dotenv() # Reload environment variables
61-
setattr(self, key_name.lower(), key_value)
62-
6369
@staticmethod
6470
def save_api_key_to_env(key_name, key_value):
6571
with open(".env", "a") as file:

operate/models/apis.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
from PIL import Image
1010
from ultralytics import YOLO
11-
import google.generativeai as genai
11+
1212
from operate.config import Config
1313
from operate.exceptions import ModelNotRecognizedException
1414
from operate.utils.screenshot import (
@@ -35,9 +35,13 @@
3535

3636
# Load configuration
3737
VERBOSE = Config().verbose
38+
config = Config()
3839

3940

4041
async def get_next_action(model, messages, objective, session_id):
42+
if VERBOSE:
43+
print("[Self-Operating Computer][get_next_action]")
44+
print("[Self-Operating Computer][get_next_action] model", model)
4145
if model == "gpt-4":
4246
return call_gpt_4_vision_preview(messages), None
4347
if model == "gpt-4-with-som":
@@ -52,11 +56,10 @@ async def get_next_action(model, messages, objective, session_id):
5256

5357

5458
def call_gpt_4_vision_preview(messages):
55-
config = Config()
56-
client = config.initialize_openai()
5759
if VERBOSE:
5860
print("[Self Operating Computer][get_next_action][call_gpt_4_v]")
5961
time.sleep(1)
62+
client = config.initialize_openai()
6063
try:
6164
screenshots_dir = "screenshots"
6265
if not os.path.exists(screenshots_dir):
@@ -137,7 +140,10 @@ def call_gemini_pro_vision(messages, objective):
137140
"""
138141
Get the next action for Self-Operating Computer using Gemini Pro Vision
139142
"""
140-
config = Config()
143+
if VERBOSE:
144+
print(
145+
"[Self Operating Computer][call_gemini_pro_vision]",
146+
)
141147
# sleep for a second
142148
time.sleep(1)
143149
try:
@@ -152,11 +158,18 @@ def call_gemini_pro_vision(messages, objective):
152158
time.sleep(1)
153159
prompt = get_system_prompt(objective)
154160

155-
model = genai.GenerativeModel("gemini-pro-vision")
161+
model = config.initialize_google()
162+
if VERBOSE:
163+
print("[Self Operating Computer][call_gemini_pro_vision] model", model)
156164

157165
response = model.generate_content([prompt, Image.open(screenshot_filename)])
158166

159167
content = response.text[1:]
168+
if VERBOSE:
169+
print(
170+
"[Self Operating Computer][call_gemini_pro_vision] response", response
171+
)
172+
print("[Self Operating Computer][call_gemini_pro_vision] content", content)
160173

161174
content = json.loads(content)
162175
if VERBOSE:
@@ -176,9 +189,8 @@ def call_gemini_pro_vision(messages, objective):
176189

177190

178191
async def call_gpt_4_vision_preview_labeled(messages, objective):
179-
config = Config()
180-
client = config.initialize_openai()
181192
time.sleep(1)
193+
client = config.initialize_openai()
182194
try:
183195
yolo_model = YOLO("./operate/models/weights/best.pt") # Load your trained model
184196
screenshots_dir = "screenshots"

operate/operate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ def main(model, terminal_prompt, voice_mode=False):
4545
Returns:
4646
None
4747
"""
48+
4849
mic = None
4950
# Initialize `WhisperMic`, if `voice_mode` is True
5051

@@ -109,7 +110,6 @@ def main(model, terminal_prompt, voice_mode=False):
109110

110111
while True:
111112
if VERBOSE:
112-
print("[Self Operating Computer]")
113113
print("[Self Operating Computer] loop_count", loop_count)
114114
try:
115115
operations, session_id = asyncio.run(

0 commit comments

Comments
 (0)