versionhq 1.2.4.13__py3-none-any.whl → 1.2.4.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +1 -1
- versionhq/_utils/__init__.py +1 -0
- versionhq/_utils/handle_directory.py +15 -0
- versionhq/agent/model.py +5 -3
- versionhq/tool/gpt/_enum.py +1 -1
- versionhq/tool/gpt/cua.py +313 -152
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.14.dist-info}/METADATA +2 -1
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.14.dist-info}/RECORD +11 -10
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.14.dist-info}/WHEEL +1 -1
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.14.dist-info}/licenses/LICENSE +0 -0
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.14.dist-info}/top_level.txt +0 -0
versionhq/__init__.py
CHANGED
versionhq/_utils/__init__.py
CHANGED
@@ -5,3 +5,4 @@ from versionhq._utils.is_valid_url import is_valid_url
|
|
5
5
|
from versionhq._utils.usage_metrics import UsageMetrics, ErrorType
|
6
6
|
from versionhq._utils.convert_img_url import convert_img_url
|
7
7
|
from versionhq._utils.is_valid_enum import is_valid_enum
|
8
|
+
from versionhq._utils.handle_directory import handle_directory
|
@@ -0,0 +1,15 @@
|
|
1
|
+
import os
|
2
|
+
import datetime
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
|
6
|
+
def handle_directory(directory_name: str = None, filename: str = None, ext: str = 'png') -> Path:
|
7
|
+
"""Creates and returns the absolute file path"""
|
8
|
+
|
9
|
+
os.makedirs(directory_name, exist_ok=True)
|
10
|
+
|
11
|
+
date = str(datetime.datetime.now().strftime('%j'))
|
12
|
+
cwd = Path.cwd()
|
13
|
+
DIRECTORY = cwd / f'{directory_name}/{filename}_{date}.{ext}'
|
14
|
+
|
15
|
+
return DIRECTORY
|
versionhq/agent/model.py
CHANGED
@@ -454,14 +454,14 @@ class Agent(BaseModel):
|
|
454
454
|
return rag_tools, gpt_tools, tools
|
455
455
|
|
456
456
|
|
457
|
-
def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput
|
457
|
+
def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput or None
|
458
458
|
"""Generates k, v pairs from multiple GPT tool results and stores them in TaskOutput class."""
|
459
459
|
|
460
460
|
from versionhq.task.model import TaskOutput
|
461
461
|
from versionhq._utils import UsageMetrics
|
462
462
|
|
463
463
|
if not gpt_tools:
|
464
|
-
return
|
464
|
+
return None
|
465
465
|
|
466
466
|
tool_res = dict()
|
467
467
|
annotation_set = dict()
|
@@ -470,7 +470,9 @@ class Agent(BaseModel):
|
|
470
470
|
for i, item in enumerate(gpt_tools):
|
471
471
|
raw, annotations, usage = item.run()
|
472
472
|
tool_res.update({ str(i): raw })
|
473
|
-
|
473
|
+
|
474
|
+
if annotations:
|
475
|
+
annotation_set.update({ str(i): annotations })
|
474
476
|
total_usage.aggregate(metrics=usage)
|
475
477
|
|
476
478
|
res = TaskOutput(raw=str(tool_res), tool_output=tool_res, usage=total_usage, annotations=annotation_set)
|
versionhq/tool/gpt/_enum.py
CHANGED
versionhq/tool/gpt/cua.py
CHANGED
@@ -1,28 +1,29 @@
|
|
1
|
+
import base64
|
1
2
|
import datetime
|
2
3
|
import time
|
4
|
+
import platform
|
3
5
|
from typing import List, Dict, Any, Tuple
|
4
6
|
|
5
7
|
from versionhq._utils import convert_img_url
|
6
8
|
from versionhq.tool.gpt import openai_client
|
7
|
-
from versionhq.tool.gpt._enum import
|
8
|
-
from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType, Logger, is_valid_url
|
9
|
+
from versionhq.tool.gpt._enum import GPTCUABrowserEnum, GPTCUATypeEnum, GPTSizeEnum
|
10
|
+
from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType, Logger, is_valid_url, handle_directory
|
9
11
|
|
10
|
-
|
11
|
-
allowed_browsers = ['webkit', 'chromium', 'firefox']
|
12
|
+
allowed_browsers = ['chromium', 'firefox']
|
12
13
|
|
13
14
|
|
14
15
|
class CUAToolSchema:
|
15
16
|
type: str = GPTCUATypeEnum.COMPUTER_USE_PREVIEW.value
|
16
17
|
display_width: int = 1024
|
17
18
|
display_height: int = 768
|
18
|
-
environment: str =
|
19
|
+
environment: str = GPTCUABrowserEnum.BROWSER.value
|
19
20
|
|
20
21
|
def __init__(
|
21
22
|
self,
|
22
23
|
type: str | GPTCUATypeEnum = None,
|
23
24
|
display_width: int = None,
|
24
25
|
display_height: int = None,
|
25
|
-
environment: str |
|
26
|
+
environment: str | GPTCUABrowserEnum = None
|
26
27
|
):
|
27
28
|
self.display_height = display_height if display_height else self.display_height
|
28
29
|
self.display_width = display_width if display_width else self.display_width
|
@@ -30,11 +31,8 @@ class CUAToolSchema:
|
|
30
31
|
if type and is_valid_enum(enum=GPTCUATypeEnum, val=type):
|
31
32
|
self.type = type.value if isinstance(type, GPTCUATypeEnum) else type
|
32
33
|
|
33
|
-
if environment and is_valid_enum(enum=
|
34
|
-
self.environment = environment.value if isinstance(environment,
|
35
|
-
|
36
|
-
self.environment = environment if environment else self.environment
|
37
|
-
|
34
|
+
if environment and is_valid_enum(enum=GPTCUABrowserEnum, val=environment):
|
35
|
+
self.environment = environment.value if isinstance(environment, GPTCUABrowserEnum) else environment
|
38
36
|
|
39
37
|
@property
|
40
38
|
def schema(self) -> Dict[str, Any]:
|
@@ -56,8 +54,10 @@ class GPTToolCUA:
|
|
56
54
|
reasoning_effort: str = GPTSizeEnum.MEDIUM.value
|
57
55
|
truncation: str = "auto"
|
58
56
|
|
57
|
+
_schema: Dict[str, Any] = dict()
|
59
58
|
_response_ids: List[str] = list()
|
60
59
|
_call_ids: List[str] = list()
|
60
|
+
_calls: Dict[str, Dict[str, Any]] = dict() # stores response_id and raw output object.
|
61
61
|
_usage: UsageMetrics = UsageMetrics()
|
62
62
|
_logger: Logger = Logger(info_file_save=True, filename="cua-task-{}".format(str(datetime.datetime.now().timestamp())) + ".png")
|
63
63
|
|
@@ -74,7 +74,7 @@ class GPTToolCUA:
|
|
74
74
|
_usage: UsageMetrics = UsageMetrics()
|
75
75
|
):
|
76
76
|
self.user_prompt = user_prompt
|
77
|
-
self.web_url = web_url if is_valid_url(web_url) else
|
77
|
+
self.web_url = web_url if is_valid_url(web_url) else None
|
78
78
|
self.browser = browser if browser in allowed_browsers else 'chromium'
|
79
79
|
self.truncation = truncation if truncation else self.truncation
|
80
80
|
self._usage = _usage
|
@@ -104,104 +104,93 @@ class GPTToolCUA:
|
|
104
104
|
pass
|
105
105
|
|
106
106
|
|
107
|
-
def
|
108
|
-
|
109
|
-
if not page:
|
110
|
-
return None, None
|
111
|
-
|
112
|
-
path = path if path else "screenshot.png"
|
113
|
-
screenshot_bytes = page.screenshot()
|
114
|
-
screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
115
|
-
self._logger.log(message=f"Action: screenshot", level="info", color="blue")
|
116
|
-
return screenshot_bytes, screenshot_base64
|
117
|
-
|
118
|
-
|
119
|
-
def _handle_model_action(self, page: Any, action: Any, action_type: str = None) -> bool:
|
120
|
-
"""Creates a page object and performs actions."""
|
107
|
+
def _structure_schema(self, screenshot: str = None) -> None:
|
108
|
+
"""Formats args schema for CUA calling."""
|
121
109
|
|
122
|
-
|
123
|
-
|
110
|
+
tool_schema = [item.schema for item in self.tools]
|
111
|
+
schema = dict()
|
112
|
+
inputs = list()
|
113
|
+
previous_response_id = self._response_ids[-1] if self._response_ids else None
|
114
|
+
# (self._response_ids[-1].startswith("rs") or self._response_ids[-1].startswith("resp")) else None
|
124
115
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
self._logger.log(message=f"Action: type text: {text}", level="info", color="blue")
|
156
|
-
page.keyboard.type(text)
|
157
|
-
|
158
|
-
case "wait":
|
159
|
-
self._logger.log(message=f"Action: wait", level="info", color="blue")
|
160
|
-
time.sleep(2)
|
161
|
-
|
162
|
-
case "screenshot":
|
163
|
-
pass
|
116
|
+
if self._call_ids:
|
117
|
+
inputs = [
|
118
|
+
{
|
119
|
+
"call_id": self._call_ids[-1],
|
120
|
+
"type": "computer_call_output",
|
121
|
+
}
|
122
|
+
]
|
123
|
+
if screenshot:
|
124
|
+
inputs[0].update({ "output": { "type": "computer_screenshot", "image_url": f"data:image/png;base64,{str(screenshot)}"}})
|
125
|
+
|
126
|
+
# if self._calls:
|
127
|
+
# call = self._calls[self._call_ids[-1]]
|
128
|
+
# if call and call.call_id not in inputs[0]:
|
129
|
+
# inputs.append(call)
|
130
|
+
|
131
|
+
if previous_response_id:
|
132
|
+
schema = dict(
|
133
|
+
model=self.model,
|
134
|
+
previous_response_id=previous_response_id,
|
135
|
+
tools=tool_schema,
|
136
|
+
input=inputs,
|
137
|
+
truncation=self.truncation
|
138
|
+
)
|
139
|
+
else:
|
140
|
+
schema = dict(
|
141
|
+
model=self.model,
|
142
|
+
tools=tool_schema,
|
143
|
+
input=inputs,
|
144
|
+
truncation=self.truncation
|
145
|
+
)
|
164
146
|
|
165
|
-
|
166
|
-
|
147
|
+
else:
|
148
|
+
input = [{ "role": "user", "content": self.user_prompt } ]
|
149
|
+
img_url = convert_img_url(self.img_url) if self.img_url else None
|
150
|
+
if img_url:
|
151
|
+
input.append({"type": "input_image", "image_url": f"data:image/png;base64,{img_url}"})
|
167
152
|
|
168
|
-
|
169
|
-
|
170
|
-
|
153
|
+
schema = dict(
|
154
|
+
model=self.model,
|
155
|
+
tools=tool_schema,
|
156
|
+
input=input,
|
157
|
+
reasoning={ "effort": self.reasoning_effort},
|
158
|
+
truncation=self.truncation
|
159
|
+
)
|
171
160
|
|
172
|
-
|
173
|
-
self.
|
174
|
-
return bool(self._usage.total_errors)
|
161
|
+
self._schema = schema
|
162
|
+
# return self._schema
|
175
163
|
|
176
164
|
|
177
|
-
def
|
165
|
+
def _run(self, screenshot: str = None) -> Tuple[Dict[str, Any], None, UsageMetrics]:
|
178
166
|
raw_res = dict()
|
179
167
|
usage = self._usage if self._usage else UsageMetrics()
|
180
168
|
start_dt = datetime.datetime.now()
|
181
169
|
|
182
170
|
try:
|
183
|
-
|
184
|
-
|
185
|
-
output_image_url = schema["input"][0]["output"]["image_url"].replace("SCREENSHOT", str(screenshot))
|
186
|
-
schema["input"][0]["output"]["image_url"] = output_image_url
|
187
|
-
|
188
|
-
res = openai_client.responses.create(**schema)
|
171
|
+
self._structure_schema(screenshot=screenshot)
|
172
|
+
res = openai_client.responses.create(**self._schema)
|
189
173
|
if not res:
|
190
174
|
usage.record_errors(ErrorType.TOOL)
|
191
175
|
else:
|
176
|
+
self._response_ids.append(res.id)
|
192
177
|
for item in res.output:
|
178
|
+
|
193
179
|
match item.type:
|
194
180
|
case "reasoning":
|
195
|
-
|
196
|
-
|
197
|
-
|
181
|
+
reasoning = item.summary[0].text if item.summary and isinstance(item.summary, list) else str(item.summary) if item.summary else ""
|
182
|
+
raw_res.update(dict(reasoning=reasoning))
|
183
|
+
# self._response_ids.append(item.id)
|
184
|
+
|
198
185
|
case "computer_call":
|
199
186
|
raw_res.update(dict(action=item.action))
|
200
187
|
# self._response_ids.append(item.id)
|
201
|
-
|
188
|
+
call_id = item.call_id
|
189
|
+
self._call_ids.append(call_id)
|
190
|
+
self._calls.update({ call_id: item })
|
202
191
|
case _:
|
203
192
|
pass
|
204
|
-
|
193
|
+
usage.record_token_usage(**res.usage.__dict__)
|
205
194
|
|
206
195
|
except Exception as e:
|
207
196
|
self._logger.log(message=f"Failed to run: {str(e)}", color="red", level="error")
|
@@ -212,84 +201,256 @@ class GPTToolCUA:
|
|
212
201
|
return raw_res, None, usage
|
213
202
|
|
214
203
|
|
215
|
-
def invoke_playwright(self) ->
|
204
|
+
def invoke_playwright(self) -> Dict[str, Any]:
|
216
205
|
"""Handles computer use loop. Ref. OpenAI official website."""
|
206
|
+
try:
|
207
|
+
from playwright.sync_api import sync_playwright
|
208
|
+
except Exception as e:
|
209
|
+
self._logger.log(level="error", message=f"Install Playwright by adding `versionhq[tools]` to requirements.txt or run `uv add playwright`. {str(e)}", color="red")
|
210
|
+
raise e
|
217
211
|
|
218
|
-
|
219
|
-
|
220
|
-
self._logger.log(message="Start
|
212
|
+
import os
|
213
|
+
os.environ["DEBUG"] = "pw:browser"
|
214
|
+
self._logger.log(message="Start computer use.", level="info", color="blue")
|
215
|
+
start_dt = datetime.datetime.now()
|
216
|
+
res = None
|
217
|
+
|
218
|
+
# try:
|
219
|
+
p = sync_playwright().start()
|
220
|
+
b = p.firefox if self.browser == "firefox" else p.chromium
|
221
|
+
browser = b.launch(headless=True)
|
222
|
+
page = browser.new_page()
|
223
|
+
if not browser or not page:
|
224
|
+
return None, None, None
|
225
|
+
|
226
|
+
if self.web_url:
|
227
|
+
page.goto(self.web_url, timeout=3000000, wait_until="load", referer=None)
|
228
|
+
time.sleep(3)
|
229
|
+
|
230
|
+
res, _, usage = self._run()
|
231
|
+
self._usage.aggregate(metrics=usage)
|
232
|
+
actions = [v for k, v in res.items() if k =="action"] if res else []
|
233
|
+
action = actions[0] if actions else None
|
234
|
+
|
235
|
+
if action:
|
236
|
+
while True:
|
237
|
+
x = action.x if hasattr(action, 'x') else 0
|
238
|
+
y = action.y if hasattr(action, 'y') else 0
|
239
|
+
scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
|
240
|
+
scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
|
241
|
+
text = action.text if hasattr(action, 'text') else ''
|
242
|
+
screenshot_base64 = None
|
243
|
+
path = handle_directory(directory_name='_screenshots', filename=f'cua_playwright', ext='png')
|
244
|
+
|
245
|
+
match action.type:
|
246
|
+
case "click":
|
247
|
+
self._logger.log(message="Action: click", color="blue", level="info")
|
248
|
+
button = action.button if hasattr(action, 'button') and (action.button == 'left' or action.button == 'right') else 'left'
|
249
|
+
page.mouse.move(x, y)
|
250
|
+
page.mouse.click(x, y, button=button)
|
251
|
+
time.sleep(1)
|
252
|
+
|
253
|
+
case "scroll":
|
254
|
+
self._logger.log(message="Action: scroll", color="blue", level="info")
|
255
|
+
page.mouse.move(x, y)
|
256
|
+
page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
|
257
|
+
time.sleep(1)
|
258
|
+
|
259
|
+
case "move":
|
260
|
+
self._logger.log(message="Action: move", color="blue", level="info")
|
261
|
+
page.mouse.move(x, y)
|
262
|
+
page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
|
263
|
+
time.sleep(1)
|
264
|
+
|
265
|
+
case "keypress":
|
266
|
+
self._logger.log(message="Action: keypress", color="blue", level="info")
|
267
|
+
keys = action.keys
|
268
|
+
for k in keys:
|
269
|
+
match k.lower():
|
270
|
+
case "enter": page.keyboard.press("Enter")
|
271
|
+
case "space": page.keyboard.press(" ")
|
272
|
+
case _: page.keyboard.press(k)
|
273
|
+
time.sleep(1)
|
274
|
+
|
275
|
+
case "type":
|
276
|
+
self._logger.log(message="Action: type", color="blue", level="info")
|
277
|
+
page.keyboard.type(text)
|
278
|
+
time.sleep(1)
|
279
|
+
|
280
|
+
case "wait":
|
281
|
+
self._logger.log(message="Action: wait", color="blue", level="info")
|
282
|
+
time.sleep(3)
|
283
|
+
|
284
|
+
case "screenshot":
|
285
|
+
self._logger.log(message="Action: screenshot", color="blue", level="info")
|
286
|
+
screenshot_bytes = page.screenshot(path=path)
|
287
|
+
screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
288
|
+
time.sleep(1)
|
289
|
+
|
290
|
+
case _:
|
291
|
+
self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
|
292
|
+
return False
|
293
|
+
|
294
|
+
if not screenshot_base64:
|
295
|
+
screenshot_bytes = page.screenshot(path=path)
|
296
|
+
screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
297
|
+
time.sleep(1)
|
298
|
+
|
299
|
+
res, _, usage = self._run(screenshot=screenshot_base64)
|
300
|
+
self._usage.aggregate(metrics=usage)
|
301
|
+
if not res:
|
302
|
+
usage.record_errors(type=ErrorType.API)
|
303
|
+
break
|
221
304
|
|
222
|
-
try:
|
223
|
-
with sync_playwright() as p:
|
224
|
-
b = p.firefox if self.browser == "firefox" else p.webkit if self.browser == "webkit" else p.chromium
|
225
|
-
browser = b.launch(headless=True)
|
226
|
-
page = browser.new_page()
|
227
|
-
if not browser or not page:
|
228
|
-
return None, None, None
|
229
|
-
|
230
|
-
page.goto(self.web_url)
|
231
|
-
res, _, usage = self.run()
|
232
|
-
self._usage = usage
|
233
305
|
actions = [v for k, v in res.items() if k =="action"] if res else []
|
234
306
|
action = actions[0] if actions else None
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
self._handle_model_action(page=page, action=action)
|
240
|
-
_, screenshot_base64 = self._take_screenshot(page=page)
|
241
|
-
res, _, usage = self.run(screenshot=screenshot_base64)
|
242
|
-
self._usage.agggregate(metrics=usage)
|
243
|
-
if not res:
|
244
|
-
usage.record_errors(type=ErrorType.API)
|
245
|
-
break
|
246
|
-
|
247
|
-
actions = [v for k, v in res.items() if k =="action"] if res else []
|
248
|
-
action = actions[0] if actions else None
|
249
|
-
if not action:
|
250
|
-
break
|
251
|
-
else:
|
252
|
-
self._usage.record_errors(type=ErrorType.TOOL)
|
307
|
+
if not action:
|
308
|
+
break
|
309
|
+
else:
|
310
|
+
self._usage.record_errors(type=ErrorType.TOOL)
|
253
311
|
|
254
|
-
except Exception as e:
|
255
|
-
|
312
|
+
# except Exception as e:
|
313
|
+
# self._logger.log(message=f"Failed to execute. {str(e)}", color="red", level="error")
|
314
|
+
# browser.close()
|
256
315
|
|
257
316
|
end_dt = datetime.datetime.now()
|
258
317
|
self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
|
259
|
-
|
260
|
-
return res, _, self._usage
|
318
|
+
return res
|
261
319
|
|
262
320
|
|
263
|
-
|
264
|
-
|
265
|
-
|
321
|
+
def invoke_selenium(self, **kwargs) -> Dict[str, Any]:
|
322
|
+
try:
|
323
|
+
from selenium import webdriver
|
324
|
+
from selenium.webdriver.common.keys import Keys
|
325
|
+
from selenium.webdriver.common.action_chains import ActionChains
|
326
|
+
from selenium.webdriver.common.actions.action_builder import ActionBuilder
|
327
|
+
except Exception as e:
|
328
|
+
self._logger.log(level="error", message=f"Install Selenium by `uv pip install versionhq[tools]` or `uv add selenium`. {str(e)}", color="red")
|
329
|
+
raise e
|
266
330
|
|
267
|
-
|
268
|
-
schema = dict()
|
269
|
-
inputs = list()
|
270
|
-
previous_response_id = self._response_ids[-1] if self._response_ids and self._response_ids[-1].startswith("rs") else None
|
331
|
+
self._logger.log(message="Start computer use", level="info", color="blue")
|
271
332
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
333
|
+
start_dt = datetime.datetime.now()
|
334
|
+
|
335
|
+
driver = webdriver.Chrome(options=kwargs) if kwargs else webdriver.Chrome()
|
336
|
+
if self.tools:
|
337
|
+
driver.set_window_size(height=self.tools[0].display_height, width=self.tools[0].display_width)
|
338
|
+
|
339
|
+
if self.web_url:
|
340
|
+
driver.get(self.web_url)
|
341
|
+
time.sleep(3)
|
342
|
+
|
343
|
+
res, _, usage = self._run()
|
344
|
+
self._logger.log(message=f"Initial response: {res}", color="blue", level="info")
|
345
|
+
self._usage.aggregate(metrics=usage)
|
346
|
+
actions = [v for k, v in res.items() if k =="action"] if res else []
|
347
|
+
action = actions[0] if actions else None
|
348
|
+
action_chains = ActionChains(driver=driver)
|
349
|
+
action_builder = ActionBuilder(driver=driver)
|
350
|
+
|
351
|
+
if action:
|
352
|
+
while True:
|
353
|
+
x = action.x if hasattr(action, 'x') else 0
|
354
|
+
y = action.y if hasattr(action, 'y') else 0
|
355
|
+
scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
|
356
|
+
scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
|
357
|
+
text = action.text if hasattr(action, 'text') else ''
|
358
|
+
path = handle_directory(directory_name='_screenshots', filename=f'cua_selenium', ext='png')
|
359
|
+
|
360
|
+
match action.type:
|
361
|
+
case 'click':
|
362
|
+
self._logger.log(message="Action: click", color="blue", level="info")
|
363
|
+
driver.execute_script(f'window.scrollBy({x}, {y})')
|
364
|
+
action_chains.move_by_offset(xoffset=x, yoffset=y)
|
365
|
+
action_chains.perform()
|
366
|
+
|
367
|
+
if hasattr(action, 'button'):
|
368
|
+
match action.button:
|
369
|
+
case 'left':
|
370
|
+
action_chains.click()
|
371
|
+
case 'right':
|
372
|
+
action_chains.context_click()
|
373
|
+
action_chains.perform()
|
374
|
+
time.sleep(1)
|
375
|
+
|
376
|
+
case "scroll" | "move":
|
377
|
+
self._logger.log(message="Action: scroll", color="blue", level="info")
|
378
|
+
driver.execute_script(f'window.scrollBy({scroll_x}, {scroll_y})')
|
379
|
+
time.sleep(1)
|
380
|
+
|
381
|
+
case "keypress":
|
382
|
+
self._logger.log(message="Action: keypress", color="blue", level="info")
|
383
|
+
keys = action.keys
|
384
|
+
if keys:
|
385
|
+
for k in keys:
|
386
|
+
match k.lower():
|
387
|
+
case "enter": action_chains.key_down(Keys.ENTER).perform()
|
388
|
+
case "space": action_chains.key_down(Keys.SPACE).perform()
|
389
|
+
case "select_all":
|
390
|
+
if platform.system() == 'Darwin':
|
391
|
+
action_chains.send_keys(Keys.COMMAND + "a").perform()
|
392
|
+
else:
|
393
|
+
action_chains.send_keys(Keys.CONTROL + "a").perform()
|
394
|
+
case _:
|
395
|
+
action_chains.key_down(Keys.SHIFT).send_keys(k).key_up(Keys.SHIFT).perform()
|
396
|
+
time.sleep(1)
|
397
|
+
|
398
|
+
case "type":
|
399
|
+
self._logger.log(message="Action: type", color="blue", level="info")
|
400
|
+
action_chains.send_keys(text).perform()
|
401
|
+
time.sleep(1)
|
402
|
+
|
403
|
+
case "wait":
|
404
|
+
self._logger.log(message="Action: wait", color="blue", level="info")
|
405
|
+
action_chains.pause(3)
|
406
|
+
|
407
|
+
case "screenshot":
|
408
|
+
self._logger.log(message="Action: screenshot", color="blue", level="info")
|
409
|
+
driver.save_screenshot(path)
|
410
|
+
time.sleep(1)
|
411
|
+
|
412
|
+
case _:
|
413
|
+
self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
|
414
|
+
return False
|
415
|
+
|
416
|
+
with open(path, "rb") as image_file:
|
417
|
+
res, usage = None, None
|
418
|
+
if image_file:
|
419
|
+
screenshot_base64 = base64.b64encode(image_file.read()).decode("utf-8")
|
420
|
+
res, _, usage = self._run(screenshot=screenshot_base64)
|
421
|
+
else:
|
422
|
+
res, _, usage = self._run()
|
423
|
+
|
424
|
+
print("res", res)
|
425
|
+
|
426
|
+
self._usage.aggregate(metrics=usage)
|
427
|
+
if not res:
|
428
|
+
usage.record_errors(type=ErrorType.API)
|
429
|
+
break
|
287
430
|
|
431
|
+
actions = [v for k, v in res.items() if k =="action"] if res else []
|
432
|
+
action = actions[0] if actions else None
|
433
|
+
if not action:
|
434
|
+
self._logger.log(message="No action found.", color="yellow", level="warning")
|
435
|
+
break
|
288
436
|
else:
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
437
|
+
self._usage.record_errors(type=ErrorType.TOOL)
|
438
|
+
|
439
|
+
end_dt = datetime.datetime.now()
|
440
|
+
self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
|
441
|
+
return res
|
442
|
+
|
294
443
|
|
295
|
-
|
444
|
+
def run(self) -> Tuple[Dict[str, Any], None, UsageMetrics]:
|
445
|
+
"""Core function to execute the tool."""
|
446
|
+
|
447
|
+
res = None
|
448
|
+
try:
|
449
|
+
res = self.invoke_playwright()
|
450
|
+
except:
|
451
|
+
self._call_ids = []
|
452
|
+
self._calls = dict()
|
453
|
+
self._response_ids = []
|
454
|
+
res = self.invoke_selenium()
|
455
|
+
|
456
|
+
return res, None, self._usage
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: versionhq
|
3
|
-
Version: 1.2.4.
|
3
|
+
Version: 1.2.4.14
|
4
4
|
Summary: Autonomous agent networks for task automation with multi-step reasoning.
|
5
5
|
Author-email: Kuriko Iwai <kuriko@versi0n.io>
|
6
6
|
License: MIT License
|
@@ -77,6 +77,7 @@ Provides-Extra: tools
|
|
77
77
|
Requires-Dist: html2text>=2024.2.26; extra == "tools"
|
78
78
|
Requires-Dist: sec-api>=1.0.28; extra == "tools"
|
79
79
|
Requires-Dist: pytest-playwright>=0.7.0; extra == "tools"
|
80
|
+
Requires-Dist: selenium>=4.29.0; extra == "tools"
|
80
81
|
Provides-Extra: torch
|
81
82
|
Requires-Dist: torch>=2.6.0; extra == "torch"
|
82
83
|
Requires-Dist: torchvision>=0.21.0; extra == "torch"
|
@@ -1,9 +1,10 @@
|
|
1
|
-
versionhq/__init__.py,sha256=
|
1
|
+
versionhq/__init__.py,sha256=7yJXhEnXuIcMKUqz042HK99oD79bvLl2hiajGP9J7OM,3356
|
2
2
|
versionhq/_prompt/auto_feedback.py,sha256=bbj37yTa11lRHpx-sV_Wmpb4dVnDBB7_v8ageUobHXY,3780
|
3
3
|
versionhq/_prompt/constants.py,sha256=DOwUFnVVObEFqgnaMCDnW8fnw1oPMgS8JAqOiTuqleI,932
|
4
4
|
versionhq/_prompt/model.py,sha256=wJlDM9yzrqlXWxyw4HkYQzPii2MPfqkgTF3qhXoJN2M,8038
|
5
|
-
versionhq/_utils/__init__.py,sha256=
|
5
|
+
versionhq/_utils/__init__.py,sha256=S3GvJKOTHM43JzPdaDqT6Zkan9eQJpc4biqQBXiVq6o,481
|
6
6
|
versionhq/_utils/convert_img_url.py,sha256=BlINw4RQ632m9P4FJbqzqYlzTLESBTRkhkstAopnNNY,408
|
7
|
+
versionhq/_utils/handle_directory.py,sha256=n5y2ClC4A3f6rkv8XDfzoCqJcw-8sCJ0Q5q_ZiQ5uxw,417
|
7
8
|
versionhq/_utils/i18n.py,sha256=TwA_PnYfDLA6VqlUDPuybdV9lgi3Frh_ASsb_X8jJo8,1483
|
8
9
|
versionhq/_utils/is_valid_enum.py,sha256=vGGIuvhDnFU2fUyyFxJyjw-NfByK0vfFAu1ShaHBeZE,720
|
9
10
|
versionhq/_utils/is_valid_url.py,sha256=m8Mswvb-90FJtx1Heq6hPFDbwGgrv_R3wSbZQmEPM9Q,379
|
@@ -14,7 +15,7 @@ versionhq/_utils/usage_metrics.py,sha256=gDK6fZgT1njX4iPIPFapWxfxIiz-zZYv72p0u6M
|
|
14
15
|
versionhq/_utils/vars.py,sha256=bZ5Dx_bFKlt3hi4-NNGXqdk7B23If_WaTIju2fiTyPQ,57
|
15
16
|
versionhq/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
17
|
versionhq/agent/inhouse_agents.py,sha256=D2WAiXCYsnQK3_Fe7CbbtvXsHWOaN6vde6m_QoW7fH4,2629
|
17
|
-
versionhq/agent/model.py,sha256=
|
18
|
+
versionhq/agent/model.py,sha256=n4yU1f7-74piTJXEK-IahJOWzSpuwViaj7RJEMJW_Y0,26988
|
18
19
|
versionhq/agent/parser.py,sha256=riG0dkdQCxH7uJ0AbdVdg7WvL0BXhUgJht0VtQvxJBc,4082
|
19
20
|
versionhq/agent/rpm_controller.py,sha256=grezIxyBci_lDlwAlgWFRyR5KOocXeOhYkgN02dNFNE,2360
|
20
21
|
versionhq/agent/TEMPLATES/Backstory.py,sha256=dkfuATUQ2g2WoUKkmgAIch-RB--bektGoQaUlsDOn0g,529
|
@@ -70,12 +71,12 @@ versionhq/tool/composio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
70
71
|
versionhq/tool/composio/model.py,sha256=GIFKso_e_4a3BdaulqU_i6Y9JFAExNBjzHUHR_zZeSI,8577
|
71
72
|
versionhq/tool/composio/params.py,sha256=FvBuEXsOQUYnN7RTFxT20kAkiEYkxWKkiVtgpqOzKZQ,1843
|
72
73
|
versionhq/tool/gpt/__init__.py,sha256=A6xCuf_GUBs7wfx904J_Vd2t1GJCcf0lMKOL7MbZce4,160
|
73
|
-
versionhq/tool/gpt/_enum.py,sha256=
|
74
|
-
versionhq/tool/gpt/cua.py,sha256=
|
74
|
+
versionhq/tool/gpt/_enum.py,sha256=iBtH964dyv6d326VXSJsthB7EKxFXLcZVQPfvaCtbdk,496
|
75
|
+
versionhq/tool/gpt/cua.py,sha256=KFDueZiu7idDn9l_XrOfi_1PyllID9jFHG1S6sFaBbc,19360
|
75
76
|
versionhq/tool/gpt/file_search.py,sha256=r5JVlf-epKB8DDXyrzlkezguHUMir0JW-77LUHoy-w8,5813
|
76
77
|
versionhq/tool/gpt/web_search.py,sha256=bpqEQopbq9KtqQ_0W7QAAJ5TyoKGiVM94-SMp5oqNFE,3483
|
77
|
-
versionhq-1.2.4.
|
78
|
-
versionhq-1.2.4.
|
79
|
-
versionhq-1.2.4.
|
80
|
-
versionhq-1.2.4.
|
81
|
-
versionhq-1.2.4.
|
78
|
+
versionhq-1.2.4.14.dist-info/licenses/LICENSE,sha256=cRoGGdM73IiDs6nDWKqPlgSv7aR4n-qBXYnJlCMHCeE,1082
|
79
|
+
versionhq-1.2.4.14.dist-info/METADATA,sha256=wPMQGhx1Xxyh-oScOhbqXrsUxJMGMdoYmoiNfZFApN8,21399
|
80
|
+
versionhq-1.2.4.14.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
81
|
+
versionhq-1.2.4.14.dist-info/top_level.txt,sha256=DClQwxDWqIUGeRJkA8vBlgeNsYZs4_nJWMonzFt5Wj0,10
|
82
|
+
versionhq-1.2.4.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|