versionhq-1.2.4.13-py3-none-any.whl → versionhq-1.2.4.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +3 -3
- versionhq/_prompt/model.py +27 -26
- versionhq/_utils/__init__.py +1 -0
- versionhq/_utils/handle_directory.py +15 -0
- versionhq/agent/model.py +11 -10
- versionhq/llm/llm_vars.py +13 -3
- versionhq/llm/model.py +50 -18
- versionhq/task/model.py +1 -0
- versionhq/tool/gpt/_enum.py +1 -1
- versionhq/tool/gpt/cua.py +319 -165
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/METADATA +2 -1
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/RECORD +15 -14
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/WHEEL +1 -1
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/licenses/LICENSE +0 -0
- {versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/top_level.txt +0 -0
versionhq/__init__.py
CHANGED
@@ -10,7 +10,7 @@ load_dotenv(override=True)
 from versionhq.agent.model import Agent
 from versionhq.agent_network.model import AgentNetwork, Formation, Member, TaskHandlingProcess
 from versionhq.llm.model import LLM
-from versionhq.llm.llm_vars import LLM_CONTEXT_WINDOW_SIZES, MODEL_PARAMS, PROVIDERS, TEXT_MODELS
+from versionhq.llm.llm_vars import LLM_CONTEXT_WINDOW_SIZES, MODEL_PARAMS, PROVIDERS, MODELS
 from versionhq.clients.customer.model import Customer
 from versionhq.clients.product.model import Product, ProductProvider
 from versionhq.clients.workflow.model import MessagingWorkflow, MessagingComponent
@@ -35,7 +35,7 @@ from versionhq.agent_network.formation import form_agent_network
 from versionhq.task_graph.draft import workflow
 
 
-__version__ = "1.2.4.13"
+__version__ = "1.2.4.15"
 __all__ = [
     "Agent",
 
@@ -48,7 +48,7 @@ __all__ = [
     "LLM_CONTEXT_WINDOW_SIZES",
    "MODEL_PARAMS",
     "PROVIDERS",
-    "TEXT_MODELS",
+    "MODELS",
 
     "Customer",
     "Product",
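Note for downstream users: the registry formerly exported as TEXT_MODELS is now MODELS. A minimal sketch of the updated top-level surface (output values reflect this release):

import versionhq as vhq

print(vhq.__version__)          # "1.2.4.15"
print(vhq.MODELS["openai"][0])  # "gpt-4.5-preview-2025-02-27"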
versionhq/_prompt/model.py
CHANGED
@@ -14,7 +14,6 @@ class Prompt:
     agent: Any = None
     context: Any = None
 
-
     def __init__(self, task, agent, context):
         from versionhq.agent.model import Agent
         from versionhq.task.model import Task
@@ -32,22 +31,24 @@ class Prompt:
         output_prompt = ""
         output_formats_to_follow = dict()
 
-        if self.task.
-            if
+        if self.task.is_multimodal == False:
+            if self.task.response_schema:
+                if isinstance(self.task.response_schema, list):
+                    for item in self.task.response_schema:
+                        if isinstance(item, ResponseField):
+                            output_formats_to_follow[item.title] = f"<Return your answer in {item.data_type.__name__}>"
 
+                elif issubclass(self.task.response_schema, BaseModel):
+                    for k, v in self.task.response_schema.model_fields.items():
+                        output_formats_to_follow[k] = f"<Return your answer in {v.annotation}>"
 
-Ref. Output image: {output_formats_to_follow}
-
-        output_prompt = "You MUST return your response as a valid JSON serializable string, enclosed in double quotes. Use double quotes for all keys and string values. Do NOT use single quotes, trailing commas, or other non-standard JSON syntax."
+                output_prompt = f"""Your response MUST be a valid JSON string that strictly follows the response format. Use double quotes for all keys and string values. Do not use single quotes, trailing commas, or any other non-standard JSON syntax.
+Ref. Output image: {output_formats_to_follow}"""
+            else:
+                output_prompt = "You MUST return your response as a valid JSON serializable string, enclosed in double quotes. Use double quotes for all keys and string values. Do NOT use single quotes, trailing commas, or other non-standard JSON syntax."
 
+        else:
+            output_prompt = "Return your response in concise manner."
         return dedent(output_prompt)
 
 
@@ -98,19 +99,20 @@ Ref. Output image: {output_formats_to_follow}
 
         content_messages = {}
 
-        if self.task.
+        if self.task.is_multimodal == False:
+            if self.task.image:
+                img_url = convert_img_url(self.task.image)
+                if img_url:
+                    content_messages.update({ "type": "image_url", "image_url": { "url": img_url }})
 
+            if self.task.file:
+                if is_valid_url(self.task.file):
+                    content_messages.update({ "type": "image_url", "image_url": self.file })
 
+            if self.task.audio and self.agent.llm.provider == "gemini":
+                audio_bytes = Path(self.task.audio).read_bytes()
+                encoded_data = base64.b64encode(audio_bytes).decode("utf-8")
+                content_messages.update({ "type": "image_url", "image_url": "data:audio/mp3;base64,{}".format(encoded_data)})
 
         return content_messages
 
@@ -188,7 +190,6 @@ Ref. Output image: {output_formats_to_follow}
         # else:
         #     user_prompt = self.agent._use_trained_data(user_prompt=user_prompt)
 
-
         content_prompt = self._format_content_prompt()
 
         messages = []
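The BaseModel branch above walks model_fields to build per-field format hints; a standalone sketch of that mapping (the Summary model is hypothetical):

from pydantic import BaseModel

class Summary(BaseModel):
    title: str
    score: float

# Mirrors the `elif issubclass(...)` branch above.
output_formats_to_follow = {
    k: f"<Return your answer in {v.annotation}>"
    for k, v in Summary.model_fields.items()
}
print(output_formats_to_follow)
# {'title': "<Return your answer in <class 'str'>>", 'score': "<Return your answer in <class 'float'>>"}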
versionhq/_utils/__init__.py
CHANGED
@@ -5,3 +5,4 @@ from versionhq._utils.is_valid_url import is_valid_url
 from versionhq._utils.usage_metrics import UsageMetrics, ErrorType
 from versionhq._utils.convert_img_url import convert_img_url
 from versionhq._utils.is_valid_enum import is_valid_enum
+from versionhq._utils.handle_directory import handle_directory
versionhq/_utils/handle_directory.py
ADDED
@@ -0,0 +1,15 @@
+import os
+import datetime
+from pathlib import Path
+
+
+def handle_directory(directory_name: str = None, filename: str = None, ext: str = 'png') -> Path:
+    """Creates and returns the absolute file path"""
+
+    os.makedirs(directory_name, exist_ok=True)
+
+    date = str(datetime.datetime.now().strftime('%j'))
+    cwd = Path.cwd()
+    DIRECTORY = cwd / f'{directory_name}/{filename}_{date}.{ext}'
+
+    return DIRECTORY
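A minimal usage sketch of the new helper: it creates the directory (relative to the working directory) but not the file itself, and stamps the filename with the day of the year ('%j'):

from versionhq._utils import handle_directory

# e.g. /current/working/dir/_screenshots/cua_playwright_092.png on day 92
path = handle_directory(directory_name="_screenshots", filename="cua_playwright", ext="png")
print(path)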
versionhq/agent/model.py
CHANGED
@@ -356,12 +356,9 @@ class Agent(BaseModel):
         response_format: Optional[Dict[str, Any]] = None,
         tools: Optional[List[InstanceOf[Tool]| InstanceOf[ToolSet] | Type[Tool]]] = None,
         tool_res_as_final: bool = False,
+        file: str = None, # absolute path to the content file (for multimodal use)
     ) -> Tuple[str, UsageMetrics]:
-        """
-        Create formatted prompts using the developer prompt and the agent's backstory, then call the base model.
-        - Execute the task up to `self.max_retry_limit` times in case of receiving an error or empty response.
-        - Pass the task_tools to the model to let them execute.
-        """
+        """Calls LLM."""
 
         task_execution_counter = 0
         iterations = 0
@@ -375,10 +372,10 @@ class Agent(BaseModel):
         Logger(**self._logger_config, filename=self.key).log(level="info", message=f"Messages sent to the model: {messages}", color="blue")
 
         if tool_res_as_final:
-            raw_response = self.func_calling_llm.call(messages=messages, tools=tools, tool_res_as_final=True)
+            raw_response = self.func_calling_llm.call(messages=messages, tools=tools, tool_res_as_final=True, file=file)
             usage.record_token_usage(*self.func_calling_llm._usages)
         else:
-            raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools)
+            raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools, file=file)
             usage.record_token_usage(*self.llm._usages)
 
         task_execution_counter += 1
@@ -454,14 +451,14 @@ class Agent(BaseModel):
         return rag_tools, gpt_tools, tools
 
 
-    def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput
+    def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput or None
         """Generates k, v pairs from multiple GPT tool results and stores them in TaskOutput class."""
 
         from versionhq.task.model import TaskOutput
         from versionhq._utils import UsageMetrics
 
         if not gpt_tools:
-            return
+            return None
 
         tool_res = dict()
         annotation_set = dict()
@@ -470,7 +467,9 @@ class Agent(BaseModel):
         for i, item in enumerate(gpt_tools):
             raw, annotations, usage = item.run()
             tool_res.update({ str(i): raw })
-
+
+            if annotations:
+                annotation_set.update({ str(i): annotations })
             total_usage.aggregate(metrics=usage)
 
         res = TaskOutput(raw=str(tool_res), tool_output=tool_res, usage=total_usage, annotations=annotation_set)
@@ -580,6 +579,7 @@ class Agent(BaseModel):
         raw_response = ""
         user_prompt, dev_prompt = "", ""
         usage = UsageMetrics(id=task.id)
+        file = task.audio if task.is_multimodal and task.audio else task.image if task.is_multimodal and task.image else task.file if task.is_multimodal and task.file else None
 
         if self.max_rpm and self._rpm_controller:
             self._rpm_controller._reset_request_count()
@@ -598,6 +598,7 @@ class Agent(BaseModel):
                 response_format=task._structure_response_format(model_provider=self.llm.provider),
                 tools=tools,
                 tool_res_as_final=task.tool_res_as_final,
+                file=file,
             )
 
         except Exception as e:
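The chained conditional that selects the multimodal file is dense; an equivalent, easier-to-read sketch (same precedence: audio, then image, then file):

# Readability-only rewrite of the one-liner above; not the shipped code.
file = None
if task.is_multimodal:
    for candidate in (task.audio, task.image, task.file):
        if candidate:
            file = candidate
            break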
versionhq/llm/llm_vars.py
CHANGED
@@ -28,14 +28,13 @@ PROVIDERS = {
         "HF_ENDPOINT": "HF_ENDPOINT",
     },
     "azure": {
-        "api_base": "
+        "api_base": "AZURE_OPENAI_ENDPOINT_MODEL_NAME",
         "api_key": "AZURE_OPENAI_API_KEY",
         "api_version": "AZURE_OPENAI_API_VERSION",
     },
     "azure_ai": {
         "api_key": "AZURE_AI_API_KEY",
         "base_url": "AZURE_AI_API_BASE",
-
     }
 }
 
@@ -47,7 +46,7 @@ ENDPOINTS = [
 
 
 # Resaoning and text generation models
-TEXT_MODELS = {
+MODELS = {
     "openai": [
         "gpt-4.5-preview-2025-02-27",
         "gpt-4",
@@ -96,6 +95,10 @@ TEXT_MODELS = {
         "bedrock/cohere.command-light-text-v14",
     ],
     "azure": [
+        "azure/whisper",
+        "azure/whisper-2",
+        "azure/gpt-4o-mini-audio-preview",
+
         "azure/DeepSeek-V3",
         "azure/DeepSeek-R1",
         "azure/Llama-3.3-70B-Instruct",
@@ -163,6 +166,13 @@ TEXT_MODELS = {
 }
 
 
+AUDIO_TO_TEXT_MODELS = [
+    "azure/whisper",
+    "azure/whisper-2",
+    "azure/gpt-4o-mini-audio-preview",
+]
+
+
 """
 Max input token size by the model.
 """
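The new list backs an exact-membership gate in llm/model.py; a quick sketch (the whisper entries were also prepended to the azure list, so they double as that provider's defaults):

from versionhq.llm.llm_vars import MODELS, AUDIO_TO_TEXT_MODELS

model = MODELS["azure"][0]            # "azure/whisper" as of this release
print(model in AUDIO_TO_TEXT_MODELS)  # True -> routed to litellm.transcription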
versionhq/llm/model.py
CHANGED
@@ -12,9 +12,9 @@ import litellm
 from litellm import JSONSchemaValidationError, get_supported_openai_params, supports_response_schema
 from pydantic import BaseModel, Field, PrivateAttr, model_validator, ConfigDict
 
-from versionhq.llm.llm_vars import LLM_CONTEXT_WINDOW_SIZES,
+from versionhq.llm.llm_vars import LLM_CONTEXT_WINDOW_SIZES, MODELS, AUDIO_TO_TEXT_MODELS, MODEL_PARAMS, PROVIDERS, ENDPOINTS
 from versionhq.tool.model import Tool, ToolSet
-from versionhq._utils import Logger
+from versionhq._utils import Logger, UsageMetrics, ErrorType
 
 
 load_dotenv(override=True)
@@ -115,7 +115,7 @@ class LLM(BaseModel):
                 self.provider = DEFAULT_MODEL_PROVIDER_NAME
 
             else:
-                provider_model_list =
+                provider_model_list = MODELS.get(self.provider)
                 if provider_model_list:
                     self.model = provider_model_list[0]
                     self.provider = self.provider
@@ -127,29 +127,29 @@ class LLM(BaseModel):
         elif self.model and self.provider is None:
             model_match = [
                 item for item in [
-                    [val for val in v if val == self.model][0] for k, v in
+                    [val for val in v if val == self.model][0] for k, v in MODELS.items() if [val for val in v if val == self.model]
                 ] if item
             ]
             model_partial_match = [
                 item for item in [
-                    [val for val in v if val.find(self.model) != -1][0] for k, v in
+                    [val for val in v if val.find(self.model) != -1][0] for k, v in MODELS.items() if [val for val in v if val.find(self.model) != -1]
                 ] if item
             ]
-            provider_match = [k for k, v in
+            provider_match = [k for k, v in MODELS.items() if k == self.model]
 
             if model_match:
                 self.model = model_match[0]
-                self.provider = [k for k, v in
+                self.provider = [k for k, v in MODELS.items() if self.model in v][0]
 
             elif model_partial_match:
                 self.model = model_partial_match[0]
-                self.provider = [k for k, v in
+                self.provider = [k for k, v in MODELS.items() if [item for item in v if item.find(self.model) != -1]][0]
 
             elif provider_match:
                 provider = provider_match[0]
-                if self.
+                if self.MODELS.get(provider):
                     self.provider = provider
-                    self.model = self.
+                    self.model = self.MODELS.get(provider)[0]
                 else:
                     self.provider = DEFAULT_MODEL_PROVIDER_NAME
                     self.model = DEFAULT_MODEL_NAME
@@ -159,7 +159,7 @@ class LLM(BaseModel):
                 self.provider = DEFAULT_MODEL_PROVIDER_NAME
 
             else:
-                provider_model_list =
+                provider_model_list = MODELS.get(self.provider)
                 if self.model not in provider_model_list:
                     self._logger.log(level="warning", message=f"The provided model: {self._init_model_name} is not in the list. We will assign a default model.", color="yellow")
                     self.model = DEFAULT_MODEL_NAME
@@ -232,7 +232,16 @@ class LLM(BaseModel):
 
         valid_cred = {}
         for k, v in cred.items():
-            val =
+            val = None
+            if '_MODEL_NAME' in v:
+                model_name = self.model.split('/')[-1] if self.model.split('/') else self.model
+                key = v.replace('_MODEL_NAME', f'_{model_name.replace("-", '_').replace(' ', '_').upper()}')
+                val = os.environ.get(key, None)
+                if not val:
+                    val = os.environ.get(v.replace('_MODEL_NAME', ''), None)
+            else:
+                val = os.environ.get(v, None)
 
             if val:
                 valid_cred[str(k)] = val
 
@@ -288,12 +297,12 @@
         messages: List[Dict[str, str]],
         response_format: Optional[Dict[str, Any]] = None,
         tools: Optional[List[Tool | ToolSet | Any ]] = None,
-        config: Optional[Dict[str, Any]] =
-        tool_res_as_final: bool = False
+        config: Optional[Dict[str, Any]] = dict(),
+        tool_res_as_final: bool = False,
+        file: str = None
     ) -> str:
-        """
-
-        """
+        """Configures and calls the LLM (chat, text generation, reasoning models)."""
+
         litellm.drop_params = True
         litellm.set_verbose = True
 
@@ -302,9 +311,32 @@
         self._set_callbacks(self.callbacks)
 
         try:
-            res
+            res = None
+            tool_res = ""
             cred = self._set_credentials()
 
+            if file and self.model in AUDIO_TO_TEXT_MODELS:
+                params = self._create_valid_params(config=config)
+                audio_file = open(file, 'rb')
+                res = litellm.transcription(
+                    model=self.model,
+                    file=audio_file,
+                    prompt=messages,
+                    custom_llm_provider=self.endpoint_provider,
+                    response_format="json",
+                    **cred
+                )
+                usage = UsageMetrics()
+                if res:
+                    usage.latency = res._response_ms if hasattr(res, '_response_ms') else 0
+                    self._usages.append(usage)
+                    return res.text
+                else:
+                    usage.record_errors(type=ErrorType.API)
+                    self._usages.append(usage)
+                    return None
+
+
             if self.provider == "gemini":
                 self.response_format = { "type": "json_object" } if not tools and self.model != "gemini/gemini-2.0-flash-thinking-exp" else None
             elif response_format and "json_schema" in response_format:
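Two of the changes above deserve a worked illustration. First, the _MODEL_NAME placeholder in _set_credentials: with self.model = "azure/whisper", the env lookup tries a model-specific key before falling back to the generic one. Second, the new audio branch, sketched in isolation with litellm (the file path is a placeholder; Azure credentials are assumed to be set in the environment):

import litellm

# Placeholder expansion from _set_credentials, worked by hand:
v = "AZURE_OPENAI_ENDPOINT_MODEL_NAME"
model_name = "azure/whisper".split("/")[-1]
print(v.replace("_MODEL_NAME", "_" + model_name.replace("-", "_").upper()))  # AZURE_OPENAI_ENDPOINT_WHISPER
print(v.replace("_MODEL_NAME", ""))                                          # AZURE_OPENAI_ENDPOINT

# Minimal sketch of the transcription path added above:
with open("/abs/path/meeting.mp3", "rb") as audio_file:
    res = litellm.transcription(model="azure/whisper", file=audio_file, response_format="json")
print(res.text)  # plain transcript text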
versionhq/task/model.py
CHANGED
@@ -314,6 +314,7 @@ class Task(BaseModel):
     name: Optional[str] = Field(default=None)
     description: str = Field(description="Description of the actual task")
     response_schema: Optional[Type[BaseModel] | List[ResponseField]] = Field(default=None, description="stores response format")
+    is_multimodal: bool = False
 
     # tool usage
     tools: Optional[List[Any]] = Field(default_factory=list, description="tools that the agent can use aside from their tools")
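A hedged sketch of how the new flag is meant to be used (the audio attribute is read by Agent.execute_task above, but its declaration sits outside this diff):

from versionhq.task.model import Task

task = Task(
    description="Transcribe and summarize the meeting recording.",
    is_multimodal=True,              # routes task.audio/image/file into llm.call(file=...)
    audio="/abs/path/meeting.mp3",   # assumed field; referenced as task.audio in agent/model.py
)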
versionhq/tool/gpt/_enum.py
CHANGED
versionhq/tool/gpt/cua.py
CHANGED
@@ -1,40 +1,33 @@
+import base64
 import datetime
 import time
-
+import platform
+from typing import List, Dict, Any, Tuple, Literal, get_args
 
 from versionhq._utils import convert_img_url
 from versionhq.tool.gpt import openai_client
-from versionhq.tool.gpt._enum import
-from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType, Logger, is_valid_url
-
-
-allowed_browsers = ['webkit', 'chromium', 'firefox']
+from versionhq.tool.gpt._enum import GPTSizeEnum
+from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType, Logger, is_valid_url, handle_directory
 
+BROWSER = Literal['chromium', 'firefox']
+TYPE = Literal["computer_call_output", "computer_use_preview"]
+ENV = Literal["browser", "mac", "windows", "ubuntu"]
 
 class CUAToolSchema:
-    type:
+    type: TYPE = "computer_use_preview"
+    environment: ENV = "browser"
     display_width: int = 1024
     display_height: int = 768
-    environment: str = GPTCUAEnvironmentEnum.BROWSER.value
 
-    def __init__(
-        self,
-        type: str | GPTCUATypeEnum = None,
-        display_width: int = None,
-        display_height: int = None,
-        environment: str | GPTCUAEnvironmentEnum = None
-    ):
+    def __init__(self, type: str = None, display_width: int = None, display_height: int = None, environment: str = None):
         self.display_height = display_height if display_height else self.display_height
         self.display_width = display_width if display_width else self.display_width
 
-        if type and
-            self.type = type
-
-        if environment and is_valid_enum(enum=GPTCUAEnvironmentEnum, val=environment):
-            self.environment = environment.value if isinstance(environment, GPTCUAEnvironmentEnum) else environment
-
-        self.environment = environment if environment else self.environment
+        if type and type in get_args(TYPE):
+            self.type = type
 
+        if environment and environment in get_args(ENV):
+            self.environment = environment
 
     @property
     def schema(self) -> Dict[str, Any]:
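The Literal + get_args pattern replaces the old enum validation; a standalone sketch:

from typing import Literal, get_args

ENV = Literal["browser", "mac", "windows", "ubuntu"]

def validate_env(value: str) -> str:
    # get_args(ENV) == ("browser", "mac", "windows", "ubuntu"),
    # so membership testing doubles as validation without an Enum class.
    return value if value in get_args(ENV) else "browser"

print(validate_env("mac"))      # mac
print(validate_env("solaris"))  # browser (falls back to the default)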
@@ -52,12 +45,14 @@ class GPTToolCUA:
     user_prompt: str = None
     img_url: str = None
     web_url: str = "https://www.google.com"
-    browser:
+    browser: BROWSER = "firefox"
     reasoning_effort: str = GPTSizeEnum.MEDIUM.value
     truncation: str = "auto"
 
+    _schema: Dict[str, Any] = dict()
     _response_ids: List[str] = list()
     _call_ids: List[str] = list()
+    _calls: Dict[str, Dict[str, Any]] = dict() # stores response_id and raw output object.
     _usage: UsageMetrics = UsageMetrics()
     _logger: Logger = Logger(info_file_save=True, filename="cua-task-{}".format(str(datetime.datetime.now().timestamp())) + ".png")
 
@@ -74,8 +69,8 @@ class GPTToolCUA:
         _usage: UsageMetrics = UsageMetrics()
     ):
         self.user_prompt = user_prompt
-        self.web_url = web_url if is_valid_url(web_url) else
-        self.browser = browser if browser in
+        self.web_url = web_url if is_valid_url(web_url) else None
+        self.browser = browser if browser in get_args(BROWSER) else 'chromium'
         self.truncation = truncation if truncation else self.truncation
         self._usage = _usage
         self._response_ids = list()
@@ -104,104 +99,93 @@ class GPTToolCUA:
         pass
 
-    def
-        if not page:
-            return None, None
-
-        path = path if path else "screenshot.png"
-        screenshot_bytes = page.screenshot()
-        screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
-        self._logger.log(message=f"Action: screenshot", level="info", color="blue")
-        return screenshot_bytes, screenshot_base64
-
-
-    def _handle_model_action(self, page: Any, action: Any, action_type: str = None) -> bool:
-        """Creates a page object and performs actions."""
-                    self._logger.log(message=f"Action: type text: {text}", level="info", color="blue")
-                    page.keyboard.type(text)
-
-                case "wait":
-                    self._logger.log(message=f"Action: wait", level="info", color="blue")
-                    time.sleep(2)
-
-                case "screenshot":
-                    pass
-
-        self.
-        return bool(self._usage.total_errors)
+    def _structure_schema(self, screenshot: str = None) -> None:
+        """Formats args schema for CUA calling."""
+
+        tool_schema = [item.schema for item in self.tools]
+        schema = dict()
+        inputs = list()
+        previous_response_id = self._response_ids[-1] if self._response_ids else None
+        # (self._response_ids[-1].startswith("rs") or self._response_ids[-1].startswith("resp")) else None
+
+        if self._call_ids:
+            inputs = [
+                {
+                    "call_id": self._call_ids[-1],
+                    "type": "computer_call_output",
+                }
+            ]
+            if screenshot:
+                inputs[0].update({ "output": { "type": "computer_screenshot", "image_url": f"data:image/png;base64,{str(screenshot)}"}})
+
+            # if self._calls:
+            #     call = self._calls[self._call_ids[-1]]
+            #     if call and call.call_id not in inputs[0]:
+            #         inputs.append(call)
+
+            if previous_response_id:
+                schema = dict(
+                    model=self.model,
+                    previous_response_id=previous_response_id,
+                    tools=tool_schema,
+                    input=inputs,
+                    truncation=self.truncation
+                )
+            else:
+                schema = dict(
+                    model=self.model,
+                    tools=tool_schema,
+                    input=inputs,
+                    truncation=self.truncation
+                )
+
+        else:
+            input = [{ "role": "user", "content": self.user_prompt } ]
+            img_url = convert_img_url(self.img_url) if self.img_url else None
+            if img_url:
+                input.append({"type": "input_image", "image_url": f"data:image/png;base64,{img_url}"})
+
+            schema = dict(
+                model=self.model,
+                tools=tool_schema,
+                input=input,
+                reasoning={ "effort": self.reasoning_effort},
+                truncation=self.truncation
+            )
+
+        self._schema = schema
+        # return self._schema
 
 
-    def
+    def _run(self, screenshot: str = None) -> Tuple[Dict[str, Any], None, UsageMetrics]:
         raw_res = dict()
         usage = self._usage if self._usage else UsageMetrics()
         start_dt = datetime.datetime.now()
 
         try:
-            output_image_url = schema["input"][0]["output"]["image_url"].replace("SCREENSHOT", str(screenshot))
-            schema["input"][0]["output"]["image_url"] = output_image_url
-
-            res = openai_client.responses.create(**schema)
+            self._structure_schema(screenshot=screenshot)
+            res = openai_client.responses.create(**self._schema)
             if not res:
                 usage.record_errors(ErrorType.TOOL)
             else:
+                self._response_ids.append(res.id)
                 for item in res.output:
                     match item.type:
                         case "reasoning":
+                            reasoning = item.summary[0].text if item.summary and isinstance(item.summary, list) else str(item.summary) if item.summary else ""
+                            raw_res.update(dict(reasoning=reasoning))
+                            # self._response_ids.append(item.id)
 
                         case "computer_call":
                             raw_res.update(dict(action=item.action))
                             # self._response_ids.append(item.id)
+                            call_id = item.call_id
+                            self._call_ids.append(call_id)
+                            self._calls.update({ call_id: item })
                         case _:
                             pass
+                usage.record_token_usage(**res.usage.__dict__)
 
         except Exception as e:
             self._logger.log(message=f"Failed to run: {str(e)}", color="red", level="error")
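For orientation, the first-turn payload that _structure_schema assembles reduces to the shape below (values are illustrative; the CUA model name is configured on the class and not shown in this diff):

# Illustrative first-turn request; follow-up turns instead send a
# computer_call_output item carrying a base64 screenshot, plus previous_response_id.
schema = dict(
    model="computer-use-preview",   # assumed model name
    tools=[{
        "type": "computer_use_preview",
        "display_width": 1024,
        "display_height": 768,
        "environment": "browser",
    }],
    input=[{"role": "user", "content": "Open the pricing page."}],
    reasoning={"effort": "medium"},
    truncation="auto",
)
# res = openai_client.responses.create(**schema)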
@@ -212,84 +196,254 @@ class GPTToolCUA:
         return raw_res, None, usage
 
 
-    def invoke_playwright(self) ->
+    def invoke_playwright(self) -> Dict[str, Any]:
         """Handles computer use loop. Ref. OpenAI official website."""
+        try:
+            from playwright.sync_api import sync_playwright
+        except Exception as e:
+            self._logger.log(level="error", message=f"Install Playwright by adding `versionhq[tools]` to requirements.txt or run `uv add playwright`. {str(e)}", color="red")
+            raise e
 
-        self._logger.log(message="Start
+        import os
+        os.environ["DEBUG"] = "pw:browser"
+        self._logger.log(message="Start computer use.", level="info", color="blue")
+        start_dt = datetime.datetime.now()
+        res = None
+
+        # try:
+        p = sync_playwright().start()
+        b = p.firefox if self.browser == "firefox" else p.chromium
+        browser = b.launch(headless=True)
+        page = browser.new_page()
+        if not browser or not page:
+            return None, None, None
+
+        if self.web_url:
+            page.goto(self.web_url, timeout=3000000, wait_until="load", referer=None)
+            time.sleep(3)
+
+        res, _, usage = self._run()
+        self._usage.aggregate(metrics=usage)
+        actions = [v for k, v in res.items() if k =="action"] if res else []
+        action = actions[0] if actions else None
+
+        if action:
+            while True:
+                x = action.x if hasattr(action, 'x') else 0
+                y = action.y if hasattr(action, 'y') else 0
+                scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
+                scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
+                text = action.text if hasattr(action, 'text') else ''
+                screenshot_base64 = None
+                path = handle_directory(directory_name='_screenshots', filename=f'cua_playwright', ext='png')
+
+                match action.type:
+                    case "click":
+                        self._logger.log(message="Action: click", color="blue", level="info")
+                        button = action.button if hasattr(action, 'button') and (action.button == 'left' or action.button == 'right') else 'left'
+                        page.mouse.move(x, y)
+                        page.mouse.click(x, y, button=button)
+                        time.sleep(1)
+
+                    case "scroll":
+                        self._logger.log(message="Action: scroll", color="blue", level="info")
+                        page.mouse.move(x, y)
+                        page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
+                        time.sleep(1)
+
+                    case "move":
+                        self._logger.log(message="Action: move", color="blue", level="info")
+                        page.mouse.move(x, y)
+                        page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
+                        time.sleep(1)
+
+                    case "keypress":
+                        self._logger.log(message="Action: keypress", color="blue", level="info")
+                        keys = action.keys
+                        for k in keys:
+                            match k.lower():
+                                case "enter": page.keyboard.press("Enter")
+                                case "space": page.keyboard.press(" ")
+                                case _: page.keyboard.press(k)
+                        time.sleep(1)
+
+                    case "type":
+                        self._logger.log(message="Action: type", color="blue", level="info")
+                        page.keyboard.type(text)
+                        time.sleep(1)
+
+                    case "wait":
+                        self._logger.log(message="Action: wait", color="blue", level="info")
+                        time.sleep(3)
+
+                    case "screenshot":
+                        self._logger.log(message="Action: screenshot", color="blue", level="info")
+                        screenshot_bytes = page.screenshot(path=path)
+                        screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+                        time.sleep(1)
+
+                    case _:
+                        self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
+                        return False
+
+                if not screenshot_base64:
+                    screenshot_bytes = page.screenshot(path=path)
+                    screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+                    time.sleep(1)
+
+                res, _, usage = self._run(screenshot=screenshot_base64)
+                self._usage.aggregate(metrics=usage)
+                if not res:
+                    usage.record_errors(type=ErrorType.API)
+                    break
 
-        try:
-            with sync_playwright() as p:
-                b = p.firefox if self.browser == "firefox" else p.webkit if self.browser == "webkit" else p.chromium
-                browser = b.launch(headless=True)
-                page = browser.new_page()
-                if not browser or not page:
-                    return None, None, None
-
-                page.goto(self.web_url)
-                res, _, usage = self.run()
-                self._usage = usage
                 actions = [v for k, v in res.items() if k =="action"] if res else []
                 action = actions[0] if actions else None
-
-                self._handle_model_action(page=page, action=action)
-                _, screenshot_base64 = self._take_screenshot(page=page)
-                res, _, usage = self.run(screenshot=screenshot_base64)
-                self._usage.agggregate(metrics=usage)
-                if not res:
-                    usage.record_errors(type=ErrorType.API)
-                    break
-
-                actions = [v for k, v in res.items() if k =="action"] if res else []
-                action = actions[0] if actions else None
-                if not action:
-                    break
-            else:
-                self._usage.record_errors(type=ErrorType.TOOL)
+                if not action:
+                    break
+        else:
+            self._usage.record_errors(type=ErrorType.TOOL)
 
-        except Exception as e:
+        # except Exception as e:
+        #     self._logger.log(message=f"Failed to execute. {str(e)}", color="red", level="error")
+        #     browser.close()
 
         end_dt = datetime.datetime.now()
         self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
-
-        return res, _, self._usage
+        return res
 
 
+    def invoke_selenium(self, **kwargs) -> Dict[str, Any]:
+        try:
+            from selenium import webdriver
+            from selenium.webdriver.common.keys import Keys
+            from selenium.webdriver.common.action_chains import ActionChains
+            from selenium.webdriver.common.actions.action_builder import ActionBuilder
+        except Exception as e:
+            self._logger.log(level="error", message=f"Install Selenium by `uv pip install versionhq[tools]` or `uv add selenium`. {str(e)}", color="red")
+            raise e
 
-        schema = dict()
-        inputs = list()
-        previous_response_id = self._response_ids[-1] if self._response_ids and self._response_ids[-1].startswith("rs") else None
-
-        inputs = [
-            {
-                "call_id": self._call_ids[-1],
-                "type": "computer_call_output",
-                "output": { "type": "input_image", "image_url": f"data:image/png;base64,SCREENSHOT"}
-            }
-        ]
-        schema = dict(
-            model=self.model,
-            previous_response_id=previous_response_id,
-            tools=tool_schema,
-            input=inputs,
-            truncation=self.truncation
-        )
+        self._logger.log(message="Start computer use", level="info", color="blue")
+        start_dt = datetime.datetime.now()
+
+        driver = webdriver.Chrome(options=kwargs) if kwargs else webdriver.Chrome()
+        if self.tools:
+            driver.set_window_size(height=self.tools[0].display_height, width=self.tools[0].display_width)
+
+        if self.web_url:
+            driver.get(self.web_url)
+            time.sleep(3)
+
+        res, _, usage = self._run()
+        self._logger.log(message=f"Initial response: {res}", color="blue", level="info")
+        self._usage.aggregate(metrics=usage)
+        actions = [v for k, v in res.items() if k =="action"] if res else []
+        action = actions[0] if actions else None
+        action_chains = ActionChains(driver=driver)
+        action_builder = ActionBuilder(driver=driver)
+
+        if action:
+            while True:
+                x = action.x if hasattr(action, 'x') else 0
+                y = action.y if hasattr(action, 'y') else 0
+                scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
+                scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
+                text = action.text if hasattr(action, 'text') else ''
+                path = handle_directory(directory_name='_screenshots', filename=f'cua_selenium', ext='png')
+
+                match action.type:
+                    case 'click':
+                        self._logger.log(message="Action: click", color="blue", level="info")
+                        driver.execute_script(f'window.scrollBy({x}, {y})')
+                        action_chains.move_by_offset(xoffset=x, yoffset=y)
+                        action_chains.perform()
+
+                        if hasattr(action, 'button'):
+                            match action.button:
+                                case 'left':
+                                    action_chains.click()
+                                case 'right':
+                                    action_chains.context_click()
+                            action_chains.perform()
+                        time.sleep(1)
+
+                    case "scroll" | "move":
+                        self._logger.log(message="Action: scroll", color="blue", level="info")
+                        driver.execute_script(f'window.scrollBy({scroll_x}, {scroll_y})')
+                        time.sleep(1)
+
+                    case "keypress":
+                        self._logger.log(message="Action: keypress", color="blue", level="info")
+                        keys = action.keys
+                        if keys:
+                            for k in keys:
+                                match k.lower():
+                                    case "enter": action_chains.key_down(Keys.ENTER).perform()
+                                    case "space": action_chains.key_down(Keys.SPACE).perform()
+                                    case "select_all":
+                                        if platform.system() == 'Darwin':
+                                            action_chains.send_keys(Keys.COMMAND + "a").perform()
+                                        else:
+                                            action_chains.send_keys(Keys.CONTROL + "a").perform()
+                                    case _:
+                                        action_chains.key_down(Keys.SHIFT).send_keys(k).key_up(Keys.SHIFT).perform()
+                        time.sleep(1)
+
+                    case "type":
+                        self._logger.log(message="Action: type", color="blue", level="info")
+                        action_chains.send_keys(text).perform()
+                        time.sleep(1)
+
+                    case "wait":
+                        self._logger.log(message="Action: wait", color="blue", level="info")
+                        action_chains.pause(3)
+
+                    case "screenshot":
+                        self._logger.log(message="Action: screenshot", color="blue", level="info")
+                        driver.save_screenshot(path)
+                        time.sleep(1)
+
+                    case _:
+                        self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
+                        return False
+
+                with open(path, "rb") as image_file:
+                    res, usage = None, None
+                    if image_file:
+                        screenshot_base64 = base64.b64encode(image_file.read()).decode("utf-8")
+                        res, _, usage = self._run(screenshot=screenshot_base64)
+                    else:
+                        res, _, usage = self._run()
+
+                self._usage.aggregate(metrics=usage)
+                if not res:
+                    usage.record_errors(type=ErrorType.API)
+                    break
+
+                actions = [v for k, v in res.items() if k =="action"] if res else []
+                action = actions[0] if actions else None
+                if not action:
+                    self._logger.log(message="No action found.", color="yellow", level="warning")
+                    break
         else:
-
+            self._usage.record_errors(type=ErrorType.TOOL)
+
+        end_dt = datetime.datetime.now()
+        self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
+        return res
 
 
-
+    def run(self) -> Tuple[Dict[str, Any], None, UsageMetrics]:
+        """Core function to execute the tool."""
+
+        res = None
+        try:
+            res = self.invoke_playwright()
+        except:
+            self._call_ids = []
+            self._calls = dict()
+            self._response_ids = []
+            res = self.invoke_selenium()
+
+        return res, None, self._usage
{versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: versionhq
-Version: 1.2.4.13
+Version: 1.2.4.15
 Summary: Autonomous agent networks for task automation with multi-step reasoning.
 Author-email: Kuriko Iwai <kuriko@versi0n.io>
 License: MIT License
@@ -77,6 +77,7 @@ Provides-Extra: tools
 Requires-Dist: html2text>=2024.2.26; extra == "tools"
 Requires-Dist: sec-api>=1.0.28; extra == "tools"
 Requires-Dist: pytest-playwright>=0.7.0; extra == "tools"
+Requires-Dist: selenium>=4.30.0; extra == "tools"
 Provides-Extra: torch
 Requires-Dist: torch>=2.6.0; extra == "torch"
 Requires-Dist: torchvision>=0.21.0; extra == "torch"
{versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/RECORD
CHANGED
@@ -1,9 +1,10 @@
-versionhq/__init__.py,sha256=
+versionhq/__init__.py,sha256=oV5jD7iS1ttOqwTAukwrhJlWGH_j93WfbzvQP-jesA4,3346
 versionhq/_prompt/auto_feedback.py,sha256=bbj37yTa11lRHpx-sV_Wmpb4dVnDBB7_v8ageUobHXY,3780
 versionhq/_prompt/constants.py,sha256=DOwUFnVVObEFqgnaMCDnW8fnw1oPMgS8JAqOiTuqleI,932
-versionhq/_prompt/model.py,sha256=
-versionhq/_utils/__init__.py,sha256=
+versionhq/_prompt/model.py,sha256=wi9ZhdNA-BzsWHEwrl0yP3ZNoqGJSLzZGyuJH04DJjQ,8293
+versionhq/_utils/__init__.py,sha256=S3GvJKOTHM43JzPdaDqT6Zkan9eQJpc4biqQBXiVq6o,481
 versionhq/_utils/convert_img_url.py,sha256=BlINw4RQ632m9P4FJbqzqYlzTLESBTRkhkstAopnNNY,408
+versionhq/_utils/handle_directory.py,sha256=n5y2ClC4A3f6rkv8XDfzoCqJcw-8sCJ0Q5q_ZiQ5uxw,417
 versionhq/_utils/i18n.py,sha256=TwA_PnYfDLA6VqlUDPuybdV9lgi3Frh_ASsb_X8jJo8,1483
 versionhq/_utils/is_valid_enum.py,sha256=vGGIuvhDnFU2fUyyFxJyjw-NfByK0vfFAu1ShaHBeZE,720
 versionhq/_utils/is_valid_url.py,sha256=m8Mswvb-90FJtx1Heq6hPFDbwGgrv_R3wSbZQmEPM9Q,379
@@ -14,7 +15,7 @@ versionhq/_utils/usage_metrics.py,sha256=gDK6fZgT1njX4iPIPFapWxfxIiz-zZYv72p0u6M
 versionhq/_utils/vars.py,sha256=bZ5Dx_bFKlt3hi4-NNGXqdk7B23If_WaTIju2fiTyPQ,57
 versionhq/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/agent/inhouse_agents.py,sha256=D2WAiXCYsnQK3_Fe7CbbtvXsHWOaN6vde6m_QoW7fH4,2629
-versionhq/agent/model.py,sha256=
+versionhq/agent/model.py,sha256=9L7277HnY3rZL_-_aCStSskgivFNtqvQUq04ZBMbTac,27010
 versionhq/agent/parser.py,sha256=riG0dkdQCxH7uJ0AbdVdg7WvL0BXhUgJht0VtQvxJBc,4082
 versionhq/agent/rpm_controller.py,sha256=grezIxyBci_lDlwAlgWFRyR5KOocXeOhYkgN02dNFNE,2360
 versionhq/agent/TEMPLATES/Backstory.py,sha256=dkfuATUQ2g2WoUKkmgAIch-RB--bektGoQaUlsDOn0g,529
@@ -38,8 +39,8 @@ versionhq/knowledge/source.py,sha256=-hEUPtJUHHMx4rUKtiHl19J8xAMw-WVBw34zwa2jZ08
 versionhq/knowledge/source_docling.py,sha256=XpavmLvh4dLcuTikj8MCE9KG52oQMafy7_wBneliMK0,4994
 versionhq/knowledge/storage.py,sha256=Kd-4r6aWM5EDaoXrzKXbgi1hY6tysSQARPGXM95qMmU,8266
 versionhq/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-versionhq/llm/llm_vars.py,sha256=
-versionhq/llm/model.py,sha256=
+versionhq/llm/llm_vars.py,sha256=msX_Sgv5Tycu_GGY9C8Mn1xNW-iDF-Jsq9SIjhgQKiA,9243
+versionhq/llm/model.py,sha256=1dDObVKJ3M-zK0oCxivG_aTXgcx_M05h1AbVtlGf57I,18697
 versionhq/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/memory/contextual_memory.py,sha256=QEMVvHuEXxY7M6-12S8HhyFKf108KfX8Zzt7paPW048,3882
 versionhq/memory/model.py,sha256=VQR1229t7GQPMItlGAHLtJrb6LrZfSoRA1DRW4z0SOU,8234
@@ -53,7 +54,7 @@ versionhq/storage/utils.py,sha256=r5ghA_ktdR2IuzlzKqZYCjsNxztEMzyhWLneA4cFuWY,74
 versionhq/task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/task/evaluation.py,sha256=9jFOmjP-yy1vxRn781KmpdQ_d4J_ZA1UX_21Q3m-iuE,4122
 versionhq/task/formatter.py,sha256=N8Kmk9vtrMtBdgJ8J7RmlKNMdZWSmV8O1bDexmCWgU0,643
-versionhq/task/model.py,sha256=
+versionhq/task/model.py,sha256=ApjV2JUe-gxRS8N0B6fBXzRFu-fQcna2gLlSKBhB_vM,29645
 versionhq/task/structured_response.py,sha256=tqOHpch8CVmMj0aZXjdDWtPNcVmBW8DVZnBvPBwS4PM,5053
 versionhq/task/TEMPLATES/Description.py,sha256=hKhpbz0ztbkUMXz9KiL-P40fis9OB5ICOdL9jCtgAhU,864
 versionhq/task_graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -70,12 +71,12 @@ versionhq/tool/composio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 versionhq/tool/composio/model.py,sha256=GIFKso_e_4a3BdaulqU_i6Y9JFAExNBjzHUHR_zZeSI,8577
 versionhq/tool/composio/params.py,sha256=FvBuEXsOQUYnN7RTFxT20kAkiEYkxWKkiVtgpqOzKZQ,1843
 versionhq/tool/gpt/__init__.py,sha256=A6xCuf_GUBs7wfx904J_Vd2t1GJCcf0lMKOL7MbZce4,160
-versionhq/tool/gpt/_enum.py,sha256=
-versionhq/tool/gpt/cua.py,sha256=
+versionhq/tool/gpt/_enum.py,sha256=iBtH964dyv6d326VXSJsthB7EKxFXLcZVQPfvaCtbdk,496
+versionhq/tool/gpt/cua.py,sha256=vdrPest2wWntMEKyvXcsR4WeivP5edE8B4rKqQbgHHY,19108
 versionhq/tool/gpt/file_search.py,sha256=r5JVlf-epKB8DDXyrzlkezguHUMir0JW-77LUHoy-w8,5813
 versionhq/tool/gpt/web_search.py,sha256=bpqEQopbq9KtqQ_0W7QAAJ5TyoKGiVM94-SMp5oqNFE,3483
-versionhq-1.2.4.13.dist-info/licenses/LICENSE,sha256=
-versionhq-1.2.4.13.dist-info/METADATA,sha256=
-versionhq-1.2.4.13.dist-info/WHEEL,sha256=
-versionhq-1.2.4.13.dist-info/top_level.txt,sha256=
-versionhq-1.2.4.13.dist-info/RECORD,,
+versionhq-1.2.4.15.dist-info/licenses/LICENSE,sha256=cRoGGdM73IiDs6nDWKqPlgSv7aR4n-qBXYnJlCMHCeE,1082
+versionhq-1.2.4.15.dist-info/METADATA,sha256=0wsn8Zh2QnUwCo8LxL6Lmm9HuB9s67v_KYsVVkajqBQ,21399
+versionhq-1.2.4.15.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+versionhq-1.2.4.15.dist-info/top_level.txt,sha256=DClQwxDWqIUGeRJkA8vBlgeNsYZs4_nJWMonzFt5Wj0,10
+versionhq-1.2.4.15.dist-info/RECORD,,
{versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/licenses/LICENSE
File without changes
{versionhq-1.2.4.13.dist-info → versionhq-1.2.4.15.dist-info}/top_level.txt
File without changes