versionhq 1.2.4.5__py3-none-any.whl → 1.2.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +12 -3
- versionhq/_prompt/auto_feedback.py +1 -1
- versionhq/_prompt/model.py +11 -8
- versionhq/_utils/__init__.py +2 -0
- versionhq/_utils/convert_img_url.py +15 -0
- versionhq/_utils/is_valid_enum.py +25 -0
- versionhq/_utils/llm_as_a_judge.py +0 -1
- versionhq/_utils/usage_metrics.py +35 -14
- versionhq/agent/model.py +91 -27
- versionhq/agent_network/formation.py +3 -9
- versionhq/agent_network/model.py +3 -4
- versionhq/clients/customer/__init__.py +2 -2
- versionhq/clients/product/model.py +4 -4
- versionhq/clients/workflow/model.py +1 -1
- versionhq/llm/llm_vars.py +0 -2
- versionhq/llm/model.py +1 -1
- versionhq/storage/task_output_storage.py +2 -2
- versionhq/task/evaluation.py +11 -2
- versionhq/task/model.py +72 -59
- versionhq/task_graph/model.py +30 -26
- versionhq/tool/composio/__init__.py +0 -0
- versionhq/tool/{composio_tool.py → composio/model.py} +4 -5
- versionhq/tool/gpt/__init__.py +6 -0
- versionhq/tool/gpt/_enum.py +28 -0
- versionhq/tool/gpt/cup.py +145 -0
- versionhq/tool/gpt/file_search.py +163 -0
- versionhq/tool/gpt/web_search.py +89 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/METADATA +1 -1
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/RECORD +33 -25
- /versionhq/tool/{composio_tool_vars.py → composio/params.py} +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/LICENSE +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/WHEEL +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/top_level.txt +0 -0
versionhq/task/model.py
CHANGED
@@ -3,7 +3,7 @@ import threading
|
|
3
3
|
import datetime
|
4
4
|
import uuid
|
5
5
|
import inspect
|
6
|
-
import
|
6
|
+
from enum import IntEnum
|
7
7
|
from concurrent.futures import Future
|
8
8
|
from hashlib import md5
|
9
9
|
from typing import Any, Dict, List, Set, Optional, Callable, Type
|
@@ -14,11 +14,15 @@ from pydantic_core import PydanticCustomError
|
|
14
14
|
|
15
15
|
import versionhq as vhq
|
16
16
|
from versionhq.task.evaluation import Evaluation, EvaluationItem
|
17
|
-
from versionhq.tool.model import Tool, ToolSet
|
17
|
+
from versionhq.tool.model import Tool, ToolSet, BaseTool
|
18
|
+
from versionhq.tool.rag_tool import RagTool
|
19
|
+
from versionhq.tool.gpt.web_search import GPTToolWebSearch
|
20
|
+
from versionhq.tool.gpt.file_search import GPTToolFileSearch
|
21
|
+
from versionhq.tool.gpt.cup import GPTToolCUP
|
18
22
|
from versionhq._utils import process_config, Logger, UsageMetrics, ErrorType
|
19
23
|
|
20
24
|
|
21
|
-
class TaskExecutionType(
|
25
|
+
class TaskExecutionType(IntEnum):
|
22
26
|
"""
|
23
27
|
Enumeration to store task execution types of independent tasks without dependencies.
|
24
28
|
"""
|
@@ -174,14 +178,15 @@ class TaskOutput(BaseModel):
|
|
174
178
|
"""
|
175
179
|
A class to store the final output of the given task in raw (string), json_dict, and pydantic class formats.
|
176
180
|
"""
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
json_dict: Dict[str, Any] = Field(default=None, description="`raw` converted to dictionary")
|
181
|
+
task_id: UUID4 = Field(default_factory=uuid.uuid4)
|
182
|
+
raw: str = Field(default="")
|
183
|
+
json_dict: Dict[str, Any] = Field(default=None)
|
181
184
|
pydantic: Optional[Any] = Field(default=None)
|
182
185
|
tool_output: Optional[Any] = Field(default=None, description="stores tool result when the task takes tool output as its final output")
|
183
186
|
callback_output: Optional[Any] = Field(default=None, description="stores task or agent callback outcome")
|
187
|
+
annotations: Optional[Dict[str, Any]] = Field(default=None)
|
184
188
|
evaluation: Optional[InstanceOf[Evaluation]] = Field(default=None, description="stores overall evaluation of the task output. stored in ltm")
|
189
|
+
usage: Optional[UsageMetrics] = Field(default=None)
|
185
190
|
|
186
191
|
|
187
192
|
def _fetch_value_of(self, key: str = None) -> Any:
|
@@ -311,7 +316,7 @@ class Task(BaseModel):
|
|
311
316
|
response_schema: Optional[Type[BaseModel] | List[ResponseField]] = Field(default=None, description="stores response format")
|
312
317
|
|
313
318
|
# tool usage
|
314
|
-
tools: Optional[List[
|
319
|
+
tools: Optional[List[Any]] = Field(default_factory=list, description="tools that the agent can use aside from their tools")
|
315
320
|
can_use_agent_tools: bool = Field(default=True, description="whether the agent can use their own tools when executing the task")
|
316
321
|
tool_res_as_final: bool = Field(default=False, description="when set True, tools res will be stored in the `TaskOutput`")
|
317
322
|
|
@@ -336,7 +341,6 @@ class Task(BaseModel):
|
|
336
341
|
fsls: Optional[list[str]] = Field(default=None, description="stores ideal/weak responses")
|
337
342
|
|
338
343
|
# recording
|
339
|
-
_usage: UsageMetrics = PrivateAttr(default=None)
|
340
344
|
_delegations: int = 0
|
341
345
|
processed_agents: Set[str] = Field(default_factory=set, description="store keys of the agents that executed the task")
|
342
346
|
output: Optional[TaskOutput] = Field(default=None, description="store the final TaskOutput object")
|
@@ -361,24 +365,30 @@ class Task(BaseModel):
|
|
361
365
|
for field in required_fields:
|
362
366
|
if getattr(self, field) is None:
|
363
367
|
raise ValueError( f"{field} must be provided either directly or through config")
|
364
|
-
|
365
|
-
self._usage = UsageMetrics(id=self.id)
|
366
368
|
return self
|
367
369
|
|
368
370
|
|
369
371
|
@model_validator(mode="after")
|
370
372
|
def set_up_tools(self) -> Self:
|
371
|
-
if
|
372
|
-
pass
|
373
|
-
else:
|
373
|
+
if self.tools:
|
374
374
|
tool_list = []
|
375
375
|
for item in self.tools:
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
376
|
+
match item:
|
377
|
+
case Tool() | ToolSet() | BaseTool() | RagTool() | GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
|
378
|
+
tool_list.append(item)
|
379
|
+
case type(item, callable):
|
380
|
+
tool_list.append(Tool(func=item))
|
381
|
+
case dict():
|
382
|
+
tool = None
|
383
|
+
try:
|
384
|
+
tool = Tool(**item)
|
385
|
+
except:
|
386
|
+
try:
|
387
|
+
tool = RagTool(**item)
|
388
|
+
except:
|
389
|
+
pass
|
390
|
+
case _:
|
391
|
+
pass
|
382
392
|
self.tools = tool_list
|
383
393
|
return self
|
384
394
|
|
@@ -472,7 +482,6 @@ class Task(BaseModel):
|
|
472
482
|
return output
|
473
483
|
except:
|
474
484
|
output = self._sanitize_raw_output(raw=raw)
|
475
|
-
self._usage.record_errors(type=ErrorType.FORMAT)
|
476
485
|
return output
|
477
486
|
|
478
487
|
|
@@ -637,44 +646,47 @@ class Task(BaseModel):
|
|
637
646
|
|
638
647
|
start_dt = datetime.datetime.now()
|
639
648
|
task_output: InstanceOf[TaskOutput] = None
|
640
|
-
raw_output: str = None
|
641
|
-
tool_output: str | list = None
|
642
|
-
task_tools: List[List[InstanceOf[Tool]| InstanceOf[ToolSet] | Type[Tool]]] = []
|
643
649
|
user_prompt, dev_prompt = None, None
|
644
650
|
|
645
|
-
if self.tools:
|
646
|
-
for item in self.tools:
|
647
|
-
if isinstance(item, ToolSet) or isinstance(item, Tool) or type(item) == Tool:
|
648
|
-
task_tools.append(item)
|
649
|
-
|
650
651
|
if self.allow_delegation == True:
|
651
652
|
agent_to_delegate = self._select_agent_to_delegate(agent=agent)
|
652
653
|
agent = agent_to_delegate
|
653
654
|
self._delegations += 1
|
654
655
|
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
656
|
+
user_prompt, dev_prompt, raw_output, usage = agent.execute_task(task=self, context=context)
|
657
|
+
match raw_output:
|
658
|
+
case TaskOutput():
|
659
|
+
raw_output.task_id = self.id
|
660
|
+
raw_output.usage = usage
|
661
|
+
task_output = raw_output
|
662
|
+
|
663
|
+
case str():
|
664
|
+
json_dict_output = self._create_json_output(raw=raw_output)
|
665
|
+
if "outcome" in json_dict_output:
|
666
|
+
json_dict_output = self._create_json_output(raw=str(json_dict_output["outcome"]))
|
667
|
+
|
668
|
+
pydantic_output = self._create_pydantic_output(raw=raw_output, json_dict=json_dict_output)
|
669
|
+
task_output = TaskOutput(
|
670
|
+
task_id=self.id,
|
671
|
+
raw=raw_output if raw_output is not None else "",
|
672
|
+
pydantic=pydantic_output,
|
673
|
+
json_dict=json_dict_output,
|
674
|
+
tool_output=raw_output if self.tool_res_as_final else None,
|
675
|
+
usage=usage
|
676
|
+
)
|
661
677
|
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
pydantic=pydantic_output,
|
674
|
-
json_dict=json_dict_output,
|
675
|
-
)
|
678
|
+
case None | "":
|
679
|
+
task_output = TaskOutput(task_id=self.id, raw="", usage=usage)
|
680
|
+
task_output.usage.record_errors(type=ErrorType.FORMAT)
|
681
|
+
|
682
|
+
case _:
|
683
|
+
task_output = TaskOutput(
|
684
|
+
task_id=self.id,
|
685
|
+
raw=raw_output,
|
686
|
+
tool_output=raw_output if self.tool_res_as_final else None,
|
687
|
+
usage=usage
|
688
|
+
)
|
676
689
|
|
677
|
-
self.output = task_output
|
678
690
|
self.processed_agents.add(agent.key)
|
679
691
|
|
680
692
|
# if self.output_file: ## disabled for now
|
@@ -690,10 +702,9 @@ class Task(BaseModel):
|
|
690
702
|
self._pfg.user_prompts.update({ index: user_prompt })
|
691
703
|
self._pfg.dev_prompts.update({ index: dev_prompt })
|
692
704
|
|
693
|
-
if
|
705
|
+
if task_output.raw:
|
694
706
|
if self.should_evaluate:
|
695
707
|
task_output.evaluate(task=self)
|
696
|
-
self.output = task_output
|
697
708
|
|
698
709
|
self._create_short_and_long_term_memories(agent=agent, task_output=task_output)
|
699
710
|
|
@@ -704,11 +715,14 @@ class Task(BaseModel):
|
|
704
715
|
valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
|
705
716
|
callback_res = self.callback(**valid_kwargs)
|
706
717
|
task_output.callback_output = callback_res
|
707
|
-
self.output = task_output
|
708
|
-
self._store_logs()
|
709
718
|
|
710
719
|
end_dt = datetime.datetime.now()
|
711
|
-
|
720
|
+
task_output.usage.record_latency(start_dt=start_dt, end_dt=end_dt)
|
721
|
+
if task_output.json_dict and "output" in task_output.json_dict:
|
722
|
+
task_output.usage.record_errors(type=ErrorType.FORMAT)
|
723
|
+
|
724
|
+
self.output = task_output
|
725
|
+
self._store_logs()
|
712
726
|
return task_output
|
713
727
|
|
714
728
|
|
@@ -719,7 +733,6 @@ class Task(BaseModel):
|
|
719
733
|
from versionhq._prompt.model import Prompt
|
720
734
|
from versionhq._prompt.auto_feedback import PromptFeedbackGraph
|
721
735
|
|
722
|
-
# self._usage = None
|
723
736
|
prompt = Prompt(task=self, agent=agent, context=context)
|
724
737
|
pfg = PromptFeedbackGraph(prompt=prompt, should_reform=self.human, reform_trigger_event=ReformTriggerEvent.USER_INPUT if self.human else None)
|
725
738
|
pfg = pfg.set_up_graph()
|
@@ -728,11 +741,11 @@ class Task(BaseModel):
|
|
728
741
|
try:
|
729
742
|
if self._pfg and self.output is None:
|
730
743
|
res, all_outputs = self._pfg.activate()
|
731
|
-
if all_outputs:
|
744
|
+
if all_outputs:
|
745
|
+
res.usage = self._pfg.usage
|
732
746
|
return res
|
733
|
-
|
734
747
|
except:
|
735
|
-
self.
|
748
|
+
self._pfg.usage.record_errors(type=ErrorType.API)
|
736
749
|
Logger().log(level="error", message="Failed to execute the task.", color="red")
|
737
750
|
return None
|
738
751
|
|
versionhq/task_graph/model.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
import matplotlib
|
2
2
|
matplotlib.use('agg')
|
3
3
|
|
4
|
-
import enum
|
5
4
|
import uuid
|
6
|
-
import
|
7
|
-
import
|
5
|
+
import datetime
|
6
|
+
from enum import IntEnum, Enum
|
8
7
|
from abc import ABC
|
9
8
|
from concurrent.futures import Future
|
10
9
|
from typing import List, Any, Optional, Callable, Dict, Type, Tuple
|
11
10
|
from typing_extensions import Self
|
12
11
|
|
12
|
+
import networkx as nx
|
13
|
+
import matplotlib.pyplot as plt
|
13
14
|
from pydantic import BaseModel, InstanceOf, Field, UUID4, field_validator, model_validator
|
14
15
|
from pydantic_core import PydanticCustomError
|
15
16
|
|
@@ -18,13 +19,13 @@ from versionhq.task.model import Task, TaskOutput, Evaluation, ResponseField
|
|
18
19
|
from versionhq._utils import Logger, UsageMetrics, ErrorType
|
19
20
|
|
20
21
|
|
21
|
-
class ReformTriggerEvent(
|
22
|
+
class ReformTriggerEvent(IntEnum):
|
22
23
|
USER_INPUT = 1 # ask human
|
23
24
|
TEST_TIME_COMPUTATION = 2 # mismatch between actual responses and expected outcome
|
24
25
|
ERROR_DETECTION = 3 # response error
|
25
26
|
|
26
27
|
|
27
|
-
class ConditionType(
|
28
|
+
class ConditionType(IntEnum):
|
28
29
|
AND = 1
|
29
30
|
OR = 2
|
30
31
|
|
@@ -71,7 +72,7 @@ class Condition(BaseModel):
|
|
71
72
|
return bool(len([item for item in cond_list if item == True]) == len(cond_list))
|
72
73
|
|
73
74
|
|
74
|
-
class TaskStatus(
|
75
|
+
class TaskStatus(IntEnum):
|
75
76
|
"""
|
76
77
|
Enum to track the task execution status
|
77
78
|
"""
|
@@ -84,7 +85,7 @@ class TaskStatus(enum.Enum):
|
|
84
85
|
ERROR = 7 # tried task execute but returned error. resumption follows edge weights and agent settings
|
85
86
|
|
86
87
|
|
87
|
-
class DependencyType(
|
88
|
+
class DependencyType(str, Enum):
|
88
89
|
"""
|
89
90
|
Concise enumeration of the edge type.
|
90
91
|
"""
|
@@ -393,14 +394,13 @@ class Graph(ABC, BaseModel):
|
|
393
394
|
|
394
395
|
|
395
396
|
class TaskGraph(Graph):
|
396
|
-
_usage: Optional[UsageMetrics] = None
|
397
|
-
|
398
397
|
id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True)
|
399
398
|
should_reform: bool = False
|
400
399
|
reform_trigger_event: Optional[ReformTriggerEvent] = None
|
401
400
|
outputs: Dict[str, TaskOutput] = Field(default_factory=dict, description="stores node identifier and TaskOutput")
|
402
401
|
concl_response_schema: Optional[List[ResponseField] | Type[BaseModel]] = Field(default=None, description="stores final response schema in Pydantic class or response fields")
|
403
402
|
concl: Optional[TaskOutput] = Field(default=None, description="stores the final or latest conclusion of the entire task graph")
|
403
|
+
usage: Optional[UsageMetrics] = None
|
404
404
|
|
405
405
|
|
406
406
|
def _save(self, title: str, abs_file_path: str = None) -> None:
|
@@ -420,18 +420,6 @@ class TaskGraph(Graph):
|
|
420
420
|
Logger().log(level="error", message=f"Failed to save the graph {str(self.id)}: {str(e)}", color="red")
|
421
421
|
|
422
422
|
|
423
|
-
def _handle_usage(self) -> None:
|
424
|
-
"""Returns total tokens and latency spended for the graph execution."""
|
425
|
-
if not self.nodes:
|
426
|
-
return None
|
427
|
-
|
428
|
-
self._usage = self._usage if self._usage else UsageMetrics(id=self.id)
|
429
|
-
|
430
|
-
for node in self.nodes.values():
|
431
|
-
if node.task and node.task._usage:
|
432
|
-
self._usage.aggregate(metrics=node.task._usage)
|
433
|
-
|
434
|
-
|
435
423
|
def _handle_human_input(self) -> str | None:
|
436
424
|
"""Handles input from human."""
|
437
425
|
request = None
|
@@ -448,12 +436,27 @@ class TaskGraph(Graph):
|
|
448
436
|
Logger().log(message=f"Ok. regenerating the graph based on your input: ', {request}", level="info", color="blue")
|
449
437
|
else:
|
450
438
|
Logger().log(message="Cannot recognize your request.", level="error", color="red")
|
451
|
-
self.
|
452
|
-
self.
|
439
|
+
self.usage = self.usage if self.usage else UsageMetrics(id=self.id)
|
440
|
+
self.usage.record_errors(type=ErrorType.HUMAN_INTERACTION)
|
453
441
|
|
454
442
|
return request
|
455
443
|
|
456
444
|
|
445
|
+
def _handle_usage(self, start_dt: datetime = None, end_dt: datetime = None) -> UsageMetrics:
|
446
|
+
usage = self.usage if self.usage else UsageMetrics(id=self.id)
|
447
|
+
|
448
|
+
if self.outputs:
|
449
|
+
for item in self.outputs.values():
|
450
|
+
if isinstance(item.usage, UsageMetrics):
|
451
|
+
usage = usage.aggregate(metrics=item.usage)
|
452
|
+
|
453
|
+
if start_dt and end_dt:
|
454
|
+
usage.record_latency(start_dt, end_dt)
|
455
|
+
|
456
|
+
self.usage = usage
|
457
|
+
return usage
|
458
|
+
|
459
|
+
|
457
460
|
def add_task(self, task: Node | Task) -> Node:
|
458
461
|
"""Convert `task` to a Node object and add it to G"""
|
459
462
|
|
@@ -596,6 +599,7 @@ class TaskGraph(Graph):
|
|
596
599
|
"""
|
597
600
|
|
598
601
|
Logger().log(color="blue", message=f"Start to activate the graph: {str(self.id)}", level="info")
|
602
|
+
start_dt = datetime.datetime.now()
|
599
603
|
|
600
604
|
if target:
|
601
605
|
if not [k for k in self.nodes.keys() if k == target]:
|
@@ -659,7 +663,6 @@ class TaskGraph(Graph):
|
|
659
663
|
node_identifier = edge.target.identifier
|
660
664
|
self.outputs.update({ node_identifier: res })
|
661
665
|
|
662
|
-
|
663
666
|
if self.should_reform:
|
664
667
|
target = [k for k in self.outputs.keys()][-1] if self.outputs else self.find_start_nodes()[0].identifier if self.find_start_nodes() else None
|
665
668
|
|
@@ -670,8 +673,9 @@ class TaskGraph(Graph):
|
|
670
673
|
|
671
674
|
self.concl = res
|
672
675
|
self.concl_response_schema = self.concl_response_schema if self.concl_response_schema else res.pydantic.__class__ if res.pydantic else None
|
673
|
-
|
674
|
-
|
676
|
+
|
677
|
+
end_dt = datetime.datetime.now()
|
678
|
+
self._handle_usage(start_dt, end_dt)
|
675
679
|
return res, self.outputs
|
676
680
|
|
677
681
|
|
File without changes
|
@@ -7,10 +7,9 @@ from typing_extensions import Self
|
|
7
7
|
|
8
8
|
from pydantic import BaseModel, Field, model_validator, field_validator, UUID4, PrivateAttr
|
9
9
|
from pydantic_core import PydanticCustomError
|
10
|
-
|
11
10
|
from composio import ComposioToolSet
|
12
11
|
|
13
|
-
from versionhq.tool.
|
12
|
+
from versionhq.tool.composio.params import ComposioAppName, ComposioAuthScheme, composio_app_set, ComposioStatus, ComposioAction
|
14
13
|
from versionhq.tool.cache_handler import CacheHandler
|
15
14
|
from versionhq._utils.logger import Logger
|
16
15
|
|
@@ -22,7 +21,7 @@ DEFAULT_USER_ID = os.environ.get("DEFAULT_USER_ID", None)
|
|
22
21
|
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)
|
23
22
|
|
24
23
|
|
25
|
-
class
|
24
|
+
class ComposioBaseTool(ABC, BaseModel):
|
26
25
|
"""
|
27
26
|
A class to handle connecting account with Composio and executing actions using Composio ecosystem.
|
28
27
|
`connected_account_id` is set up per `app_name` to call the actions on the given app. i.e., salesforce
|
@@ -78,8 +77,8 @@ class ComposioHandler(ABC, BaseModel):
|
|
78
77
|
"""
|
79
78
|
Composio toolset on LangChain for action execution using LLM.
|
80
79
|
"""
|
81
|
-
from composio_langchain import
|
82
|
-
return
|
80
|
+
from composio_langchain import ComposioBaseToolSet
|
81
|
+
return ComposioBaseToolSet(api_key=os.environ.get("COMPOSIO_API_KEY"), metadata={**metadata})
|
83
82
|
|
84
83
|
|
85
84
|
def _connect(
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
|
4
|
+
class GPTSizeEnum(str, Enum):
|
5
|
+
LOW = "low"
|
6
|
+
MEDIUM = "medium"
|
7
|
+
HIGH = "high"
|
8
|
+
|
9
|
+
|
10
|
+
class GPTCUPEnvironmentEnum(str, Enum):
|
11
|
+
BROWSER = "browser"
|
12
|
+
MAC = "mac"
|
13
|
+
WINDOWS = "windows"
|
14
|
+
UNBUNTU = "ubuntu"
|
15
|
+
|
16
|
+
|
17
|
+
class GPTCUPTypeEnum(str, Enum):
|
18
|
+
COMPUTER_CALL_OUTPUT = "computer_call_output"
|
19
|
+
COMPUTER_USE_PREVIEW = "computer_use_preview"
|
20
|
+
|
21
|
+
|
22
|
+
class GPTFilterTypeEnum(str, Enum):
|
23
|
+
eq = "eq"
|
24
|
+
ne = "ne"
|
25
|
+
gt = "gt"
|
26
|
+
gte = "gte"
|
27
|
+
lt = "lt"
|
28
|
+
lte = "lte"
|
@@ -0,0 +1,145 @@
|
|
1
|
+
from typing import List, Dict, Any
|
2
|
+
|
3
|
+
from versionhq._utils import convert_img_url
|
4
|
+
from versionhq.tool.gpt import openai_client
|
5
|
+
from versionhq.tool.gpt._enum import GPTCUPEnvironmentEnum, GPTCUPTypeEnum, GPTSizeEnum
|
6
|
+
from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType
|
7
|
+
|
8
|
+
|
9
|
+
class CUPToolSchema:
|
10
|
+
type: str = GPTCUPTypeEnum.COMPUTER_USE_PREVIEW.value
|
11
|
+
display_width: int = 1024
|
12
|
+
display_height: int = 768
|
13
|
+
environment: str = GPTCUPEnvironmentEnum.BROWSER.value
|
14
|
+
|
15
|
+
def __init__(
|
16
|
+
self,
|
17
|
+
type: str | GPTCUPTypeEnum = None,
|
18
|
+
display_width: int = None,
|
19
|
+
display_height: int = None,
|
20
|
+
environment: str | GPTCUPEnvironmentEnum = None
|
21
|
+
):
|
22
|
+
self.display_height = display_height if display_height else self.display_height
|
23
|
+
self.display_width = display_width if display_width else self.display_width
|
24
|
+
|
25
|
+
if type and is_valid_enum(enum=GPTCUPTypeEnum, val=type):
|
26
|
+
self.type = type.value if isinstance(type, GPTCUPTypeEnum) else type
|
27
|
+
|
28
|
+
if environment and is_valid_enum(enum=GPTCUPEnvironmentEnum, val=environment):
|
29
|
+
self.environment = environment.value if isinstance(environment, GPTCUPEnvironmentEnum) else environment
|
30
|
+
|
31
|
+
self.environment = environment if environment else self.environment
|
32
|
+
|
33
|
+
|
34
|
+
@property
|
35
|
+
def schema(self) -> Dict[str, Any]:
|
36
|
+
return {
|
37
|
+
"type": self.type if isinstance(self.type, str) else self.type.value,
|
38
|
+
"display_width": self.display_width,
|
39
|
+
"display_height": self.display_height,
|
40
|
+
"environment": self.environment if isinstance(self.environment, str) else self.environment.value,
|
41
|
+
}
|
42
|
+
|
43
|
+
|
44
|
+
class GPTToolCUP:
|
45
|
+
model: str = "computer-use-preview"
|
46
|
+
tools: List[CUPToolSchema] = list()
|
47
|
+
user_prompt: str = None
|
48
|
+
img_url: str = None
|
49
|
+
reasoning_effort: str = GPTSizeEnum.MEDIUM.value
|
50
|
+
truncation: str = "auto"
|
51
|
+
|
52
|
+
def __init__(
|
53
|
+
self,
|
54
|
+
user_prompt: str,
|
55
|
+
tools: List[CUPToolSchema] | CUPToolSchema = None,
|
56
|
+
img_url: str = None,
|
57
|
+
reasoning_effort: GPTSizeEnum | str = None,
|
58
|
+
truncation: str = None
|
59
|
+
):
|
60
|
+
self.user_prompt = user_prompt
|
61
|
+
self.truncation = truncation if truncation else self.truncation
|
62
|
+
|
63
|
+
if img_url:
|
64
|
+
img_url = convert_img_url(img_url)
|
65
|
+
self.img_url = img_url
|
66
|
+
|
67
|
+
if reasoning_effort and is_valid_enum(enum=GPTSizeEnum, val=reasoning_effort):
|
68
|
+
self.reasoning_effort = reasoning_effort.value if isinstance(reasoning_effort, GPTSizeEnum) else reasoning_effort
|
69
|
+
|
70
|
+
if tools:
|
71
|
+
match tools:
|
72
|
+
case list():
|
73
|
+
if self.tools:
|
74
|
+
self.tools.extend(tools)
|
75
|
+
else:
|
76
|
+
self.tools = tools
|
77
|
+
case CUPToolSchema():
|
78
|
+
if self.tools:
|
79
|
+
self.tools.append(tools)
|
80
|
+
else:
|
81
|
+
self.tools = [tools]
|
82
|
+
case _:
|
83
|
+
pass
|
84
|
+
|
85
|
+
def run(self):
|
86
|
+
raw_res = ""
|
87
|
+
usage = UsageMetrics()
|
88
|
+
|
89
|
+
try:
|
90
|
+
res = openai_client.responses.create(**self.schema)
|
91
|
+
if not res:
|
92
|
+
usage.record_errors(ErrorType.TOOL)
|
93
|
+
else:
|
94
|
+
raw_res = res.output[1].summary[0].text
|
95
|
+
usage.record_token_usage(**res.usage.__dict__)
|
96
|
+
return raw_res, None, usage
|
97
|
+
except:
|
98
|
+
usage.record_errors(ErrorType.TOOL)
|
99
|
+
return raw_res, None, usage
|
100
|
+
|
101
|
+
|
102
|
+
@property
|
103
|
+
def schema(self) -> Dict[str, Any]:
|
104
|
+
img_url = convert_img_url(self.img_url) if self.img_url else None
|
105
|
+
inputs = [{ "role": "user", "content": self.user_prompt } ]
|
106
|
+
|
107
|
+
if img_url:
|
108
|
+
inputs.append({"type": "input_image", "image_url": f"data:image/png;base64,{img_url}"})
|
109
|
+
|
110
|
+
tool_schema = [item.schema for item in self.tools]
|
111
|
+
schema = dict(model=self.model, tools=tool_schema, input=inputs, reasoning={ "effort": self.reasoning_effort}, truncation=self.truncation)
|
112
|
+
return schema
|
113
|
+
|
114
|
+
|
115
|
+
# "output": [
|
116
|
+
# {
|
117
|
+
# "type": "reasoning",
|
118
|
+
# "id": "rs_67cb...",
|
119
|
+
# "summary": [
|
120
|
+
# {
|
121
|
+
# "type": "summary_text",
|
122
|
+
# "text": "Exploring 'File' menu option."
|
123
|
+
# }
|
124
|
+
# ]
|
125
|
+
# },
|
126
|
+
# {
|
127
|
+
# "type": "computer_call",
|
128
|
+
# "id": "cu_67cb...",
|
129
|
+
# "call_id": "call_nEJ...",
|
130
|
+
# "action": {
|
131
|
+
# "type": "click",
|
132
|
+
# "button": "left",
|
133
|
+
# "x": 135,
|
134
|
+
# "y": 193
|
135
|
+
# },
|
136
|
+
# "pending_safety_checks": [
|
137
|
+
# {
|
138
|
+
# "id": "cu_sc_67cb...",
|
139
|
+
# "code": "malicious_instructions",
|
140
|
+
# "message": "We've detected instructions that may cause your application to perform malicious or unauthorized actions. Please acknowledge this warning if you'd like to proceed."
|
141
|
+
# }
|
142
|
+
# ],
|
143
|
+
# "status": "completed"
|
144
|
+
# }
|
145
|
+
# ]
|