versionhq 1.2.4.5__py3-none-any.whl → 1.2.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. versionhq/__init__.py +12 -3
  2. versionhq/_prompt/auto_feedback.py +1 -1
  3. versionhq/_prompt/model.py +11 -8
  4. versionhq/_utils/__init__.py +2 -0
  5. versionhq/_utils/convert_img_url.py +15 -0
  6. versionhq/_utils/is_valid_enum.py +25 -0
  7. versionhq/_utils/llm_as_a_judge.py +0 -1
  8. versionhq/_utils/usage_metrics.py +35 -14
  9. versionhq/agent/model.py +91 -27
  10. versionhq/agent_network/formation.py +3 -9
  11. versionhq/agent_network/model.py +3 -4
  12. versionhq/clients/customer/__init__.py +2 -2
  13. versionhq/clients/product/model.py +4 -4
  14. versionhq/clients/workflow/model.py +1 -1
  15. versionhq/llm/llm_vars.py +0 -2
  16. versionhq/llm/model.py +1 -1
  17. versionhq/storage/task_output_storage.py +2 -2
  18. versionhq/task/evaluation.py +11 -2
  19. versionhq/task/model.py +72 -59
  20. versionhq/task_graph/model.py +30 -26
  21. versionhq/tool/composio/__init__.py +0 -0
  22. versionhq/tool/{composio_tool.py → composio/model.py} +4 -5
  23. versionhq/tool/gpt/__init__.py +6 -0
  24. versionhq/tool/gpt/_enum.py +28 -0
  25. versionhq/tool/gpt/cup.py +145 -0
  26. versionhq/tool/gpt/file_search.py +163 -0
  27. versionhq/tool/gpt/web_search.py +89 -0
  28. {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/METADATA +1 -1
  29. {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/RECORD +33 -25
  30. /versionhq/tool/{composio_tool_vars.py → composio/params.py} +0 -0
  31. {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/LICENSE +0 -0
  32. {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/WHEEL +0 -0
  33. {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/top_level.txt +0 -0
versionhq/task/model.py CHANGED
@@ -3,7 +3,7 @@ import threading
 import datetime
 import uuid
 import inspect
-import enum
+from enum import IntEnum
 from concurrent.futures import Future
 from hashlib import md5
 from typing import Any, Dict, List, Set, Optional, Callable, Type
@@ -14,11 +14,15 @@ from pydantic_core import PydanticCustomError
 
 import versionhq as vhq
 from versionhq.task.evaluation import Evaluation, EvaluationItem
-from versionhq.tool.model import Tool, ToolSet
+from versionhq.tool.model import Tool, ToolSet, BaseTool
+from versionhq.tool.rag_tool import RagTool
+from versionhq.tool.gpt.web_search import GPTToolWebSearch
+from versionhq.tool.gpt.file_search import GPTToolFileSearch
+from versionhq.tool.gpt.cup import GPTToolCUP
 from versionhq._utils import process_config, Logger, UsageMetrics, ErrorType
 
 
-class TaskExecutionType(enum.Enum):
+class TaskExecutionType(IntEnum):
     """
     Enumeration to store task execution types of independent tasks without dependencies.
     """
@@ -174,14 +178,15 @@ class TaskOutput(BaseModel):
     """
     A class to store the final output of the given task in raw (string), json_dict, and pydantic class formats.
     """
-
-    task_id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True, description="store Task ID")
-    raw: str = Field(default="", description="Raw output of the task")
-    json_dict: Dict[str, Any] = Field(default=None, description="`raw` converted to dictionary")
+    task_id: UUID4 = Field(default_factory=uuid.uuid4)
+    raw: str = Field(default="")
+    json_dict: Dict[str, Any] = Field(default=None)
     pydantic: Optional[Any] = Field(default=None)
     tool_output: Optional[Any] = Field(default=None, description="stores tool result when the task takes tool output as its final output")
     callback_output: Optional[Any] = Field(default=None, description="stores task or agent callback outcome")
+    annotations: Optional[Dict[str, Any]] = Field(default=None)
     evaluation: Optional[InstanceOf[Evaluation]] = Field(default=None, description="stores overall evaluation of the task output. stored in ltm")
+    usage: Optional[UsageMetrics] = Field(default=None)
 
 
     def _fetch_value_of(self, key: str = None) -> Any:
@@ -311,7 +316,7 @@ class Task(BaseModel):
     response_schema: Optional[Type[BaseModel] | List[ResponseField]] = Field(default=None, description="stores response format")
 
     # tool usage
-    tools: Optional[List[ToolSet | Tool | Any]] = Field(default_factory=list, description="tools that the agent can use aside from their tools")
+    tools: Optional[List[Any]] = Field(default_factory=list, description="tools that the agent can use aside from their tools")
     can_use_agent_tools: bool = Field(default=True, description="whether the agent can use their own tools when executing the task")
     tool_res_as_final: bool = Field(default=False, description="when set True, tools res will be stored in the `TaskOutput`")
@@ -336,7 +341,6 @@ class Task(BaseModel):
     fsls: Optional[list[str]] = Field(default=None, description="stores ideal/weak responses")
 
     # recording
-    _usage: UsageMetrics = PrivateAttr(default=None)
     _delegations: int = 0
     processed_agents: Set[str] = Field(default_factory=set, description="store keys of the agents that executed the task")
    output: Optional[TaskOutput] = Field(default=None, description="store the final TaskOutput object")
@@ -361,24 +365,30 @@
         for field in required_fields:
             if getattr(self, field) is None:
                 raise ValueError( f"{field} must be provided either directly or through config")
-
-        self._usage = UsageMetrics(id=self.id)
         return self
 
 
     @model_validator(mode="after")
     def set_up_tools(self) -> Self:
-        if not self.tools:
-            pass
-        else:
+        if self.tools:
             tool_list = []
             for item in self.tools:
-                if isinstance(item, Tool) or isinstance(item, ToolSet):
-                    tool_list.append(item)
-                elif (isinstance(item, dict) and "function" not in item) or isinstance(item, str):
-                    pass
-                else:
-                    tool_list.append(item) # address custom tool
+                match item:
+                    case Tool() | ToolSet() | BaseTool() | RagTool() | GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
+                        tool_list.append(item)
+                    case type(item, callable):
+                        tool_list.append(Tool(func=item))
+                    case dict():
+                        tool = None
+                        try:
+                            tool = Tool(**item)
+                        except:
+                            try:
+                                tool = RagTool(**item)
+                            except:
+                                pass
+                    case _:
+                        pass
             self.tools = tool_list
         return self
 
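With this validator, `Task.tools` can mix tool instances, bare callables, and dicts of tool kwargs in one list; the new `case` arms pass instances through, are intended to wrap callables as `Tool(func=...)`, and try dicts first as `Tool(**item)` then as `RagTool(**item)`. A sketch of the call pattern these arms target (the `fetch_rates` helper and dict keys are illustrative, not part of the package):

    import versionhq as vhq
    from versionhq.tool.model import Tool

    def fetch_rates(currency: str) -> str:  # plain callable
        return f"rate for {currency}"

    task = vhq.Task(
        description="Summarize FX rates.",
        tools=[
            Tool(func=fetch_rates),   # passed through by the first case arm
            fetch_rates,              # intended for the callable arm -> Tool(func=item)
            {"func": fetch_rates},    # tried as Tool(**item), falling back to RagTool(**item)
        ],
    )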
@@ -472,7 +482,6 @@
             return output
         except:
             output = self._sanitize_raw_output(raw=raw)
-            self._usage.record_errors(type=ErrorType.FORMAT)
             return output
@@ -637,44 +646,47 @@
 
         start_dt = datetime.datetime.now()
         task_output: InstanceOf[TaskOutput] = None
-        raw_output: str = None
-        tool_output: str | list = None
-        task_tools: List[List[InstanceOf[Tool]| InstanceOf[ToolSet] | Type[Tool]]] = []
         user_prompt, dev_prompt = None, None
 
-        if self.tools:
-            for item in self.tools:
-                if isinstance(item, ToolSet) or isinstance(item, Tool) or type(item) == Tool:
-                    task_tools.append(item)
-
         if self.allow_delegation == True:
             agent_to_delegate = self._select_agent_to_delegate(agent=agent)
             agent = agent_to_delegate
             self._delegations += 1
 
-        if self.tool_res_as_final == True:
-            user_prompt, dev_prompt, tool_output = agent.execute_task(task=self, context=context, task_tools=task_tools)
-            raw_output = str(tool_output) if tool_output else ""
-            if not raw_output:
-                self._usage.record_errors(type=ErrorType.TOOL)
-            task_output = TaskOutput(task_id=self.id, tool_output=tool_output, raw=raw_output)
+        user_prompt, dev_prompt, raw_output, usage = agent.execute_task(task=self, context=context)
+        match raw_output:
+            case TaskOutput():
+                raw_output.task_id = self.id
+                raw_output.usage = usage
+                task_output = raw_output
+
+            case str():
+                json_dict_output = self._create_json_output(raw=raw_output)
+                if "outcome" in json_dict_output:
+                    json_dict_output = self._create_json_output(raw=str(json_dict_output["outcome"]))
+
+                pydantic_output = self._create_pydantic_output(raw=raw_output, json_dict=json_dict_output)
+                task_output = TaskOutput(
+                    task_id=self.id,
+                    raw=raw_output if raw_output is not None else "",
+                    pydantic=pydantic_output,
+                    json_dict=json_dict_output,
+                    tool_output=raw_output if self.tool_res_as_final else None,
+                    usage=usage
+                )
 
-        else:
-            user_prompt, dev_prompt, raw_output = agent.execute_task(task=self, context=context, task_tools=task_tools)
-            json_dict_output = self._create_json_output(raw=raw_output)
-            if "outcome" in json_dict_output:
-                json_dict_output = self._create_json_output(raw=str(json_dict_output["outcome"]))
-
-            pydantic_output = self._create_pydantic_output(raw=raw_output, json_dict=json_dict_output)
-
-            task_output = TaskOutput(
-                task_id=self.id,
-                raw=raw_output if raw_output is not None else "",
-                pydantic=pydantic_output,
-                json_dict=json_dict_output,
-            )
+            case None | "":
+                task_output = TaskOutput(task_id=self.id, raw="", usage=usage)
+                task_output.usage.record_errors(type=ErrorType.FORMAT)
+
+            case _:
+                task_output = TaskOutput(
+                    task_id=self.id,
+                    raw=raw_output,
+                    tool_output=raw_output if self.tool_res_as_final else None,
+                    usage=usage
+                )
 
-        self.output = task_output
         self.processed_agents.add(agent.key)
 
         # if self.output_file: ## disabled for now
@@ -690,10 +702,9 @@
             self._pfg.user_prompts.update({ index: user_prompt })
             self._pfg.dev_prompts.update({ index: dev_prompt })
 
-        if raw_output:
+        if task_output.raw:
             if self.should_evaluate:
                 task_output.evaluate(task=self)
-                self.output = task_output
 
             self._create_short_and_long_term_memories(agent=agent, task_output=task_output)
 
@@ -704,11 +715,14 @@
             valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
             callback_res = self.callback(**valid_kwargs)
             task_output.callback_output = callback_res
-            self.output = task_output
-            self._store_logs()
 
         end_dt = datetime.datetime.now()
-        self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
+        task_output.usage.record_latency(start_dt=start_dt, end_dt=end_dt)
+        if task_output.json_dict and "output" in task_output.json_dict:
+            task_output.usage.record_errors(type=ErrorType.FORMAT)
+
+        self.output = task_output
+        self._store_logs()
         return task_output
 
 
@@ -719,7 +733,6 @@
         from versionhq._prompt.model import Prompt
         from versionhq._prompt.auto_feedback import PromptFeedbackGraph
 
-        # self._usage = None
         prompt = Prompt(task=self, agent=agent, context=context)
         pfg = PromptFeedbackGraph(prompt=prompt, should_reform=self.human, reform_trigger_event=ReformTriggerEvent.USER_INPUT if self.human else None)
         pfg = pfg.set_up_graph()
@@ -728,11 +741,11 @@
         try:
             if self._pfg and self.output is None:
                 res, all_outputs = self._pfg.activate()
-                if all_outputs: self._usage = self._pfg._usage
+                if all_outputs:
+                    res.usage = self._pfg.usage
                 return res
-
         except:
-            self._usage.record_errors(type=ErrorType.API)
+            self._pfg.usage.record_errors(type=ErrorType.API)
             Logger().log(level="error", message="Failed to execute the task.", color="red")
             return None
 
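The removal of `Task._usage` means metrics now travel with the result: `agent.execute_task` returns a 4-tuple whose `usage` lands on `TaskOutput.usage`, where latency and format errors are also recorded. A sketch of reading it back after execution (the task definition is illustrative):

    import versionhq as vhq

    task = vhq.Task(description="Draft a one-line release note.")
    output = task.execute()      # returns a TaskOutput per the diff above
    if output and output.usage:
        usage = output.usage     # UsageMetrics: tokens, latency, recorded errors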
versionhq/task_graph/model.py CHANGED
@@ -1,15 +1,16 @@
 import matplotlib
 matplotlib.use('agg')
 
-import enum
 import uuid
-import networkx as nx
-import matplotlib.pyplot as plt
+import datetime
+from enum import IntEnum, Enum
 from abc import ABC
 from concurrent.futures import Future
 from typing import List, Any, Optional, Callable, Dict, Type, Tuple
 from typing_extensions import Self
 
+import networkx as nx
+import matplotlib.pyplot as plt
 from pydantic import BaseModel, InstanceOf, Field, UUID4, field_validator, model_validator
 from pydantic_core import PydanticCustomError
 
@@ -18,13 +19,13 @@ from versionhq.task.model import Task, TaskOutput, Evaluation, ResponseField
 from versionhq._utils import Logger, UsageMetrics, ErrorType
 
 
-class ReformTriggerEvent(enum.Enum):
+class ReformTriggerEvent(IntEnum):
     USER_INPUT = 1 # ask human
     TEST_TIME_COMPUTATION = 2 # mismatch between actual responses and expected outcome
     ERROR_DETECTION = 3 # response error
 
 
-class ConditionType(enum.Enum):
+class ConditionType(IntEnum):
     AND = 1
     OR = 2
 
@@ -71,7 +72,7 @@ class Condition(BaseModel):
         return bool(len([item for item in cond_list if item == True]) == len(cond_list))
 
 
-class TaskStatus(enum.Enum):
+class TaskStatus(IntEnum):
     """
     Enum to track the task execution status
     """
@@ -84,7 +85,7 @@ class TaskStatus(enum.Enum):
     ERROR = 7 # tried task execute but returned error. resupmtion follows edge weights and agent settings
 
 
-class DependencyType(enum.Enum):
+class DependencyType(str, Enum):
     """
     Concise enumeration of the edge type.
     """
@@ -393,14 +394,13 @@
 
 
 class TaskGraph(Graph):
-    _usage: Optional[UsageMetrics] = None
-
     id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True)
     should_reform: bool = False
     reform_trigger_event: Optional[ReformTriggerEvent] = None
     outputs: Dict[str, TaskOutput] = Field(default_factory=dict, description="stores node identifier and TaskOutput")
     concl_response_schema: Optional[List[ResponseField] | Type[BaseModel]] = Field(default=None, description="stores final response schema in Pydantic class or response fields")
     concl: Optional[TaskOutput] = Field(default=None, description="stores the final or latest conclusion of the entire task graph")
+    usage: Optional[UsageMetrics] = None
 
 
     def _save(self, title: str, abs_file_path: str = None) -> None:
@@ -420,18 +420,6 @@
             Logger().log(level="error", message=f"Failed to save the graph {str(self.id)}: {str(e)}", color="red")
 
 
-    def _handle_usage(self) -> None:
-        """Returns total tokens and latency spended for the graph execution."""
-        if not self.nodes:
-            return None
-
-        self._usage = self._usage if self._usage else UsageMetrics(id=self.id)
-
-        for node in self.nodes.values():
-            if node.task and node.task._usage:
-                self._usage.aggregate(metrics=node.task._usage)
-
-
     def _handle_human_input(self) -> str | None:
         """Handles input from human."""
         request = None
@@ -448,12 +436,27 @@
             Logger().log(message=f"Ok. regenerating the graph based on your input: ', {request}", level="info", color="blue")
         else:
             Logger().log(message="Cannot recognize your request.", level="error", color="red")
-            self._usage = self._usage if self._usage else UsageMetrics(id=self.id)
-            self._usage.record_errors(type=ErrorType.HUMAN_INTERACTION)
+            self.usage = self.usage if self.usage else UsageMetrics(id=self.id)
+            self.usage.record_errors(type=ErrorType.HUMAN_INTERACTION)
 
         return request
 
 
+    def _handle_usage(self, start_dt: datetime = None, end_dt: datetime = None) -> UsageMetrics:
+        usage = self.usage if self.usage else UsageMetrics(id=self.id)
+
+        if self.outputs:
+            for item in self.outputs.values():
+                if isinstance(item.usage, UsageMetrics):
+                    usage = usage.aggregate(metrics=item.usage)
+
+        if start_dt and end_dt:
+            usage.record_latency(start_dt, end_dt)
+
+        self.usage = usage
+        return usage
+
+
     def add_task(self, task: Node | Task) -> Node:
         """Convert `task` to a Node object and add it to G"""
 
@@ -596,6 +599,7 @@
         """
 
         Logger().log(color="blue", message=f"Start to activate the graph: {str(self.id)}", level="info")
+        start_dt = datetime.datetime.now()
 
         if target:
             if not [k for k in self.nodes.keys() if k == target]:
@@ -659,7 +663,6 @@
                 node_identifier = edge.target.identifier
                 self.outputs.update({ node_identifier: res })
 
-
         if self.should_reform:
             target = [k for k in self.outputs.keys()][-1] if self.outputs else self.find_start_nodes()[0].identifier if self.find_start_nodes() else None
 
@@ -670,8 +673,9 @@
 
         self.concl = res
         self.concl_response_schema = self.concl_response_schema if self.concl_response_schema else res.pydantic.__class__ if res.pydantic else None
-        # last_task_output = [v for v in self.outputs.values()][len([v for v in self.outputs.values()]) - 1] if [v for v in self.outputs.values()] else None
-        self._handle_usage()
+
+        end_dt = datetime.datetime.now()
+        self._handle_usage(start_dt, end_dt)
         return res, self.outputs
 
 
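The reworked `_handle_usage` aggregates per-node `UsageMetrics` from `self.outputs` (instead of walking `node.task._usage`) and records wall-clock latency across the whole `activate()` run. The same aggregation pattern, condensed (assumes `graph` is an activated `TaskGraph`):

    import datetime
    from versionhq._utils import UsageMetrics

    start_dt = datetime.datetime.now()
    # ... graph.activate() runs here ...
    end_dt = datetime.datetime.now()

    total = UsageMetrics(id=graph.id)
    for out in graph.outputs.values():  # outputs: Dict[str, TaskOutput]
        if isinstance(out.usage, UsageMetrics):
            total = total.aggregate(metrics=out.usage)
    total.record_latency(start_dt, end_dt)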
versionhq/tool/composio/__init__.py ADDED
File without changes
versionhq/tool/{composio_tool.py → composio/model.py} RENAMED
@@ -7,10 +7,9 @@ from typing_extensions import Self
 
 from pydantic import BaseModel, Field, model_validator, field_validator, UUID4, PrivateAttr
 from pydantic_core import PydanticCustomError
-
 from composio import ComposioToolSet
 
-from versionhq.tool.composio_tool_vars import ComposioAppName, ComposioAuthScheme, composio_app_set, ComposioStatus, ComposioAction
+from versionhq.tool.composio.params import ComposioAppName, ComposioAuthScheme, composio_app_set, ComposioStatus, ComposioAction
 from versionhq.tool.cache_handler import CacheHandler
 from versionhq._utils.logger import Logger
 
@@ -22,7 +21,7 @@ DEFAULT_USER_ID = os.environ.get("DEFAULT_USER_ID", None)
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)
 
 
-class ComposioHandler(ABC, BaseModel):
+class ComposioBaseTool(ABC, BaseModel):
     """
     A class to handle connecting account with Composio and executing actions using Composio ecosystem.
     `connected_account_id` is set up per `app_name` to call the actions on the given app. i.e., salesforce
@@ -78,8 +77,8 @@ class ComposioHandler(ABC, BaseModel):
         """
         Composio toolset on LangChain for action execution using LLM.
         """
-        from composio_langchain import ComposioToolSet
-        return ComposioToolSet(api_key=os.environ.get("COMPOSIO_API_KEY"), metadata={**metadata})
+        from composio_langchain import ComposioBaseToolSet
+        return ComposioBaseToolSet(api_key=os.environ.get("COMPOSIO_API_KEY"), metadata={**metadata})
 
 
     def _connect(
versionhq/tool/gpt/__init__.py ADDED
@@ -0,0 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+import os
+from openai import OpenAI
+openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
versionhq/tool/gpt/_enum.py ADDED
@@ -0,0 +1,28 @@
+from enum import Enum
+
+
+class GPTSizeEnum(str, Enum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+class GPTCUPEnvironmentEnum(str, Enum):
+    BROWSER = "browser"
+    MAC = "mac"
+    WINDOWS = "windows"
+    UNBUNTU = "ubuntu"
+
+
+class GPTCUPTypeEnum(str, Enum):
+    COMPUTER_CALL_OUTPUT = "computer_call_output"
+    COMPUTER_USE_PREVIEW = "computer_use_preview"
+
+
+class GPTFilterTypeEnum(str, Enum):
+    eq = "eq"
+    ne = "ne"
+    gt = "gt"
+    gte = "gte"
+    lt = "lt"
+    lte = "lte"
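
These string-valued enums are validated through the new `versionhq._utils.is_valid_enum` helper (added in this release and used throughout `cup.py` below), which appears to accept either a member or its raw value. Expected behavior, inferred from its call sites:

    from versionhq._utils import is_valid_enum
    from versionhq.tool.gpt._enum import GPTSizeEnum

    assert is_valid_enum(enum=GPTSizeEnum, val="high")           # raw string value
    assert is_valid_enum(enum=GPTSizeEnum, val=GPTSizeEnum.LOW)  # enum member
    assert not is_valid_enum(enum=GPTSizeEnum, val="extreme")    # not a defined size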
versionhq/tool/gpt/cup.py ADDED
@@ -0,0 +1,145 @@
+from typing import List, Dict, Any
+
+from versionhq._utils import convert_img_url
+from versionhq.tool.gpt import openai_client
+from versionhq.tool.gpt._enum import GPTCUPEnvironmentEnum, GPTCUPTypeEnum, GPTSizeEnum
+from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType
+
+
+class CUPToolSchema:
+    type: str = GPTCUPTypeEnum.COMPUTER_USE_PREVIEW.value
+    display_width: int = 1024
+    display_height: int = 768
+    environment: str = GPTCUPEnvironmentEnum.BROWSER.value
+
+    def __init__(
+        self,
+        type: str | GPTCUPTypeEnum = None,
+        display_width: int = None,
+        display_height: int = None,
+        environment: str | GPTCUPEnvironmentEnum = None
+    ):
+        self.display_height = display_height if display_height else self.display_height
+        self.display_width = display_width if display_width else self.display_width
+
+        if type and is_valid_enum(enum=GPTCUPTypeEnum, val=type):
+            self.type = type.value if isinstance(type, GPTCUPTypeEnum) else type
+
+        if environment and is_valid_enum(enum=GPTCUPEnvironmentEnum, val=environment):
+            self.environment = environment.value if isinstance(environment, GPTCUPEnvironmentEnum) else environment
+
+        self.environment = environment if environment else self.environment
+
+
+    @property
+    def schema(self) -> Dict[str, Any]:
+        return {
+            "type": self.type if isinstance(self.type, str) else self.type.value,
+            "display_width": self.display_width,
+            "display_height": self.display_height,
+            "environment": self.environment if isinstance(self.environment, str) else self.environment.value,
+        }
+
+
+class GPTToolCUP:
+    model: str = "computer-use-preview"
+    tools: List[CUPToolSchema] = list()
+    user_prompt: str = None
+    img_url: str = None
+    reasoning_effort: str = GPTSizeEnum.MEDIUM.value
+    truncation: str = "auto"
+
+    def __init__(
+        self,
+        user_prompt: str,
+        tools: List[CUPToolSchema] | CUPToolSchema = None,
+        img_url: str = None,
+        reasoning_effort: GPTSizeEnum | str = None,
+        truncation: str = None
+    ):
+        self.user_prompt = user_prompt
+        self.truncation = truncation if truncation else self.truncation
+
+        if img_url:
+            img_url = convert_img_url(img_url)
+            self.img_url = img_url
+
+        if reasoning_effort and is_valid_enum(enum=GPTSizeEnum, val=reasoning_effort):
+            self.reasoning_effort = reasoning_effort.value if isinstance(reasoning_effort, GPTSizeEnum) else reasoning_effort
+
+        if tools:
+            match tools:
+                case list():
+                    if self.tools:
+                        self.tools.extend(tools)
+                    else:
+                        self.tools = tools
+                case CUPToolSchema():
+                    if self.tools:
+                        self.tools.append(tools)
+                    else:
+                        self.tools = [tools]
+                case _:
+                    pass
+
+    def run(self):
+        raw_res = ""
+        usage = UsageMetrics()
+
+        try:
+            res = openai_client.responses.create(**self.schema)
+            if not res:
+                usage.record_errors(ErrorType.TOOL)
+            else:
+                raw_res = res.output[1].summary[0].text
+                usage.record_token_usage(**res.usage.__dict__)
+            return raw_res, None, usage
+        except:
+            usage.record_errors(ErrorType.TOOL)
+            return raw_res, None, usage
+
+
+    @property
+    def schema(self) -> Dict[str, Any]:
+        img_url = convert_img_url(self.img_url) if self.img_url else None
+        inputs = [{ "role": "user", "content": self.user_prompt } ]
+
+        if img_url:
+            inputs.append({"type": "input_image", "image_url": f"data:image/png;base64,{img_url}"})
+
+        tool_schema = [item.schema for item in self.tools]
+        schema = dict(model=self.model, tools=tool_schema, input=inputs, reasoning={ "effort": self.reasoning_effort}, truncation=self.truncation)
+        return schema
+
+
+# "output": [
+#     {
+#         "type": "reasoning",
+#         "id": "rs_67cb...",
+#         "summary": [
+#             {
+#                 "type": "summary_text",
+#                 "text": "Exploring 'File' menu option."
+#             }
+#         ]
+#     },
+#     {
+#         "type": "computer_call",
+#         "id": "cu_67cb...",
+#         "call_id": "call_nEJ...",
+#         "action": {
+#             "type": "click",
+#             "button": "left",
+#             "x": 135,
+#             "y": 193
+#         },
+#         "pending_safety_checks": [
+#             {
+#                 "id": "cu_sc_67cb...",
+#                 "code": "malicious_instructions",
+#                 "message": "We've detected instructions that may cause your application to perform malicious or unauthorized actions. Please acknowledge this warning if you'd like to proceed."
+#             }
+#         ],
+#         "status": "completed"
+#     }
+# ]
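
A minimal end-to-end sketch of the new computer-use tool, based on the constructor and `run()` above (the prompt and screenshot path are illustrative, and `OPENAI_API_KEY` must be set for the shared client in `gpt/__init__.py`):

    from versionhq.tool.gpt.cup import GPTToolCUP, CUPToolSchema

    tool = GPTToolCUP(
        user_prompt="Open the File menu and read the first item.",
        tools=CUPToolSchema(display_width=1280, display_height=800),  # browser environment by default
        img_url="screenshots/app.png",  # converted to base64 via convert_img_url
        reasoning_effort="low",
    )
    raw, _, usage = tool.run()  # -> (summary text, None, UsageMetrics)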