versionhq 1.2.4.5__py3-none-any.whl → 1.2.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +12 -3
- versionhq/_prompt/auto_feedback.py +1 -1
- versionhq/_prompt/model.py +11 -8
- versionhq/_utils/__init__.py +2 -0
- versionhq/_utils/convert_img_url.py +15 -0
- versionhq/_utils/is_valid_enum.py +25 -0
- versionhq/_utils/llm_as_a_judge.py +0 -1
- versionhq/_utils/usage_metrics.py +35 -14
- versionhq/agent/model.py +91 -27
- versionhq/agent_network/formation.py +3 -9
- versionhq/agent_network/model.py +3 -4
- versionhq/clients/customer/__init__.py +2 -2
- versionhq/clients/product/model.py +4 -4
- versionhq/clients/workflow/model.py +1 -1
- versionhq/llm/llm_vars.py +0 -2
- versionhq/llm/model.py +1 -1
- versionhq/storage/task_output_storage.py +2 -2
- versionhq/task/evaluation.py +11 -2
- versionhq/task/model.py +72 -59
- versionhq/task_graph/model.py +30 -26
- versionhq/tool/composio/__init__.py +0 -0
- versionhq/tool/{composio_tool.py → composio/model.py} +4 -5
- versionhq/tool/gpt/__init__.py +6 -0
- versionhq/tool/gpt/_enum.py +28 -0
- versionhq/tool/gpt/cup.py +145 -0
- versionhq/tool/gpt/file_search.py +163 -0
- versionhq/tool/gpt/web_search.py +89 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/METADATA +1 -1
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/RECORD +33 -25
- /versionhq/tool/{composio_tool_vars.py → composio/params.py} +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/LICENSE +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/WHEEL +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/top_level.txt +0 -0
versionhq/__init__.py
CHANGED
@@ -24,7 +24,10 @@ from versionhq.tool.model import Tool, ToolSet
 from versionhq.tool.rag_tool import RagTool
 from versionhq.tool.cache_handler import CacheHandler
 from versionhq.tool.tool_handler import ToolHandler
-from versionhq.tool.
+from versionhq.tool.composio.model import ComposioBaseTool
+from versionhq.tool.gpt.cup import GPTToolCUP, CUPToolSchema
+from versionhq.tool.gpt.file_search import GPTToolFileSearch, FilterSchema
+from versionhq.tool.gpt.web_search import GPTToolWebSearch
 from versionhq.memory.contextual_memory import ContextualMemory
 from versionhq.memory.model import ShortTermMemory,LongTermMemory, UserMemory, MemoryItem

@@ -32,7 +35,7 @@ from versionhq.agent_network.formation import form_agent_network
 from versionhq.task_graph.draft import workflow


-__version__ = "1.2.4.5"
+__version__ = "1.2.4.7"
 __all__ = [
     "Agent",

@@ -85,7 +88,13 @@ __all__ = [
     "RagTool",
     "CacheHandler",
     "ToolHandler",
-    "
+    "ComposioBaseTool",
+
+    "GPTToolCUP",
+    "CUPToolSchema",
+    "GPTToolFileSearch",
+    "FilterSchema",
+    "GPTToolWebSearch",

     "ContextualMemory",
     "ShortTermMemory",
versionhq/_prompt/auto_feedback.py
CHANGED
@@ -5,7 +5,7 @@ from pydantic import InstanceOf, Field

 from versionhq.agent.model import Agent
 from versionhq.task.model import Task
-from versionhq.task_graph.model import TaskGraph, Node, DependencyType
+from versionhq.task_graph.model import TaskGraph, Node, DependencyType
 from versionhq._prompt.model import Prompt
 from versionhq._prompt.constants import REFLECT, INTEGRATE, parameter_sets

versionhq/_prompt/model.py
CHANGED
@@ -4,7 +4,7 @@ from textwrap import dedent

 from pydantic import InstanceOf, BaseModel

-from versionhq._utils import is_valid_url
+from versionhq._utils import is_valid_url, convert_img_url


 class Prompt:

@@ -99,12 +99,9 @@ Ref. Output image: {output_formats_to_follow}
         content_messages = {}

         if self.task.image:
-
-
-
-            encoded_file = base64.b64encode(content).decode("utf-8")
-            img_url = f"data:image/jpeg;base64,{encoded_file}"
-            content_messages.update({ "type": "image_url", "image_url": { "url": img_url }})
+            img_url = convert_img_url(self.task.image)
+            if img_url:
+                content_messages.update({ "type": "image_url", "image_url": { "url": img_url }})

         if self.task.file:
             if is_valid_url(self.task.file):

@@ -146,7 +143,7 @@ Ref. Output image: {output_formats_to_follow}
         return "\n".join(task_slices)


-    def format_core(self, rag_tools: List[Any] = None) -> Tuple[str, str, List[Dict[str, str]]]:
+    def format_core(self, rag_tools: List[Any] = None, gpt_tools: List[Any] = None) -> Tuple[str, str, List[Dict[str, str]]]:
         """Formats prompt messages sent to the LLM, then returns task prompt, developer prompt, and messages."""

         from versionhq.knowledge._utils import extract_knowledge_context

@@ -168,6 +165,12 @@ Ref. Output image: {output_formats_to_follow}
         if rag_tool_context:
             user_prompt += ",".join(rag_tool_context) if isinstance(rag_tool_context, list) else str(rag_tool_context)

+        if gpt_tools:
+            for item in gpt_tools:
+                raw, _, _ = item.run()
+                if raw:
+                    user_prompt += str(raw)
+
         if self.agent.with_memory == True:
             contextual_memory = ContextualMemory(
                 memory_config=self.agent.memory_config, stm=self.agent.short_term_memory, ltm=self.agent.long_term_memory, um=self.agent.user_memory
versionhq/_utils/__init__.py
CHANGED
@@ -3,3 +3,5 @@ from versionhq._utils.process_config import process_config
 from versionhq._utils.vars import KNOWLEDGE_DIRECTORY, MAX_FILE_NAME_LENGTH
 from versionhq._utils.is_valid_url import is_valid_url
 from versionhq._utils.usage_metrics import UsageMetrics, ErrorType
+from versionhq._utils.convert_img_url import convert_img_url
+from versionhq._utils.is_valid_enum import is_valid_enum
versionhq/_utils/convert_img_url.py
ADDED
@@ -0,0 +1,15 @@
+import base64
+
+def convert_img_url(img_url: str) -> str | None:
+    try:
+        with open(img_url, "rb") as file:
+            content = file.read()
+            if content:
+                encoded_file = base64.b64encode(content).decode("utf-8")
+                img_url = f"data:image/jpeg;base64,{encoded_file}"
+                return img_url
+
+            else: return None
+
+    except:
+        return None
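A minimal usage sketch of the new helper, assuming versionhq 1.2.4.7 is installed; the local file name is a placeholder. The helper returns a base64 data URL on success and None otherwise, which is the shape Prompt now attaches to image messages.

# Illustrative only: "photo.jpg" stands in for any local image file.
from versionhq._utils import convert_img_url

data_url = convert_img_url("photo.jpg")
if data_url:
    # the message shape Prompt builds when task.image is set
    content_message = {"type": "image_url", "image_url": {"url": data_url}}
    print(content_message["image_url"]["url"][:40])
else:
    print("missing or empty file -> None")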
versionhq/_utils/is_valid_enum.py
ADDED
@@ -0,0 +1,25 @@
+from enum import Enum, IntEnum
+from typing import Any
+
+
+def is_valid_enum(enum: Enum | IntEnum, key: str = None, val: str | Enum | IntEnum = None) -> bool:
+    if not enum: return False
+
+    if key:
+        key = key.upper()
+        matched = [k for k in enum._member_map_.keys() if hasattr(enum, "_member_map_") and k == key]
+        return bool(matched)
+
+    elif val:
+        match val:
+            case str():
+                matched = [k for k in enum._value2member_map_.keys() if hasattr(enum, "_value2member_map_") and k == val]
+                return bool(matched)
+
+            case Enum() | IntEnum():
+                return val in enum
+
+            case _:
+                return False
+
+    else: return False
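A short, hedged example of how the new validator behaves. The enum below is a local stand-in that mirrors Formation from versionhq.agent_network.model; it is defined here only for illustration.

from enum import IntEnum
from versionhq._utils import is_valid_enum

class Formation(IntEnum):      # local stand-in, mirrors versionhq.agent_network.model.Formation
    SOLO = 1
    SUPERVISING = 2

print(is_valid_enum(enum=Formation, key="supervising"))   # True: keys are upper-cased before matching
print(is_valid_enum(enum=Formation, val=Formation.SOLO))  # True: enum members are checked by membership
print(is_valid_enum(enum=Formation, key="swarm"))         # False: unknown key
print(is_valid_enum(enum=None))                           # False: no enum supplied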
versionhq/_utils/usage_metrics.py
CHANGED
@@ -1,13 +1,13 @@
 import uuid
-import enum
 import datetime
+from enum import IntEnum
 from typing import Dict, List
 from typing_extensions import Self

 from pydantic import BaseModel, UUID4, InstanceOf


-class ErrorType(
+class ErrorType(IntEnum):
     FORMAT = 1
     TOOL = 2
     API = 3

@@ -22,19 +22,38 @@ class UsageMetrics(BaseModel):
     total_tokens: int = 0
     prompt_tokens: int = 0
     completion_tokens: int = 0
+    input_tokens: int = 0
+    output_tokens: int = 0
     successful_requests: int = 0
     total_errors: int = 0
     error_breakdown: Dict[ErrorType, int] = dict()
     latency: float = 0.0 # in ms

-
+
+    def record_token_usage(self, *args, **kwargs) -> None:
         """Records usage metrics from the raw response of the model."""

-        if
-            for item in
-
-
-
+        if args:
+            for item in args:
+                match item:
+                    case dict():
+                        if hasattr(self, k):
+                            setattr(self, k, int(getattr(self, k)) + int(v))
+                    case UsageMetrics():
+                        self = self.aggregate(metrics=item)
+                    case _:
+                        try:
+                            self.completion_tokens += item.completion_tokens if hasattr(item, "completion_tokens") else 0
+                            self.prompt_tokens += item.prompt_tokens if hasattr(item, "prompt_tokens") else 0
+                            self.total_tokens += item.total_tokens if hasattr(item, "total_tokens") else 0
+                            self.input_tokens += item.input_tokens if hasattr(item, "input_tokens") else 0
+                            self.output_tokens += item.output_tokens if hasattr(item, "output_tokens") else 0
+                        except:
+                            pass
+        if kwargs:
+            for k, v in kwargs.items():
+                if hasattr(self, k):
+                    setattr(self, k, int(getattr(self, k)) + int(v))


     def record_errors(self, type: ErrorType = None) -> None:

@@ -54,12 +73,14 @@ class UsageMetrics(BaseModel):
         if not metrics:
             return self

-        self.total_tokens += metrics.total_tokens
-        self.prompt_tokens += metrics.prompt_tokens
-        self.completion_tokens += metrics.completion_tokens
-        self.
-        self.
-        self.
+        self.total_tokens += metrics.total_tokens
+        self.prompt_tokens += metrics.prompt_tokens
+        self.completion_tokens += metrics.completion_tokens
+        self.input_tokens += metrics.input_tokens
+        self.output_tokens += metrics.output_tokens
+        self.successful_requests += metrics.successful_requests
+        self.total_errors += metrics.total_errors
+        self.latency += metrics.latency
         self.latency = round(self.latency, 3)

         if metrics.error_breakdown:
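A hedged sketch of driving the reworked metrics through the keyword path and then merging two records; all counts are illustrative. As captured in the diff, the dict branch of record_token_usage refers to k and v that are only bound in the kwargs loop, so keyword arguments are the dependable way to feed raw counts here.

# Illustrative counts; assumes versionhq 1.2.4.7.
from versionhq._utils import UsageMetrics

run = UsageMetrics()
run.record_token_usage(prompt_tokens=120, completion_tokens=45, total_tokens=165)  # kwargs add onto matching fields

other = UsageMetrics(total_tokens=35, input_tokens=20, output_tokens=15, successful_requests=1)
run.aggregate(metrics=other)    # field-by-field sum, now including the new input/output token counters

print(run.total_tokens)         # 200
print(run.successful_requests)  # 1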
versionhq/agent/model.py
CHANGED
@@ -11,7 +11,7 @@ from versionhq.agent.rpm_controller import RPMController
 from versionhq.tool.model import Tool, ToolSet, BaseTool
 from versionhq.knowledge.model import BaseKnowledgeSource, Knowledge
 from versionhq.memory.model import ShortTermMemory, LongTermMemory, UserMemory
-from versionhq._utils import Logger, process_config, is_valid_url, ErrorType
+from versionhq._utils import Logger, process_config, is_valid_url, ErrorType, UsageMetrics


 load_dotenv(override=True)

@@ -124,6 +124,9 @@ class Agent(BaseModel):
         Similar to the LLM set up, when the agent has tools, we will declare them using the Tool class.
         """
         from versionhq.tool.rag_tool import RagTool
+        from versionhq.tool.gpt.web_search import GPTToolWebSearch
+        from versionhq.tool.gpt.file_search import GPTToolFileSearch
+        from versionhq.tool.gpt.cup import GPTToolCUP

         if not self.tools:
             return self

@@ -131,7 +134,7 @@ class Agent(BaseModel):
         tool_list = []
         for item in self.tools:
             match item:
-                case RagTool() | BaseTool():
+                case RagTool() | BaseTool() | GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
                     tool_list.append(item)

                 case Tool():

@@ -353,8 +356,8 @@ class Agent(BaseModel):
         response_format: Optional[Dict[str, Any]] = None,
         tools: Optional[List[InstanceOf[Tool]| InstanceOf[ToolSet] | Type[Tool]]] = None,
         tool_res_as_final: bool = False,
-        task: Any = None
-    ) ->
+        # task: Any = None
+    ) -> Tuple[str, UsageMetrics]:
         """
         Create formatted prompts using the developer prompt and the agent's backstory, then call the base model.
         - Execute the task up to `self.max_retry_limit` times in case of receiving an error or empty response.

@@ -364,6 +367,7 @@ class Agent(BaseModel):
         task_execution_counter = 0
         iterations = 0
         raw_response = None
+        usage = UsageMetrics()

         try:
             if self._rpm_controller and self.max_rpm:

@@ -373,17 +377,17 @@ class Agent(BaseModel):

             if tool_res_as_final:
                 raw_response = self.func_calling_llm.call(messages=messages, tools=tools, tool_res_as_final=True)
-
+                usage.record_token_usage(*self.func_calling_llm._usages)
             else:
                 raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools)
-
+                usage.record_token_usage(*self.llm._usages)

             task_execution_counter += 1
             Logger(**self._logger_config, filename=self.key).log(level="info", message=f"Agent response: {raw_response}", color="green")
-            return raw_response
+            return raw_response, usage

         except Exception as e:
-
+            usage.record_errors(type=ErrorType.API)
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"An error occured. The agent will retry: {str(e)}", color="red")

             while not raw_response and task_execution_counter <= self.max_retry_limit:

@@ -392,12 +396,12 @@ class Agent(BaseModel):
                     self._rpm_controller.check_or_wait()

                 raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools)
-
+                usage.record_token_usage(*self.llm._usages)
                 iterations += 1

                 task_execution_counter += 1
                 Logger(**self._logger_config, filename=self.key).log(level="info", message=f"Agent #{task_execution_counter} response: {raw_response}", color="green")
-                return raw_response
+                return raw_response, usage

             if not raw_response:
                 Logger(**self._logger_config, filename=self.key).log(level="error", message="Received None or empty response from the model", color="red")

@@ -423,6 +427,57 @@ class Agent(BaseModel):
         return self.set_up_llm()


+    def _sort_tools(self, task = None) -> Tuple[List[Any], List[Any], List[Any]]:
+        """Sorts agent and task tools by class."""
+
+        from versionhq.tool.rag_tool import RagTool
+        from versionhq.tool.gpt.web_search import GPTToolWebSearch
+        from versionhq.tool.gpt.file_search import GPTToolFileSearch
+        from versionhq.tool.gpt.cup import GPTToolCUP
+
+        all_tools = []
+        if task: all_tools = task.tools + self.tools if task.can_use_agent_tools else task.tools
+        else: all_tools = self.tools
+
+        rag_tools, gpt_tools, tools = [], [], []
+        if all_tools:
+            for item in all_tools:
+                match item:
+                    case RagTool():
+                        rag_tools.append(item)
+
+                    case GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
+                        gpt_tools.append(item)
+
+                    case Tool() | BaseTool() | ToolSet():
+                        tools.append(item)
+
+        return rag_tools, gpt_tools, tools
+
+
+    def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput
+        """Generates k, v pairs from multiple GPT tool results and stores them in TaskOutput class."""
+
+        from versionhq.task.model import TaskOutput
+        from versionhq._utils import UsageMetrics
+
+        if not gpt_tools:
+            return
+
+        tool_res = dict()
+        annotation_set = dict()
+        total_usage = UsageMetrics()
+
+        for i, item in enumerate(gpt_tools):
+            raw, annotations, usage = item.run()
+            tool_res.update({ str(i): raw })
+            annotation_set.update({ str(i): annotations })
+            total_usage.aggregate(metrics=usage)
+
+        res = TaskOutput(raw=str(tool_res), tool_output=tool_res, usage=total_usage, annotations=annotation_set)
+        return res
+
+
     def update(self, **kwargs) -> Self:
         """
         Update the existing agent. Address variables that require runnning set_up_x methods first, then update remaining variables.

@@ -482,15 +537,21 @@ class Agent(BaseModel):
         image: str = None,
         file: str = None,
         audio: str = None
-    ) ->
+    ) -> Any:
         """
         Defines and executes a task, then returns TaskOutput object with the generated task.
         """
+        from versionhq.task.model import Task
+
         if not self.role:
-            return None
+            return None, None

-
+        _, gpt_tools, _ = self._sort_tools()
+
+        if gpt_tools and tool_res_as_final == True:
+            res = self._handle_gpt_tools(gpt_tools=gpt_tools)
+            return res

         class Output(BaseModel):
             result: str

@@ -503,44 +564,47 @@ class Agent(BaseModel):
             image=image, #REFINEME - query memory/knowledge or self create
             file=file,
             audio=audio,
+            can_use_agent_tools=True if self.tools else False,
         )
         res = task.execute(agent=self, context=context)
-        return res
+        return res


-    def execute_task(self, task, context: Optional[Any] = None
+    def execute_task(self, task, context: Optional[Any] = None) -> Tuple[str, str, Any, UsageMetrics]:
         """Handling task execution."""

-        from versionhq.task.model import Task
-        from versionhq.tool.rag_tool import RagTool
         from versionhq._prompt.model import Prompt
+        from versionhq.task.model import Task

         task: InstanceOf[Task] = task
-
-
-
+        rag_tools, gpt_tools, tools = self._sort_tools(task=task)
+        raw_response = ""
+        user_prompt, dev_prompt = "", ""
+        usage = UsageMetrics(id=task.id)

         if self.max_rpm and self._rpm_controller:
             self._rpm_controller._reset_request_count()

-
+        if task.tool_res_as_final == True and gpt_tools:
+            self._times_executed += 1
+            res = self._handle_gpt_tools(gpt_tools=gpt_tools)
+            return user_prompt, dev_prompt, res, res.usage
+
+        user_prompt, dev_prompt, messages = Prompt(task=task, agent=self, context=context).format_core(rag_tools=rag_tools, gpt_tools=gpt_tools)

         try:
             self._times_executed += 1
-            raw_response = self._invoke(
+            raw_response, usage = self._invoke(
                 messages=messages,
                 response_format=task._structure_response_format(model_provider=self.llm.provider),
                 tools=tools,
                 tool_res_as_final=task.tool_res_as_final,
-                task=task
             )
-            if raw_response:
-                task._usage.successful_requests += 1

         except Exception as e:
             self._times_executed += 1
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"The agent failed to execute the task. Error: {str(e)}", color="red")
-            user_prompt, dev_prompt, raw_response = self.execute_task(task, context
+            user_prompt, dev_prompt, raw_response, usage = self.execute_task(task, context)

         if self._times_executed > self.max_retry_limit:
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"Max retry limit has exceeded.", color="red")

@@ -549,7 +613,7 @@ class Agent(BaseModel):
         if self.max_rpm and self._rpm_controller:
             self._rpm_controller.stop_rpm_counter()

-        return user_prompt, dev_prompt, raw_response
+        return user_prompt, dev_prompt, raw_response, usage


     @property
versionhq/agent_network/formation.py
CHANGED
@@ -7,7 +7,7 @@ from versionhq.task.model import Task
 from versionhq.agent.model import Agent
 from versionhq.agent_network.model import AgentNetwork, Member, Formation
 from versionhq.agent.inhouse_agents import vhq_formation_planner
-from versionhq._utils import Logger
+from versionhq._utils import Logger, is_valid_enum

 import chromadb
 chromadb.api.client.SharedSystemClient.clear_system_cache()

@@ -83,14 +83,6 @@ def form_agent_network(

     res = vhq_task.execute(agent=vhq_formation_planner, context=context)

-    formation_keys = []
-    if hasattr(res.pydantic, "formation"):
-        formation_keys = [k for k in Formation._member_map_.keys() if k == res.pydantic.formation.upper()]
-    elif "formation" in res.json_dict:
-        formation_keys = [k for k in Formation._member_map_.keys() if k == res.json_dict["formation"].upper()]
-
-    _formation = Formation[formation_keys[0]] if formation_keys else Formation.SUPERVISING
-
     network_tasks = []
     members = []
     leader = res._fetch_value_of(key="leader_agent")

@@ -98,6 +90,8 @@ def form_agent_network(
     created_agents = [Agent(role=str(item), goal=str(item)) for item in agent_roles] if agent_roles else []
     task_descriptions = res._fetch_value_of(key="task_descriptions")
     task_outcomes = res._fetch_value_of(key="task_outcomes")
+    formation_key = res.json_dict["formation"] if "formation" in res.json_dict else None
+    _formation = Formation[formation_key] if is_valid_enum(key=formation_key, enum=Formation) else Formation.SUPERVISING

     if agents:
         for i, agent in enumerate(created_agents):
versionhq/agent_network/model.py
CHANGED
@@ -1,7 +1,6 @@
 import uuid
 import warnings
-from enum import
-from concurrent.futures import Future
+from enum import IntEnum
 from hashlib import md5
 from typing import Any, Dict, List, Callable, Optional, Tuple
 from typing_extensions import Self

@@ -30,7 +29,7 @@ GenerateSchema.match_type = match_type
 warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")


-class Formation(str, Enum):
+class Formation(IntEnum):
     SOLO = 1
     SUPERVISING = 2
     SQUAD = 3

@@ -38,7 +37,7 @@ class Formation(str, Enum):
     HYBRID = 10


-class TaskHandlingProcess(
+class TaskHandlingProcess(IntEnum):
     """
     A class representing task handling processes to tackle multiple tasks.
     When the agent network has multiple tasks that connect with edges, follow the edge conditions.
versionhq/clients/product/model.py
CHANGED
@@ -1,11 +1,11 @@
 import uuid
-from abc import ABC
-from typing import
+from abc import ABC
+from typing import Optional, List

-from pydantic import UUID4,
+from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
 from pydantic_core import PydanticCustomError

-from versionhq.tool.
+from versionhq.tool.composio.params import ComposioAppName


 class ProductProvider(ABC, BaseModel):
versionhq/clients/workflow/model.py
CHANGED
@@ -10,7 +10,7 @@ from versionhq.agent.model import Agent
 from versionhq.agent_network.model import AgentNetwork
 from versionhq.clients.product.model import Product
 from versionhq.clients.customer.model import Customer
-from versionhq.tool.
+from versionhq.tool.composio.params import ComposioAppName


 class MessagingComponent(ABC, BaseModel):
versionhq/llm/llm_vars.py
CHANGED
versionhq/llm/model.py
CHANGED
@@ -313,7 +313,7 @@ class LLM(BaseModel):
         cred = self._set_env_vars()

         if self.provider == "gemini":
-            self.response_format = { "type": "json_object" } if not tools else None
+            self.response_format = { "type": "json_object" } if not tools and self.model != "gemini/gemini-2.0-flash-thinking-exp" else None
         else:
             self.response_format = response_format

versionhq/storage/task_output_storage.py
CHANGED
@@ -147,8 +147,8 @@ class TaskOutputStorageHandler:
             description=str(task.description),
             raw=str(task.output.raw),
             responsible_agents=str(task.processed_agents),
-            tokens=task.
-            latency=task.
+            tokens=task.output.usage.total_tokens,
+            latency=task.output.usage.latency,
             score=task.output.aggregate_score if task.output.aggregate_score else "None",
         )
         self.storage.add(task=task, output=output_to_store, inputs=inputs)
versionhq/task/evaluation.py
CHANGED
@@ -79,6 +79,7 @@ class Evaluation(BaseModel):
         Returns:
             A pandas DataFrame with normalized 'weight' and 'score' columns, or an empty DataFrame if the input is empty.
         """
+
         if not self.items:
             return pd.DataFrame()

@@ -87,7 +88,6 @@ class Evaluation(BaseModel):

         scaler = MinMaxScaler(feature_range=(0, 1))
         df[['weight', 'score']] = scaler.fit_transform(df[['weight', 'score']])
-
         return df


@@ -98,7 +98,16 @@ class Evaluation(BaseModel):

         df = self._normalize_df()
         df['weighted_score'] = df['weight'] * df['score']
-
+        n = df['weighted_score'].sum()
+        if n == 0.0 or n == 1.0:
+            import math
+            s = [[item.score for item in self.items]]
+            w = [[item.weight for item in self.items]]
+            r = [math.sumprod(x, y) for x, y in zip(s, w)]
+            if r and sum(w[0]):
+                n = r[0] / sum(w[0])
+
+        aggregate_score = round(n, 3)
         return aggregate_score

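A hedged worked example of the fallback added to aggregate_score: when the MinMax-normalized weighted sum collapses to 0.0 or 1.0, the raw items are re-scored as a plain weighted average via math.sumprod (Python 3.12+). The scores and weights below are made up for illustration.

# Illustrative item scores and weights, not taken from the package.
import math

scores  = [3.5, 4.0, 2.0]
weights = [1.0, 2.0, 0.5]

n = math.sumprod(scores, weights) / sum(weights)   # (3.5*1.0 + 4.0*2.0 + 2.0*0.5) / 3.5
print(round(n, 3))                                 # 3.571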