versionhq 1.2.4.5__py3-none-any.whl → 1.2.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +12 -3
- versionhq/_prompt/auto_feedback.py +1 -1
- versionhq/_prompt/model.py +11 -8
- versionhq/_utils/__init__.py +2 -0
- versionhq/_utils/convert_img_url.py +15 -0
- versionhq/_utils/is_valid_enum.py +25 -0
- versionhq/_utils/llm_as_a_judge.py +0 -1
- versionhq/_utils/usage_metrics.py +35 -14
- versionhq/agent/model.py +91 -27
- versionhq/agent_network/formation.py +3 -9
- versionhq/agent_network/model.py +3 -4
- versionhq/clients/customer/__init__.py +2 -2
- versionhq/clients/product/model.py +4 -4
- versionhq/clients/workflow/model.py +1 -1
- versionhq/llm/llm_vars.py +0 -2
- versionhq/llm/model.py +1 -1
- versionhq/storage/task_output_storage.py +2 -2
- versionhq/task/evaluation.py +11 -2
- versionhq/task/model.py +72 -59
- versionhq/task_graph/model.py +30 -26
- versionhq/tool/composio/__init__.py +0 -0
- versionhq/tool/{composio_tool.py → composio/model.py} +4 -5
- versionhq/tool/gpt/__init__.py +6 -0
- versionhq/tool/gpt/_enum.py +28 -0
- versionhq/tool/gpt/cup.py +145 -0
- versionhq/tool/gpt/file_search.py +163 -0
- versionhq/tool/gpt/web_search.py +89 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/METADATA +1 -1
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/RECORD +33 -25
- /versionhq/tool/{composio_tool_vars.py → composio/params.py} +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/LICENSE +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/WHEEL +0 -0
- {versionhq-1.2.4.5.dist-info → versionhq-1.2.4.7.dist-info}/top_level.txt +0 -0
versionhq/__init__.py
CHANGED
@@ -24,7 +24,10 @@ from versionhq.tool.model import Tool, ToolSet
 from versionhq.tool.rag_tool import RagTool
 from versionhq.tool.cache_handler import CacheHandler
 from versionhq.tool.tool_handler import ToolHandler
-from versionhq.tool.
+from versionhq.tool.composio.model import ComposioBaseTool
+from versionhq.tool.gpt.cup import GPTToolCUP, CUPToolSchema
+from versionhq.tool.gpt.file_search import GPTToolFileSearch, FilterSchema
+from versionhq.tool.gpt.web_search import GPTToolWebSearch
 from versionhq.memory.contextual_memory import ContextualMemory
 from versionhq.memory.model import ShortTermMemory,LongTermMemory, UserMemory, MemoryItem

@@ -32,7 +35,7 @@ from versionhq.agent_network.formation import form_agent_network
 from versionhq.task_graph.draft import workflow


-__version__ = "1.2.4.5"
+__version__ = "1.2.4.7"
 __all__ = [
     "Agent",

@@ -85,7 +88,13 @@ __all__ = [
     "RagTool",
     "CacheHandler",
     "ToolHandler",
-    "
+    "ComposioBaseTool",
+
+    "GPTToolCUP",
+    "CUPToolSchema",
+    "GPTToolFileSearch",
+    "FilterSchema",
+    "GPTToolWebSearch",

     "ContextualMemory",
     "ShortTermMemory",
versionhq/_prompt/auto_feedback.py
CHANGED
@@ -5,7 +5,7 @@ from pydantic import InstanceOf, Field

 from versionhq.agent.model import Agent
 from versionhq.task.model import Task
-from versionhq.task_graph.model import TaskGraph, Node, DependencyType
+from versionhq.task_graph.model import TaskGraph, Node, DependencyType
 from versionhq._prompt.model import Prompt
 from versionhq._prompt.constants import REFLECT, INTEGRATE, parameter_sets

versionhq/_prompt/model.py
CHANGED
@@ -4,7 +4,7 @@ from textwrap import dedent

 from pydantic import InstanceOf, BaseModel

-from versionhq._utils import is_valid_url
+from versionhq._utils import is_valid_url, convert_img_url


 class Prompt:

@@ -99,12 +99,9 @@ Ref. Output image: {output_formats_to_follow}
         content_messages = {}

         if self.task.image:
-
-
-
-            encoded_file = base64.b64encode(content).decode("utf-8")
-            img_url = f"data:image/jpeg;base64,{encoded_file}"
-            content_messages.update({ "type": "image_url", "image_url": { "url": img_url }})
+            img_url = convert_img_url(self.task.image)
+            if img_url:
+                content_messages.update({ "type": "image_url", "image_url": { "url": img_url }})

         if self.task.file:
             if is_valid_url(self.task.file):

@@ -146,7 +143,7 @@ Ref. Output image: {output_formats_to_follow}
         return "\n".join(task_slices)


-    def format_core(self, rag_tools: List[Any] = None) -> Tuple[str, str, List[Dict[str, str]]]:
+    def format_core(self, rag_tools: List[Any] = None, gpt_tools: List[Any] = None) -> Tuple[str, str, List[Dict[str, str]]]:
         """Formats prompt messages sent to the LLM, then returns task prompt, developer prompt, and messages."""

         from versionhq.knowledge._utils import extract_knowledge_context

@@ -168,6 +165,12 @@ Ref. Output image: {output_formats_to_follow}
         if rag_tool_context:
             user_prompt += ",".join(rag_tool_context) if isinstance(rag_tool_context, list) else str(rag_tool_context)

+        if gpt_tools:
+            for item in gpt_tools:
+                raw, _, _ = item.run()
+                if raw:
+                    user_prompt += str(raw)
+
         if self.agent.with_memory == True:
             contextual_memory = ContextualMemory(
                 memory_config=self.agent.memory_config, stm=self.agent.short_term_memory, ltm=self.agent.long_term_memory, um=self.agent.user_memory
versionhq/_utils/__init__.py
CHANGED
@@ -3,3 +3,5 @@ from versionhq._utils.process_config import process_config
 from versionhq._utils.vars import KNOWLEDGE_DIRECTORY, MAX_FILE_NAME_LENGTH
 from versionhq._utils.is_valid_url import is_valid_url
 from versionhq._utils.usage_metrics import UsageMetrics, ErrorType
+from versionhq._utils.convert_img_url import convert_img_url
+from versionhq._utils.is_valid_enum import is_valid_enum
versionhq/_utils/convert_img_url.py
ADDED
@@ -0,0 +1,15 @@
+import base64
+
+def convert_img_url(img_url: str) -> str | None:
+    try:
+        with open(img_url, "rb") as file:
+            content = file.read()
+            if content:
+                encoded_file = base64.b64encode(content).decode("utf-8")
+                img_url = f"data:image/jpeg;base64,{encoded_file}"
+                return img_url
+
+            else: return None
+
+    except:
+        return None
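A minimal usage sketch of the new helper, assuming versionhq 1.2.4.7 is installed; the local file name is a placeholder. The helper returns a base64 data URL on success and None otherwise, which is the shape Prompt now attaches to image messages.

# Illustrative only: "photo.jpg" stands in for any local image file.
from versionhq._utils import convert_img_url

data_url = convert_img_url("photo.jpg")
if data_url:
    # the message shape Prompt builds when task.image is set
    content_message = {"type": "image_url", "image_url": {"url": data_url}}
    print(content_message["image_url"]["url"][:40])
else:
    print("missing or empty file -> None")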
versionhq/_utils/is_valid_enum.py
ADDED
@@ -0,0 +1,25 @@
+from enum import Enum, IntEnum
+from typing import Any
+
+
+def is_valid_enum(enum: Enum | IntEnum, key: str = None, val: str | Enum | IntEnum = None) -> bool:
+    if not enum: return False
+
+    if key:
+        key = key.upper()
+        matched = [k for k in enum._member_map_.keys() if hasattr(enum, "_member_map_") and k == key]
+        return bool(matched)
+
+    elif val:
+        match val:
+            case str():
+                matched = [k for k in enum._value2member_map_.keys() if hasattr(enum, "_value2member_map_") and k == val]
+                return bool(matched)
+
+            case Enum() | IntEnum():
+                return val in enum
+
+            case _:
+                return False
+
+    else: return False
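A short, hedged example of how the new validator behaves. The enum below is a local stand-in that mirrors Formation from versionhq.agent_network.model; it is defined here only for illustration.

from enum import IntEnum
from versionhq._utils import is_valid_enum

class Formation(IntEnum):      # local stand-in, mirrors versionhq.agent_network.model.Formation
    SOLO = 1
    SUPERVISING = 2

print(is_valid_enum(enum=Formation, key="supervising"))   # True: keys are upper-cased before matching
print(is_valid_enum(enum=Formation, val=Formation.SOLO))  # True: enum members are checked by membership
print(is_valid_enum(enum=Formation, key="swarm"))         # False: unknown key
print(is_valid_enum(enum=None))                           # False: no enum supplied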
versionhq/_utils/usage_metrics.py
CHANGED
@@ -1,13 +1,13 @@
 import uuid
-import enum
 import datetime
+from enum import IntEnum
 from typing import Dict, List
 from typing_extensions import Self

 from pydantic import BaseModel, UUID4, InstanceOf


-class ErrorType(
+class ErrorType(IntEnum):
     FORMAT = 1
     TOOL = 2
     API = 3

@@ -22,19 +22,38 @@ class UsageMetrics(BaseModel):
     total_tokens: int = 0
     prompt_tokens: int = 0
     completion_tokens: int = 0
+    input_tokens: int = 0
+    output_tokens: int = 0
     successful_requests: int = 0
     total_errors: int = 0
     error_breakdown: Dict[ErrorType, int] = dict()
     latency: float = 0.0 # in ms

-
+
+    def record_token_usage(self, *args, **kwargs) -> None:
         """Records usage metrics from the raw response of the model."""

-        if
-            for item in
-
-
-
+        if args:
+            for item in args:
+                match item:
+                    case dict():
+                        if hasattr(self, k):
+                            setattr(self, k, int(getattr(self, k)) + int(v))
+                    case UsageMetrics():
+                        self = self.aggregate(metrics=item)
+                    case _:
+                        try:
+                            self.completion_tokens += item.completion_tokens if hasattr(item, "completion_tokens") else 0
+                            self.prompt_tokens += item.prompt_tokens if hasattr(item, "prompt_tokens") else 0
+                            self.total_tokens += item.total_tokens if hasattr(item, "total_tokens") else 0
+                            self.input_tokens += item.input_tokens if hasattr(item, "input_tokens") else 0
+                            self.output_tokens += item.output_tokens if hasattr(item, "output_tokens") else 0
+                        except:
+                            pass
+        if kwargs:
+            for k, v in kwargs.items():
+                if hasattr(self, k):
+                    setattr(self, k, int(getattr(self, k)) + int(v))


     def record_errors(self, type: ErrorType = None) -> None:

@@ -54,12 +73,14 @@ class UsageMetrics(BaseModel):
         if not metrics:
             return self

-        self.total_tokens += metrics.total_tokens
-        self.prompt_tokens += metrics.prompt_tokens
-        self.completion_tokens += metrics.completion_tokens
-        self.
-        self.
-        self.
+        self.total_tokens += metrics.total_tokens
+        self.prompt_tokens += metrics.prompt_tokens
+        self.completion_tokens += metrics.completion_tokens
+        self.input_tokens += metrics.input_tokens
+        self.output_tokens += metrics.output_tokens
+        self.successful_requests += metrics.successful_requests
+        self.total_errors += metrics.total_errors
+        self.latency += metrics.latency
         self.latency = round(self.latency, 3)

         if metrics.error_breakdown:
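A hedged sketch of driving the reworked metrics through the keyword path and then merging two records; all counts are illustrative. As captured in the diff, the dict branch of record_token_usage refers to k and v that are only bound in the kwargs loop, so keyword arguments are the dependable way to feed raw counts here.

# Illustrative counts; assumes versionhq 1.2.4.7.
from versionhq._utils import UsageMetrics

run = UsageMetrics()
run.record_token_usage(prompt_tokens=120, completion_tokens=45, total_tokens=165)  # kwargs add onto matching fields

other = UsageMetrics(total_tokens=35, input_tokens=20, output_tokens=15, successful_requests=1)
run.aggregate(metrics=other)    # field-by-field sum, now including the new input/output token counters

print(run.total_tokens)         # 200
print(run.successful_requests)  # 1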
versionhq/agent/model.py
CHANGED
@@ -11,7 +11,7 @@ from versionhq.agent.rpm_controller import RPMController
 from versionhq.tool.model import Tool, ToolSet, BaseTool
 from versionhq.knowledge.model import BaseKnowledgeSource, Knowledge
 from versionhq.memory.model import ShortTermMemory, LongTermMemory, UserMemory
-from versionhq._utils import Logger, process_config, is_valid_url, ErrorType
+from versionhq._utils import Logger, process_config, is_valid_url, ErrorType, UsageMetrics


 load_dotenv(override=True)

@@ -124,6 +124,9 @@ class Agent(BaseModel):
         Similar to the LLM set up, when the agent has tools, we will declare them using the Tool class.
         """
         from versionhq.tool.rag_tool import RagTool
+        from versionhq.tool.gpt.web_search import GPTToolWebSearch
+        from versionhq.tool.gpt.file_search import GPTToolFileSearch
+        from versionhq.tool.gpt.cup import GPTToolCUP

         if not self.tools:
             return self

@@ -131,7 +134,7 @@ class Agent(BaseModel):
         tool_list = []
         for item in self.tools:
             match item:
-                case RagTool() | BaseTool():
+                case RagTool() | BaseTool() | GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
                     tool_list.append(item)

                 case Tool():

@@ -353,8 +356,8 @@ class Agent(BaseModel):
         response_format: Optional[Dict[str, Any]] = None,
         tools: Optional[List[InstanceOf[Tool]| InstanceOf[ToolSet] | Type[Tool]]] = None,
         tool_res_as_final: bool = False,
-        task: Any = None
-    ) ->
+        # task: Any = None
+    ) -> Tuple[str, UsageMetrics]:
         """
         Create formatted prompts using the developer prompt and the agent's backstory, then call the base model.
         - Execute the task up to `self.max_retry_limit` times in case of receiving an error or empty response.

@@ -364,6 +367,7 @@ class Agent(BaseModel):
         task_execution_counter = 0
         iterations = 0
         raw_response = None
+        usage = UsageMetrics()

         try:
             if self._rpm_controller and self.max_rpm:

@@ -373,17 +377,17 @@ class Agent(BaseModel):

             if tool_res_as_final:
                 raw_response = self.func_calling_llm.call(messages=messages, tools=tools, tool_res_as_final=True)
-
+                usage.record_token_usage(*self.func_calling_llm._usages)
             else:
                 raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools)
-
+                usage.record_token_usage(*self.llm._usages)

             task_execution_counter += 1
             Logger(**self._logger_config, filename=self.key).log(level="info", message=f"Agent response: {raw_response}", color="green")
-            return raw_response
+            return raw_response, usage

         except Exception as e:
-
+            usage.record_errors(type=ErrorType.API)
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"An error occured. The agent will retry: {str(e)}", color="red")

             while not raw_response and task_execution_counter <= self.max_retry_limit:

@@ -392,12 +396,12 @@ class Agent(BaseModel):
                     self._rpm_controller.check_or_wait()

                 raw_response = self.llm.call(messages=messages, response_format=response_format, tools=tools)
-
+                usage.record_token_usage(*self.llm._usages)
                 iterations += 1

                 task_execution_counter += 1
                 Logger(**self._logger_config, filename=self.key).log(level="info", message=f"Agent #{task_execution_counter} response: {raw_response}", color="green")
-                return raw_response
+                return raw_response, usage

             if not raw_response:
                 Logger(**self._logger_config, filename=self.key).log(level="error", message="Received None or empty response from the model", color="red")

@@ -423,6 +427,57 @@ class Agent(BaseModel):
         return self.set_up_llm()


+    def _sort_tools(self, task = None) -> Tuple[List[Any], List[Any], List[Any]]:
+        """Sorts agent and task tools by class."""
+
+        from versionhq.tool.rag_tool import RagTool
+        from versionhq.tool.gpt.web_search import GPTToolWebSearch
+        from versionhq.tool.gpt.file_search import GPTToolFileSearch
+        from versionhq.tool.gpt.cup import GPTToolCUP
+
+        all_tools = []
+        if task: all_tools = task.tools + self.tools if task.can_use_agent_tools else task.tools
+        else: all_tools = self.tools
+
+        rag_tools, gpt_tools, tools = [], [], []
+        if all_tools:
+            for item in all_tools:
+                match item:
+                    case RagTool():
+                        rag_tools.append(item)
+
+                    case GPTToolCUP() | GPTToolFileSearch() | GPTToolWebSearch():
+                        gpt_tools.append(item)
+
+                    case Tool() | BaseTool() | ToolSet():
+                        tools.append(item)
+
+        return rag_tools, gpt_tools, tools
+
+
+    def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput
+        """Generates k, v pairs from multiple GPT tool results and stores them in TaskOutput class."""
+
+        from versionhq.task.model import TaskOutput
+        from versionhq._utils import UsageMetrics
+
+        if not gpt_tools:
+            return
+
+        tool_res = dict()
+        annotation_set = dict()
+        total_usage = UsageMetrics()
+
+        for i, item in enumerate(gpt_tools):
+            raw, annotations, usage = item.run()
+            tool_res.update({ str(i): raw })
+            annotation_set.update({ str(i): annotations })
+            total_usage.aggregate(metrics=usage)
+
+        res = TaskOutput(raw=str(tool_res), tool_output=tool_res, usage=total_usage, annotations=annotation_set)
+        return res
+
+
     def update(self, **kwargs) -> Self:
         """
         Update the existing agent. Address variables that require runnning set_up_x methods first, then update remaining variables.

@@ -482,15 +537,21 @@ class Agent(BaseModel):
         image: str = None,
         file: str = None,
         audio: str = None
-    ) ->
+    ) -> Any:
         """
         Defines and executes a task, then returns TaskOutput object with the generated task.
         """
+        from versionhq.task.model import Task
+
         if not self.role:
-            return None
+            return None, None

-
+        _, gpt_tools, _ = self._sort_tools()
+
+        if gpt_tools and tool_res_as_final == True:
+            res = self._handle_gpt_tools(gpt_tools=gpt_tools)
+            return res

         class Output(BaseModel):
             result: str

@@ -503,44 +564,47 @@ class Agent(BaseModel):
             image=image, #REFINEME - query memory/knowledge or self create
             file=file,
             audio=audio,
+            can_use_agent_tools=True if self.tools else False,
         )
         res = task.execute(agent=self, context=context)
-        return res
+        return res


-    def execute_task(self, task, context: Optional[Any] = None
+    def execute_task(self, task, context: Optional[Any] = None) -> Tuple[str, str, Any, UsageMetrics]:
         """Handling task execution."""

-        from versionhq.task.model import Task
-        from versionhq.tool.rag_tool import RagTool
         from versionhq._prompt.model import Prompt
+        from versionhq.task.model import Task

         task: InstanceOf[Task] = task
-
-
-
+        rag_tools, gpt_tools, tools = self._sort_tools(task=task)
+        raw_response = ""
+        user_prompt, dev_prompt = "", ""
+        usage = UsageMetrics(id=task.id)

         if self.max_rpm and self._rpm_controller:
             self._rpm_controller._reset_request_count()

-
+        if task.tool_res_as_final == True and gpt_tools:
+            self._times_executed += 1
+            res = self._handle_gpt_tools(gpt_tools=gpt_tools)
+            return user_prompt, dev_prompt, res, res.usage
+
+        user_prompt, dev_prompt, messages = Prompt(task=task, agent=self, context=context).format_core(rag_tools=rag_tools, gpt_tools=gpt_tools)

         try:
             self._times_executed += 1
-            raw_response = self._invoke(
+            raw_response, usage = self._invoke(
                 messages=messages,
                 response_format=task._structure_response_format(model_provider=self.llm.provider),
                 tools=tools,
                 tool_res_as_final=task.tool_res_as_final,
-                task=task
             )
-            if raw_response:
-                task._usage.successful_requests += 1

         except Exception as e:
             self._times_executed += 1
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"The agent failed to execute the task. Error: {str(e)}", color="red")
-            user_prompt, dev_prompt, raw_response = self.execute_task(task, context
+            user_prompt, dev_prompt, raw_response, usage = self.execute_task(task, context)

         if self._times_executed > self.max_retry_limit:
             Logger(**self._logger_config, filename=self.key).log(level="error", message=f"Max retry limit has exceeded.", color="red")

@@ -549,7 +613,7 @@ class Agent(BaseModel):
         if self.max_rpm and self._rpm_controller:
             self._rpm_controller.stop_rpm_counter()

-        return user_prompt, dev_prompt, raw_response
+        return user_prompt, dev_prompt, raw_response, usage


     @property
versionhq/agent_network/formation.py
CHANGED
@@ -7,7 +7,7 @@ from versionhq.task.model import Task
 from versionhq.agent.model import Agent
 from versionhq.agent_network.model import AgentNetwork, Member, Formation
 from versionhq.agent.inhouse_agents import vhq_formation_planner
-from versionhq._utils import Logger
+from versionhq._utils import Logger, is_valid_enum

 import chromadb
 chromadb.api.client.SharedSystemClient.clear_system_cache()

@@ -83,14 +83,6 @@ def form_agent_network(

     res = vhq_task.execute(agent=vhq_formation_planner, context=context)

-    formation_keys = []
-    if hasattr(res.pydantic, "formation"):
-        formation_keys = [k for k in Formation._member_map_.keys() if k == res.pydantic.formation.upper()]
-    elif "formation" in res.json_dict:
-        formation_keys = [k for k in Formation._member_map_.keys() if k == res.json_dict["formation"].upper()]
-
-    _formation = Formation[formation_keys[0]] if formation_keys else Formation.SUPERVISING
-
     network_tasks = []
     members = []
     leader = res._fetch_value_of(key="leader_agent")

@@ -98,6 +90,8 @@ def form_agent_network(
     created_agents = [Agent(role=str(item), goal=str(item)) for item in agent_roles] if agent_roles else []
     task_descriptions = res._fetch_value_of(key="task_descriptions")
     task_outcomes = res._fetch_value_of(key="task_outcomes")
+    formation_key = res.json_dict["formation"] if "formation" in res.json_dict else None
+    _formation = Formation[formation_key] if is_valid_enum(key=formation_key, enum=Formation) else Formation.SUPERVISING

     if agents:
         for i, agent in enumerate(created_agents):
versionhq/agent_network/model.py
CHANGED
@@ -1,7 +1,6 @@
 import uuid
 import warnings
-from enum import
-from concurrent.futures import Future
+from enum import IntEnum
 from hashlib import md5
 from typing import Any, Dict, List, Callable, Optional, Tuple
 from typing_extensions import Self

@@ -30,7 +29,7 @@ GenerateSchema.match_type = match_type
 warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")


-class Formation(str, Enum):
+class Formation(IntEnum):
     SOLO = 1
     SUPERVISING = 2
     SQUAD = 3

@@ -38,7 +37,7 @@ class Formation(str, Enum):
     HYBRID = 10


-class TaskHandlingProcess(
+class TaskHandlingProcess(IntEnum):
     """
     A class representing task handling processes to tackle multiple tasks.
     When the agent network has multiple tasks that connect with edges, follow the edge conditions.
versionhq/clients/product/model.py
CHANGED
@@ -1,11 +1,11 @@
 import uuid
-from abc import ABC
-from typing import
+from abc import ABC
+from typing import Optional, List

-from pydantic import UUID4,
+from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
 from pydantic_core import PydanticCustomError

-from versionhq.tool.
+from versionhq.tool.composio.params import ComposioAppName


 class ProductProvider(ABC, BaseModel):
versionhq/clients/workflow/model.py
CHANGED
@@ -10,7 +10,7 @@ from versionhq.agent.model import Agent
 from versionhq.agent_network.model import AgentNetwork
 from versionhq.clients.product.model import Product
 from versionhq.clients.customer.model import Customer
-from versionhq.tool.
+from versionhq.tool.composio.params import ComposioAppName


 class MessagingComponent(ABC, BaseModel):
versionhq/llm/llm_vars.py
CHANGED
versionhq/llm/model.py
CHANGED
@@ -313,7 +313,7 @@ class LLM(BaseModel):
         cred = self._set_env_vars()

         if self.provider == "gemini":
-            self.response_format = { "type": "json_object" } if not tools else None
+            self.response_format = { "type": "json_object" } if not tools and self.model != "gemini/gemini-2.0-flash-thinking-exp" else None
         else:
             self.response_format = response_format

versionhq/storage/task_output_storage.py
CHANGED
@@ -147,8 +147,8 @@ class TaskOutputStorageHandler:
             description=str(task.description),
             raw=str(task.output.raw),
             responsible_agents=str(task.processed_agents),
-            tokens=task.
-            latency=task.
+            tokens=task.output.usage.total_tokens,
+            latency=task.output.usage.latency,
             score=task.output.aggregate_score if task.output.aggregate_score else "None",
         )
         self.storage.add(task=task, output=output_to_store, inputs=inputs)
versionhq/task/evaluation.py
CHANGED
@@ -79,6 +79,7 @@ class Evaluation(BaseModel):
         Returns:
             A pandas DataFrame with normalized 'weight' and 'score' columns, or an empty DataFrame if the input is empty.
         """
+
         if not self.items:
             return pd.DataFrame()

@@ -87,7 +88,6 @@ class Evaluation(BaseModel):

         scaler = MinMaxScaler(feature_range=(0, 1))
         df[['weight', 'score']] = scaler.fit_transform(df[['weight', 'score']])
-
         return df


@@ -98,7 +98,16 @@ class Evaluation(BaseModel):

         df = self._normalize_df()
         df['weighted_score'] = df['weight'] * df['score']
-
+        n = df['weighted_score'].sum()
+        if n == 0.0 or n == 1.0:
+            import math
+            s = [[item.score for item in self.items]]
+            w = [[item.weight for item in self.items]]
+            r = [math.sumprod(x, y) for x, y in zip(s, w)]
+            if r and sum(w[0]):
+                n = r[0] / sum(w[0])
+
+        aggregate_score = round(n, 3)
         return aggregate_score

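A hedged worked example of the fallback added to aggregate_score: when the MinMax-normalized weighted sum collapses to 0.0 or 1.0, the raw items are re-scored as a plain weighted average via math.sumprod (Python 3.12+). The scores and weights below are made up for illustration.

# Illustrative item scores and weights, not taken from the package.
import math

scores  = [3.5, 4.0, 2.0]
weights = [1.0, 2.0, 0.5]

n = math.sumprod(scores, weights) / sum(weights)   # (3.5*1.0 + 4.0*2.0 + 2.0*0.5) / 3.5
print(round(n, 3))                                 # 3.571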