versionhq 1.2.1.22__py3-none-any.whl → 1.2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +3 -4
- versionhq/_utils/__init__.py +1 -1
- versionhq/_utils/usage_metrics.py +32 -0
- versionhq/agent/inhouse_agents.py +5 -1
- versionhq/agent/model.py +4 -37
- versionhq/agent_network/model.py +193 -189
- versionhq/llm/model.py +35 -35
- versionhq/memory/model.py +4 -2
- versionhq/storage/task_output_storage.py +51 -16
- versionhq/storage/utils.py +1 -0
- versionhq/task/TEMPLATES/Description.py +6 -1
- versionhq/task/{evaluate.py → evaluation.py} +38 -22
- versionhq/task/model.py +60 -61
- versionhq/task_graph/draft.py +1 -1
- versionhq/task_graph/model.py +73 -48
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/METADATA +8 -7
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/RECORD +20 -21
- versionhq/task/log_handler.py +0 -59
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/LICENSE +0 -0
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/WHEEL +0 -0
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/top_level.txt +0 -0
versionhq/storage/task_output_storage.py CHANGED
@@ -34,9 +34,7 @@ class TaskOutputSQLiteStorage:
                 CREATE TABLE IF NOT EXISTS task_output (
                     task_id TEXT PRIMARY KEY,
                     output JSON,
-                    task_index INTEGER,
                     inputs JSON,
-                    was_replayed BOOLEAN,
                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                 )
                 """
@@ -47,24 +45,24 @@ class TaskOutputSQLiteStorage:
             self._logger.log(level="error", message=f"SQL database initialization failed: {str(e)}", color="red")


-    def add(self, task, output: Dict[str, Any],
+    def add(self, task, output: Dict[str, Any], inputs: Dict[str, Any] = {}):
         try:
             with sqlite3.connect(self.db_path) as conn:
                 cursor = conn.cursor()
                 cursor.execute(
                     """INSERT OR REPLACE INTO task_output
-                    (task_id, output,
-                    VALUES (?, ?, ?,
+                    (task_id, output, inputs, timestamp)
+                    VALUES (?, ?, ?, ?)
                     """,
-                    (str(task.id), json.dumps(output),
+                    (str(task.id), json.dumps(output), json.dumps(inputs), datetime.datetime.now())
                 )
                 conn.commit()

         except sqlite3.Error as e:
-            self._logger.log(level="error", message=f"SAVING TASK
+            self._logger.log(level="error", message=f"SAVING TASK OUTPUT ERROR: {e}", color="red")


-    def update(self,
+    def update(self, task_id: str, **kwargs):
         try:
             with sqlite3.connect(self.db_path) as conn:
                 cursor = conn.cursor()
@@ -73,14 +71,14 @@ class TaskOutputSQLiteStorage:
                     fields.append(f"{k} = ?")
                     values.append(json.dumps(v) if isinstance(v, dict) else v)

-                query = f"UPDATE
-                values.append(
+                query = f"UPDATE task_output SET {', '.join(fields)} WHERE task = ?"
+                values.append(task_id)
                 cursor.execute(query, tuple(values))
                 conn.commit()

                 if cursor.rowcount == 0:
                     self._logger.log(
-                        level="warning", message=f"No row found with
+                        level="warning", message=f"No row found with task_id {task_id}. No update performed.", color="yellow",
                     )

         except sqlite3.Error as e:
@@ -94,7 +92,7 @@ class TaskOutputSQLiteStorage:
                 cursor.execute("""
                     SELECT *
                     FROM task_output
-                    ORDER BY
+                    ORDER BY task_id
                 """)

                 rows = cursor.fetchall()
@@ -103,10 +101,8 @@ class TaskOutputSQLiteStorage:
                     result = {
                         "task_id": row[0],
                         "output": json.loads(row[1]),
-                        "
-                        "
-                        "was_replayed": row[4],
-                        "timestamp": row[5],
+                        "inputs": json.loads(row[2]),
+                        "timestamp": row[3],
                     }
                     results.append(result)
                 return results
@@ -125,3 +121,42 @@ class TaskOutputSQLiteStorage:

         except sqlite3.Error as e:
             self._logger.log(level="error", message=f"ERROR: Failed to delete all: {e}", color="red")
+
+
+
+class TaskOutputStorageHandler:
+    """A class to task output storage."""
+
+    from versionhq.task.model import Task
+
+    def __init__(self):
+        self.storage = TaskOutputSQLiteStorage()
+
+
+    def update(self, task: Task, inputs: Dict[str, Any] = {}) -> None:
+        saved_outputs = self.load()
+        if saved_outputs is None:
+            raise ValueError("Logs cannot be None")
+
+        self.add(task, inputs)
+
+
+    def add(self, task: Task, inputs: Dict[str, Any] = {}) -> None:
+        output_to_store = dict(
+            id=str(task.id),
+            description=str(task.description),
+            raw=str(task.output.raw),
+            responsible_agents=str(task.processed_agents),
+            tokens=task.output._tokens,
+            latency=task.output.latency,
+            score=task.output.aggregate_score if task.output.aggregate_score else "None",
+        )
+        self.storage.add(task=task, output=output_to_store, inputs=inputs)
+
+
+    def reset(self) -> None:
+        self.storage.delete_all()
+
+
+    def load(self) -> Optional[List[Dict[str, Any]]]:
+        return self.storage.load()
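For orientation, here is a minimal standalone sketch of the trimmed schema and write path shown above: the task_index and was_replayed columns are gone, and inputs and timestamp are written explicitly. Only the table layout and SQL statements come from the diff; the in-memory database and sample values are illustrative.

import datetime
import json
import sqlite3

with sqlite3.connect(":memory:") as conn:
    cursor = conn.cursor()
    # same four-column schema as the new table definition above
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS task_output (
            task_id TEXT PRIMARY KEY,
            output JSON,
            inputs JSON,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # mirrors the reworked add(): one row per task, replaced on re-run
    # (timestamp stored as an ISO string here for portability)
    cursor.execute(
        """INSERT OR REPLACE INTO task_output (task_id, output, inputs, timestamp)
           VALUES (?, ?, ?, ?)""",
        ("demo-task-id", json.dumps({"raw": "result"}), json.dumps({}), datetime.datetime.now().isoformat()),
    )
    conn.commit()
    print(cursor.execute("SELECT * FROM task_output ORDER BY task_id").fetchall())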
versionhq/storage/utils.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path

 load_dotenv(override=True)

+
 def fetch_db_storage_path() -> str:
     directory_name = get_project_directory_name()
     data_dir = Path(appdirs.user_data_dir(appname=directory_name, appauthor="Version IO Sdn Bhd", version=None, roaming=False))
versionhq/task/TEMPLATES/Description.py CHANGED
@@ -1,5 +1,10 @@
-EVALUATE="""Evaluate the provided task output against the given task description, assigning a score between 0 (worst) and 1 (best) based on the specified criteria. Scores should be numerical (integers or decimals). Provide specific suggestions for improvement. Do not assign identical scores to different criteria:
+EVALUATE="""Evaluate the provided task output against the given task description, assigning a score between 0 (worst) and 1 (best) based on the specified criteria. Scores should be numerical (integers or decimals). Provide specific suggestions for improvement. Do not assign identical scores to different criteria unless otherwise you have clear reasons to do so:
 Task output: {task_output}
 Task description: {task_description}
 Evaluation criteria: {eval_criteria}
 """
+
+SHOTS="""Here are two examples of task outputs. The first is considered excellent due to its clear planning and alignment with the goal. The second is weak due to clichéd language. Now, evaluate the given task output.
+First = Excellent example: {c}
+Second = Weak example: {w}
+"""
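A quick sketch of how these two templates are combined downstream: TaskOutput.evaluate() in versionhq/task/model.py (later in this diff) appends the SHOTS block to each EVALUATE prompt. The sketch assumes versionhq 1.2.2.0 is installed; the sample strings are placeholders.

from versionhq.task.TEMPLATES.Description import EVALUATE, SHOTS

# one EVALUATE prompt per criterion, optionally followed by the few-shot block
description = EVALUATE.format(
    task_output="<raw task output>",
    task_description="<task description>",
    eval_criteria="accuracy",
)
fsl_prompt = SHOTS.format(c="<excellent past output>", w="<weak past output>")
print(description + fsl_prompt)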
versionhq/task/{evaluate.py → evaluation.py} RENAMED
@@ -1,12 +1,12 @@
 from typing import List, Optional, Dict, Any
 from typing_extensions import Self

-from pydantic import BaseModel,
+from pydantic import BaseModel, model_validator

 from versionhq.memory.model import MemoryMetadata

 """
-Evaluate task output from accuracy, token consumption, latency perspectives, and mark the score from 0 to 1.
+Evaluate task output from accuracy, token consumption, and latency perspectives, and mark the score from 0 to 1.
 """


@@ -24,16 +24,7 @@ class Score:
     `weight`: Importance of each factor to the aggregated score.
     """

-    def __init__(
-        self,
-        brand_tone: ScoreFormat = ScoreFormat(0, 0),
-        audience: ScoreFormat = ScoreFormat(0, 0),
-        track_record: ScoreFormat = ScoreFormat(0, 0),
-        config: Optional[Dict[str, ScoreFormat]] = None
-    ):
-        self.brand_tone = brand_tone
-        self.audience = audience
-        self.track_record = track_record
+    def __init__(self, config: Optional[Dict[str, ScoreFormat]] = None):
         self.config = config

         if self.config:
@@ -42,7 +33,7 @@ class Score:
                 setattr(self, k, v)


-    def result(self) ->
+    def result(self) -> float:
         aggregate_score, denominator = 0, 0

         for k, v in self.__dict__.items():
@@ -52,18 +43,18 @@ class Score:
         if denominator == 0:
             return 0

-        return round(aggregate_score / denominator,
+        return round(aggregate_score / denominator, 3)


 class EvaluationItem(BaseModel):
     """
-    A class to store evaluation and suggestion
+    A Pydantic class to store the evaluation result with scoring and suggestion based on the given criteria.
     """
     criteria: str
     suggestion: str
     score: float

-    def
+    def _format_score(self, weight: int = 1) -> ScoreFormat | None:
         if self.score and isinstance(self.score, float):
             return ScoreFormat(rate=self.score, weight=weight)

@@ -71,10 +62,13 @@ class EvaluationItem(BaseModel):


 class Evaluation(BaseModel):
+    """
+    A Pydantic class to handle evaluation of the task output.
+    """
+
     items: List[EvaluationItem] = []
-
-
-    eval_by: Any = Field(default=None, description="stores agent object that evaluates the outcome")
+    eval_by: Any = None
+

     @model_validator(mode="after")
     def set_up_evaluator(self) -> Self:
@@ -87,7 +81,7 @@ class Evaluation(BaseModel):
         """
         Create and store evaluation results in the memory metadata
         """
-        eval_by = self.eval_by.
+        eval_by = self.eval_by.key # saving memory
         score = self.aggregate_score
         eval_criteria = ", ".join([item.criteria for item in self.items]) if self.items else None
         suggestion = self.suggestion_summary
@@ -95,6 +89,28 @@ class Evaluation(BaseModel):
         return memory_metadata


+    def _draft_fsl_prompt(self, task_description: str = None) -> str | None:
+        """
+        Search competitive and weak cases in the past and draft few shot learning prompt.
+        """
+        from versionhq.task.TEMPLATES.Description import SHOTS
+        shot_prompt = None
+
+        if self.eval_by.long_term_memory:
+            res = self.eval_by.long_term_memory.search(query=task_description, latest_n=10)
+
+            if res:
+                new_res = filter(lambda x: "score" in x["metadata"], res)
+                new_res = list(new_res)
+                new_res.sort(key=lambda x: x["metadata"]["score"], reverse=True)
+                if new_res[0]['data']:
+                    c = new_res[0]['data']['task_output']
+                    w = new_res[len(new_res)-1]['data']['task_output'] if new_res[len(new_res)-1]['metadata']['score'] < new_res[0]['metadata']['score'] else ""
+                    shot_prompt = SHOTS.format(c=c, w=w)
+
+        return shot_prompt
+
+
     @property
     def aggregate_score(self) -> float:
         """
@@ -107,7 +123,7 @@ class Evaluation(BaseModel):
         denominator = 0

         for item in self.items:
-            score_format = item.
+            score_format = item._format_score()
             aggregate_score += score_format.aggregate if score_format else 0
             denominator += score_format.weight if score_format else 0

@@ -120,7 +136,7 @@ class Evaluation(BaseModel):
     @property
     def suggestion_summary(self) -> str | None:
         """
-
+        Returns a summary of the suggestions
         """
         if not self.items:
             return None
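Both Score.result() and Evaluation.aggregate_score reduce to a weighted average rounded to three decimals. Below is a standalone sketch of that arithmetic; ScoreFormat here is a stand-in for the package's class (defined elsewhere in versionhq), and aggregate = rate * weight is an assumption inferred from how .aggregate and .weight are summed above.

from dataclasses import dataclass

@dataclass
class ScoreFormat:   # stand-in for versionhq's ScoreFormat
    rate: float      # score between 0 and 1
    weight: int = 1  # importance of the criterion

    @property
    def aggregate(self) -> float:
        # assumed: weighted contribution of one criterion
        return self.rate * self.weight

def aggregate_score(items: list[ScoreFormat]) -> float:
    total = sum(i.aggregate for i in items)
    denominator = sum(i.weight for i in items)
    return round(total / denominator, 3) if denominator else 0

print(aggregate_score([ScoreFormat(0.8, 1), ScoreFormat(0.5, 2)]))  # (0.8*1 + 0.5*2) / 3 = 0.6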
versionhq/task/model.py CHANGED
@@ -14,8 +14,7 @@ from pydantic import UUID4, BaseModel, Field, PrivateAttr, field_validator, mode
 from pydantic_core import PydanticCustomError

 import versionhq as vhq
-from versionhq.task.
-from versionhq.task.evaluate import Evaluation, EvaluationItem
+from versionhq.task.evaluation import Evaluation, EvaluationItem
 from versionhq.tool.model import Tool, ToolSet
 from versionhq._utils import process_config, Logger

@@ -176,14 +175,16 @@ class TaskOutput(BaseModel):
     """
     A class to store the final output of the given task in raw (string), json_dict, and pydantic class formats.
     """
+    _tokens: int = PrivateAttr(default=0)

     task_id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True, description="store Task ID")
     raw: str = Field(default="", description="Raw output of the task")
     json_dict: Dict[str, Any] = Field(default=None, description="`raw` converted to dictionary")
     pydantic: Optional[Any] = Field(default=None)
-    tool_output: Optional[Any] = Field(default=None, description="
-    callback_output: Optional[Any] = Field(default=None, description="
-
+    tool_output: Optional[Any] = Field(default=None, description="stores tool result when the task takes tool output as its final output")
+    callback_output: Optional[Any] = Field(default=None, description="stores task or agent callback outcome")
+    latency: float = Field(default=None, description="job latency in ms")
+    evaluation: Optional[InstanceOf[Evaluation]] = Field(default=None, description="stores overall evaluation of the task output. stored in ltm")


     def to_context_prompt(self) -> str:
@@ -206,21 +207,25 @@ class TaskOutput(BaseModel):
         """
         Evaluate the output based on the criteria, score each from 0 to 1 scale, and raise suggestions for future improvement.
         """
-        from versionhq.task.TEMPLATES.Description import EVALUATE
+        from versionhq.task.TEMPLATES.Description import EVALUATE, SHOTS

         self.evaluation = Evaluation() if not self.evaluation else self.evaluation

-
-
+        eval_criteria = task.eval_criteria if task.eval_criteria else ["accuracy", "completeness", "conciseness", ]
+        fsl_prompt = ""

-
+        if task.fsls:
+            fsl_prompt = SHOTS.format(c=task.fsls[0], w=task.fsls[1] if len(task.fsls) > 1 else "")
+        else:
+            fsl_prompt = self.evaluation._draft_fsl_prompt(task_description=task.description)

         for item in eval_criteria:
-
-
-
-            )
+            description = EVALUATE.format(task_description=task.description, task_output=self.raw, eval_criteria=str(item))
+            description = description + fsl_prompt if fsl_prompt else description
+
+            task_eval = Task(description=description, pydantic_output=EvaluationItem)
             res = task_eval.execute(agent=self.evaluation.eval_by)
+            self._tokens += task_eval._tokens

             if res.pydantic:
                 item = EvaluationItem(score=res.pydantic.score, suggestion=res.pydantic.suggestion, criteria=res.pydantic.criteria)
@@ -263,7 +268,6 @@ class Task(BaseModel):

     __hash__ = object.__hash__
     _original_description: str = PrivateAttr(default=None)
-    _task_output_handler = TaskOutputStorageHandler()
     config: Optional[Dict[str, Any]] = Field(default=None, description="values to set on Task class")

     id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True, description="unique identifier for the object, not set by user")
@@ -287,15 +291,15 @@ class Task(BaseModel):

     # evaluation
     should_evaluate: bool = Field(default=False, description="True to run the evaluation flow")
-    eval_criteria: Optional[List[str]] = Field(default_factory=list, description="criteria to evaluate the outcome
+    eval_criteria: Optional[List[str]] = Field(default_factory=list, description="stores a list of criteria to evaluate the outcome")
+    fsls: Optional[list[str]] = Field(default=None, description="stores ideal/weak responses")

-    # recording
-
+    # recording
+    _tokens: int = 0
+    processed_agents: Set[str] = Field(default_factory=set, description="store keys of the agents that executed the task")
     tool_errors: int = 0
     delegations: int = 0
-
-    tokens: int = 0 # tokens consumed
-    output: Optional[TaskOutput] = Field(default=None, description="store the final task output in TaskOutput class")
+    output: Optional[TaskOutput] = Field(default=None, description="store the final TaskOutput object")


     @model_validator(mode="before")
|
@@ -553,7 +557,7 @@ Ref. Output image: {output_formats_to_follow}
|
|
553
557
|
task_output=str(task_output.raw),
|
554
558
|
agent=str(agent.role),
|
555
559
|
metadata=memory_metadata
|
556
|
-
|
560
|
+
)
|
557
561
|
|
558
562
|
except AttributeError as e:
|
559
563
|
Logger().log(level="error", message=f"Missing attributes for long term memory: {str(e)}", color="red")
|
@@ -604,10 +608,16 @@ Ref. Output image: {output_formats_to_follow}
         return agent_to_delegate


+    def _store_logs(self, inputs: Optional[Dict[str, Any]] = {}) -> None:
+        from versionhq.storage.task_output_storage import TaskOutputStorageHandler
+
+        TaskOutputStorageHandler().update(task=self, inputs=inputs)
+
+
     # task execution
     def execute(
         self, type: TaskExecutionType = None, agent: Optional["vhq.Agent"] = None, context: Optional[Any] = None
-
+        ) -> TaskOutput | Future[TaskOutput]:
         """
         A main method to handle task execution. Build an agent when the agent is not given.
         """
@@ -632,27 +642,19 @@ Ref. Output image: {output_formats_to_follow}
     def _execute_async(self, agent, context: Optional[Any] = None) -> Future[TaskOutput]:
         """Executes the task asynchronously."""
         future: Future[TaskOutput] = Future()
-        threading.Thread(daemon=True, target=self._execute_task_async, args=(agent, context, future)).start()
-        return future

+        def _handle_task_async(self, agent, context: Optional[str], future: Future[TaskOutput]) -> None:
+            result = self._execute_core(agent, context)
+            future.set_result(result)

-
-
-        Executes the task asynchronously with context handling.
-        """
-        result = self._execute_core(agent, context)
-        future.set_result(result)
+        threading.Thread(daemon=True, target=_handle_task_async, args=(agent, context, future)).start()
+        return future


     def _execute_core(self, agent, context: Optional[Any]) -> TaskOutput:
         """
-        A core method
-        Handles 1. agent delegation, 2. tools, 3. context to add to the prompt, and 4. callbacks.
+        A core method to execute a task.
         """
-
-        from versionhq.agent.model import Agent
-        from versionhq.agent_network.model import AgentNetwork
-
         task_output: InstanceOf[TaskOutput] = None
         raw_output: str = None
         tool_output: str | list = None
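The reworked _execute_async() above is the standard "resolve a Future from a daemon thread" pattern. A self-contained sketch of the same idea, with run_core standing in for Task._execute_core():

import threading
from concurrent.futures import Future

def execute_async(run_core, *args) -> Future:
    future: Future = Future()

    def handle() -> None:
        # run the synchronous core and hand its result to the Future
        future.set_result(run_core(*args))

    threading.Thread(daemon=True, target=handle).start()
    return future

future = execute_async(lambda x: x * 2, 21)
print(future.result())  # blocks until the worker thread finishes -> 42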
@@ -669,12 +671,12 @@ Ref. Output image: {output_formats_to_follow}
             agent = agent_to_delegate
             self.delegations += 1

-
         if self.tool_res_as_final == True:
             started_at = datetime.datetime.now()
             tool_output = agent.execute_task(task=self, context=context, task_tools=task_tools)
+            raw_output = str(tool_output) if tool_output else ""
             ended_at = datetime.datetime.now()
-            task_output = TaskOutput(task_id=self.id, tool_output=tool_output, raw=
+            task_output = TaskOutput(task_id=self.id, tool_output=tool_output, raw=raw_output)

         else:
             started_at = datetime.datetime.now()
@@ -691,26 +693,13 @@ Ref. Output image: {output_formats_to_follow}
                 task_id=self.id,
                 raw=raw_output if raw_output is not None else "",
                 pydantic=pydantic_output,
-                json_dict=json_dict_output
+                json_dict=json_dict_output,
             )

-
-        task_output.
+        task_output.latency = round((ended_at - started_at).total_seconds() * 1000, 3)
+        task_output._tokens = self._tokens
         self.output = task_output
-        self.processed_agents.add(agent.
-
-        if self.should_evaluate and raw_output: # eval only when raw output exsits
-            task_output.evaluate(task=self)
-
-        self._create_short_and_long_term_memories(agent=agent, task_output=task_output)
-
-        if self.callback and isinstance(self.callback, Callable):
-            kwargs = { **self.callback_kwargs, **task_output.json_dict }
-            sig = inspect.signature(self.callback)
-            valid_keys = [param.name for param in sig.parameters.values() if param.kind == param.POSITIONAL_OR_KEYWORD]
-            valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
-            callback_res = self.callback(**valid_kwargs)
-            task_output.callback_output = callback_res
+        self.processed_agents.add(agent.key)

         # if self.output_file: ## disabled for now
         #    content = (
@@ -719,15 +708,25 @@ Ref. Output image: {output_formats_to_follow}
         #    else pydantic_output.model_dump_json() if pydantic_output else result
         # )
         # self._save_file(content)
-        return task_output


-
-
-
-
+        # successful output will be evaluated and stored in the logs
+        if raw_output:
+            if self.should_evaluate:
+                task_output.evaluate(task=self)
+            self._create_short_and_long_term_memories(agent=agent, task_output=task_output)

-
+            if self.callback and isinstance(self.callback, Callable):
+                kwargs = { **self.callback_kwargs, **task_output.json_dict }
+                sig = inspect.signature(self.callback)
+                valid_keys = [param.name for param in sig.parameters.values() if param.kind == param.POSITIONAL_OR_KEYWORD]
+                valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
+                callback_res = self.callback(**valid_kwargs)
+                task_output.callback_output = callback_res
+
+        self._store_logs()
+
+        return task_output


     @property
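The callback block that moved into the success branch above filters the merged kwargs down to the callback's own positional-or-keyword parameters before calling it. A standalone sketch of that filtering, with an illustrative callback and kwargs:

import inspect

def notify(summary, score):  # illustrative user callback
    return f"{summary} (score={score})"

kwargs = {"summary": "Launch email drafted", "score": 0.87, "extra": "dropped"}

sig = inspect.signature(notify)
valid_keys = [p.name for p in sig.parameters.values() if p.kind == p.POSITIONAL_OR_KEYWORD]
valid_kwargs = {k: kwargs.get(k) for k in valid_keys}  # missing keys fall back to None

print(notify(**valid_kwargs))  # keys the callback cannot accept are silently dropped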
versionhq/task_graph/draft.py CHANGED
@@ -90,7 +90,7 @@ def workflow(final_output: Type[BaseModel], context: Any = None, human: bool = F
         target = [v for v in task_graph.nodes.values() if v.task.name == target_task_name][0]
         dependency_type = dependency_types[i]
         task_graph.add_dependency(
-
+            source=source.identifier, target=target.identifier, dependency_type=dependency_type)


     task_graph.visualize()
|