versionhq 1.2.1.22__py3-none-any.whl → 1.2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versionhq/__init__.py +3 -4
- versionhq/_utils/__init__.py +1 -1
- versionhq/_utils/usage_metrics.py +32 -0
- versionhq/agent/inhouse_agents.py +5 -1
- versionhq/agent/model.py +4 -37
- versionhq/agent_network/model.py +193 -189
- versionhq/llm/model.py +35 -35
- versionhq/memory/model.py +4 -2
- versionhq/storage/task_output_storage.py +51 -16
- versionhq/storage/utils.py +1 -0
- versionhq/task/TEMPLATES/Description.py +6 -1
- versionhq/task/{evaluate.py → evaluation.py} +38 -22
- versionhq/task/model.py +60 -61
- versionhq/task_graph/draft.py +1 -1
- versionhq/task_graph/model.py +73 -48
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/METADATA +8 -7
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/RECORD +20 -21
- versionhq/task/log_handler.py +0 -59
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/LICENSE +0 -0
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/WHEEL +0 -0
- {versionhq-1.2.1.22.dist-info → versionhq-1.2.2.0.dist-info}/top_level.txt +0 -0
versionhq/storage/task_output_storage.py CHANGED
@@ -34,9 +34,7 @@ class TaskOutputSQLiteStorage:
                 CREATE TABLE IF NOT EXISTS task_output (
                     task_id TEXT PRIMARY KEY,
                     output JSON,
-                    task_index INTEGER,
                     inputs JSON,
-                    was_replayed BOOLEAN,
                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                 )
                 """
@@ -47,24 +45,24 @@ class TaskOutputSQLiteStorage:
             self._logger.log(level="error", message=f"SQL database initialization failed: {str(e)}", color="red")


-    def add(self, task, output: Dict[str, Any],
+    def add(self, task, output: Dict[str, Any], inputs: Dict[str, Any] = {}):
         try:
             with sqlite3.connect(self.db_path) as conn:
                 cursor = conn.cursor()
                 cursor.execute(
                     """INSERT OR REPLACE INTO task_output
-                    (task_id, output,
-                    VALUES (?, ?, ?,
+                    (task_id, output, inputs, timestamp)
+                    VALUES (?, ?, ?, ?)
                     """,
-                    (str(task.id), json.dumps(output),
+                    (str(task.id), json.dumps(output), json.dumps(inputs), datetime.datetime.now())
                 )
                 conn.commit()

         except sqlite3.Error as e:
-            self._logger.log(level="error", message=f"SAVING TASK
+            self._logger.log(level="error", message=f"SAVING TASK OUTPUT ERROR: {e}", color="red")


-    def update(self,
+    def update(self, task_id: str, **kwargs):
         try:
             with sqlite3.connect(self.db_path) as conn:
                 cursor = conn.cursor()
@@ -73,14 +71,14 @@ class TaskOutputSQLiteStorage:
                     fields.append(f"{k} = ?")
                     values.append(json.dumps(v) if isinstance(v, dict) else v)

-                query = f"UPDATE
-                values.append(
+                query = f"UPDATE task_output SET {', '.join(fields)} WHERE task = ?"
+                values.append(task_id)
                 cursor.execute(query, tuple(values))
                 conn.commit()

                 if cursor.rowcount == 0:
                     self._logger.log(
-                        level="warning", message=f"No row found with
+                        level="warning", message=f"No row found with task_id {task_id}. No update performed.", color="yellow",
                     )

         except sqlite3.Error as e:
@@ -94,7 +92,7 @@ class TaskOutputSQLiteStorage:
                 cursor.execute("""
                     SELECT *
                     FROM task_output
-                    ORDER BY
+                    ORDER BY task_id
                 """)

                 rows = cursor.fetchall()
@@ -103,10 +101,8 @@ class TaskOutputSQLiteStorage:
                     result = {
                         "task_id": row[0],
                         "output": json.loads(row[1]),
-                        "
-                        "
-                        "was_replayed": row[4],
-                        "timestamp": row[5],
+                        "inputs": json.loads(row[2]),
+                        "timestamp": row[3],
                     }
                     results.append(result)
                 return results
@@ -125,3 +121,42 @@ class TaskOutputSQLiteStorage:

         except sqlite3.Error as e:
             self._logger.log(level="error", message=f"ERROR: Failed to delete all: {e}", color="red")
+
+
+
+class TaskOutputStorageHandler:
+    """A class to task output storage."""
+
+    from versionhq.task.model import Task
+
+    def __init__(self):
+        self.storage = TaskOutputSQLiteStorage()
+
+
+    def update(self, task: Task, inputs: Dict[str, Any] = {}) -> None:
+        saved_outputs = self.load()
+        if saved_outputs is None:
+            raise ValueError("Logs cannot be None")
+
+        self.add(task, inputs)
+
+
+    def add(self, task: Task, inputs: Dict[str, Any] = {}) -> None:
+        output_to_store = dict(
+            id=str(task.id),
+            description=str(task.description),
+            raw=str(task.output.raw),
+            responsible_agents=str(task.processed_agents),
+            tokens=task.output._tokens,
+            latency=task.output.latency,
+            score=task.output.aggregate_score if task.output.aggregate_score else "None",
+        )
+        self.storage.add(task=task, output=output_to_store, inputs=inputs)
+
+
+    def reset(self) -> None:
+        self.storage.delete_all()
+
+
+    def load(self) -> Optional[List[Dict[str, Any]]]:
+        return self.storage.load()
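For orientation, here is a minimal standalone sketch of the trimmed schema and write path shown above: the task_index and was_replayed columns are gone, and inputs and timestamp are written explicitly. Only the table layout and SQL statements come from the diff; the in-memory database and sample values are illustrative.

import datetime
import json
import sqlite3

with sqlite3.connect(":memory:") as conn:
    cursor = conn.cursor()
    # same four-column schema as the new table definition above
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS task_output (
            task_id TEXT PRIMARY KEY,
            output JSON,
            inputs JSON,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # mirrors the reworked add(): one row per task, replaced on re-run
    # (timestamp stored as an ISO string here for portability)
    cursor.execute(
        """INSERT OR REPLACE INTO task_output (task_id, output, inputs, timestamp)
           VALUES (?, ?, ?, ?)""",
        ("demo-task-id", json.dumps({"raw": "result"}), json.dumps({}), datetime.datetime.now().isoformat()),
    )
    conn.commit()
    print(cursor.execute("SELECT * FROM task_output ORDER BY task_id").fetchall())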
versionhq/storage/utils.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path

 load_dotenv(override=True)

+
 def fetch_db_storage_path() -> str:
     directory_name = get_project_directory_name()
     data_dir = Path(appdirs.user_data_dir(appname=directory_name, appauthor="Version IO Sdn Bhd", version=None, roaming=False))
versionhq/task/TEMPLATES/Description.py CHANGED
@@ -1,5 +1,10 @@
-EVALUATE="""Evaluate the provided task output against the given task description, assigning a score between 0 (worst) and 1 (best) based on the specified criteria. Scores should be numerical (integers or decimals). Provide specific suggestions for improvement. Do not assign identical scores to different criteria:
+EVALUATE="""Evaluate the provided task output against the given task description, assigning a score between 0 (worst) and 1 (best) based on the specified criteria. Scores should be numerical (integers or decimals). Provide specific suggestions for improvement. Do not assign identical scores to different criteria unless otherwise you have clear reasons to do so:
 Task output: {task_output}
 Task description: {task_description}
 Evaluation criteria: {eval_criteria}
 """
+
+SHOTS="""Here are two examples of task outputs. The first is considered excellent due to its clear planning and alignment with the goal. The second is weak due to clichéd language. Now, evaluate the given task output.
+First = Excellent example: {c}
+Second = Weak example: {w}
+"""
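A quick sketch of how these two templates are combined downstream: TaskOutput.evaluate() in versionhq/task/model.py (later in this diff) appends the SHOTS block to each EVALUATE prompt. The sketch assumes versionhq 1.2.2.0 is installed; the sample strings are placeholders.

from versionhq.task.TEMPLATES.Description import EVALUATE, SHOTS

# one EVALUATE prompt per criterion, optionally followed by the few-shot block
description = EVALUATE.format(
    task_output="<raw task output>",
    task_description="<task description>",
    eval_criteria="accuracy",
)
fsl_prompt = SHOTS.format(c="<excellent past output>", w="<weak past output>")
print(description + fsl_prompt)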
versionhq/task/{evaluate.py → evaluation.py} RENAMED
@@ -1,12 +1,12 @@
 from typing import List, Optional, Dict, Any
 from typing_extensions import Self

-from pydantic import BaseModel,
+from pydantic import BaseModel, model_validator

 from versionhq.memory.model import MemoryMetadata

 """
-Evaluate task output from accuracy, token consumption, latency perspectives, and mark the score from 0 to 1.
+Evaluate task output from accuracy, token consumption, and latency perspectives, and mark the score from 0 to 1.
 """


@@ -24,16 +24,7 @@ class Score:
     `weight`: Importance of each factor to the aggregated score.
     """

-    def __init__(
-        self,
-        brand_tone: ScoreFormat = ScoreFormat(0, 0),
-        audience: ScoreFormat = ScoreFormat(0, 0),
-        track_record: ScoreFormat = ScoreFormat(0, 0),
-        config: Optional[Dict[str, ScoreFormat]] = None
-    ):
-        self.brand_tone = brand_tone
-        self.audience = audience
-        self.track_record = track_record
+    def __init__(self, config: Optional[Dict[str, ScoreFormat]] = None):
         self.config = config

         if self.config:
@@ -42,7 +33,7 @@ class Score:
                 setattr(self, k, v)


-    def result(self) ->
+    def result(self) -> float:
         aggregate_score, denominator = 0, 0

         for k, v in self.__dict__.items():
@@ -52,18 +43,18 @@ class Score:
         if denominator == 0:
             return 0

-        return round(aggregate_score / denominator,
+        return round(aggregate_score / denominator, 3)


 class EvaluationItem(BaseModel):
     """
-    A class to store evaluation and suggestion
+    A Pydantic class to store the evaluation result with scoring and suggestion based on the given criteria.
     """
     criteria: str
     suggestion: str
     score: float

-    def
+    def _format_score(self, weight: int = 1) -> ScoreFormat | None:
         if self.score and isinstance(self.score, float):
             return ScoreFormat(rate=self.score, weight=weight)

@@ -71,10 +62,13 @@ class EvaluationItem(BaseModel):


 class Evaluation(BaseModel):
+    """
+    A Pydantic class to handle evaluation of the task output.
+    """
+
     items: List[EvaluationItem] = []
-
-
-    eval_by: Any = Field(default=None, description="stores agent object that evaluates the outcome")
+    eval_by: Any = None
+

     @model_validator(mode="after")
     def set_up_evaluator(self) -> Self:
@@ -87,7 +81,7 @@ class Evaluation(BaseModel):
         """
         Create and store evaluation results in the memory metadata
         """
-        eval_by = self.eval_by.
+        eval_by = self.eval_by.key # saving memory
         score = self.aggregate_score
         eval_criteria = ", ".join([item.criteria for item in self.items]) if self.items else None
         suggestion = self.suggestion_summary
@@ -95,6 +89,28 @@ class Evaluation(BaseModel):
         return memory_metadata


+    def _draft_fsl_prompt(self, task_description: str = None) -> str | None:
+        """
+        Search competitive and weak cases in the past and draft few shot learning prompt.
+        """
+        from versionhq.task.TEMPLATES.Description import SHOTS
+        shot_prompt = None
+
+        if self.eval_by.long_term_memory:
+            res = self.eval_by.long_term_memory.search(query=task_description, latest_n=10)
+
+            if res:
+                new_res = filter(lambda x: "score" in x["metadata"], res)
+                new_res = list(new_res)
+                new_res.sort(key=lambda x: x["metadata"]["score"], reverse=True)
+                if new_res[0]['data']:
+                    c = new_res[0]['data']['task_output']
+                    w = new_res[len(new_res)-1]['data']['task_output'] if new_res[len(new_res)-1]['metadata']['score'] < new_res[0]['metadata']['score'] else ""
+                    shot_prompt = SHOTS.format(c=c, w=w)
+
+        return shot_prompt
+
+
     @property
     def aggregate_score(self) -> float:
         """
@@ -107,7 +123,7 @@ class Evaluation(BaseModel):
         denominator = 0

         for item in self.items:
-            score_format = item.
+            score_format = item._format_score()
             aggregate_score += score_format.aggregate if score_format else 0
             denominator += score_format.weight if score_format else 0

@@ -120,7 +136,7 @@ class Evaluation(BaseModel):
     @property
     def suggestion_summary(self) -> str | None:
         """
-
+        Returns a summary of the suggestions
         """
         if not self.items:
             return None
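Both Score.result() and Evaluation.aggregate_score reduce to a weighted average rounded to three decimals. Below is a standalone sketch of that arithmetic; ScoreFormat here is a stand-in for the package's class (defined elsewhere in versionhq), and aggregate = rate * weight is an assumption inferred from how .aggregate and .weight are summed above.

from dataclasses import dataclass

@dataclass
class ScoreFormat:   # stand-in for versionhq's ScoreFormat
    rate: float      # score between 0 and 1
    weight: int = 1  # importance of the criterion

    @property
    def aggregate(self) -> float:
        # assumed: weighted contribution of one criterion
        return self.rate * self.weight

def aggregate_score(items: list[ScoreFormat]) -> float:
    total = sum(i.aggregate for i in items)
    denominator = sum(i.weight for i in items)
    return round(total / denominator, 3) if denominator else 0

print(aggregate_score([ScoreFormat(0.8, 1), ScoreFormat(0.5, 2)]))  # (0.8*1 + 0.5*2) / 3 = 0.6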
versionhq/task/model.py CHANGED
@@ -14,8 +14,7 @@ from pydantic import UUID4, BaseModel, Field, PrivateAttr, field_validator, mode
 from pydantic_core import PydanticCustomError

 import versionhq as vhq
-from versionhq.task.
-from versionhq.task.evaluate import Evaluation, EvaluationItem
+from versionhq.task.evaluation import Evaluation, EvaluationItem
 from versionhq.tool.model import Tool, ToolSet
 from versionhq._utils import process_config, Logger

@@ -176,14 +175,16 @@ class TaskOutput(BaseModel):
     """
     A class to store the final output of the given task in raw (string), json_dict, and pydantic class formats.
     """
+    _tokens: int = PrivateAttr(default=0)

     task_id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True, description="store Task ID")
     raw: str = Field(default="", description="Raw output of the task")
     json_dict: Dict[str, Any] = Field(default=None, description="`raw` converted to dictionary")
     pydantic: Optional[Any] = Field(default=None)
-    tool_output: Optional[Any] = Field(default=None, description="
-    callback_output: Optional[Any] = Field(default=None, description="
-
+    tool_output: Optional[Any] = Field(default=None, description="stores tool result when the task takes tool output as its final output")
+    callback_output: Optional[Any] = Field(default=None, description="stores task or agent callback outcome")
+    latency: float = Field(default=None, description="job latency in ms")
+    evaluation: Optional[InstanceOf[Evaluation]] = Field(default=None, description="stores overall evaluation of the task output. stored in ltm")


     def to_context_prompt(self) -> str:
@@ -206,21 +207,25 @@ class TaskOutput(BaseModel):
         """
         Evaluate the output based on the criteria, score each from 0 to 1 scale, and raise suggestions for future improvement.
         """
-        from versionhq.task.TEMPLATES.Description import EVALUATE
+        from versionhq.task.TEMPLATES.Description import EVALUATE, SHOTS

         self.evaluation = Evaluation() if not self.evaluation else self.evaluation

-
-
+        eval_criteria = task.eval_criteria if task.eval_criteria else ["accuracy", "completeness", "conciseness", ]
+        fsl_prompt = ""

-
+        if task.fsls:
+            fsl_prompt = SHOTS.format(c=task.fsls[0], w=task.fsls[1] if len(task.fsls) > 1 else "")
+        else:
+            fsl_prompt = self.evaluation._draft_fsl_prompt(task_description=task.description)

         for item in eval_criteria:
-
-
-
-            )
+            description = EVALUATE.format(task_description=task.description, task_output=self.raw, eval_criteria=str(item))
+            description = description + fsl_prompt if fsl_prompt else description
+
+            task_eval = Task(description=description, pydantic_output=EvaluationItem)
             res = task_eval.execute(agent=self.evaluation.eval_by)
+            self._tokens += task_eval._tokens

             if res.pydantic:
                 item = EvaluationItem(score=res.pydantic.score, suggestion=res.pydantic.suggestion, criteria=res.pydantic.criteria)
@@ -263,7 +268,6 @@ class Task(BaseModel):

     __hash__ = object.__hash__
     _original_description: str = PrivateAttr(default=None)
-    _task_output_handler = TaskOutputStorageHandler()
     config: Optional[Dict[str, Any]] = Field(default=None, description="values to set on Task class")

     id: UUID4 = Field(default_factory=uuid.uuid4, frozen=True, description="unique identifier for the object, not set by user")
@@ -287,15 +291,15 @@ class Task(BaseModel):

     # evaluation
     should_evaluate: bool = Field(default=False, description="True to run the evaluation flow")
-    eval_criteria: Optional[List[str]] = Field(default_factory=list, description="criteria to evaluate the outcome
+    eval_criteria: Optional[List[str]] = Field(default_factory=list, description="stores a list of criteria to evaluate the outcome")
+    fsls: Optional[list[str]] = Field(default=None, description="stores ideal/weak responses")

-    # recording
-
+    # recording
+    _tokens: int = 0
+    processed_agents: Set[str] = Field(default_factory=set, description="store keys of the agents that executed the task")
     tool_errors: int = 0
     delegations: int = 0
-
-    tokens: int = 0 # tokens consumed
-    output: Optional[TaskOutput] = Field(default=None, description="store the final task output in TaskOutput class")
+    output: Optional[TaskOutput] = Field(default=None, description="store the final TaskOutput object")


     @model_validator(mode="before")
|
@@ -553,7 +557,7 @@ Ref. Output image: {output_formats_to_follow}
|
|
553
557
|
task_output=str(task_output.raw),
|
554
558
|
agent=str(agent.role),
|
555
559
|
metadata=memory_metadata
|
556
|
-
|
560
|
+
)
|
557
561
|
|
558
562
|
except AttributeError as e:
|
559
563
|
Logger().log(level="error", message=f"Missing attributes for long term memory: {str(e)}", color="red")
|
@@ -604,10 +608,16 @@ Ref. Output image: {output_formats_to_follow}
         return agent_to_delegate


+    def _store_logs(self, inputs: Optional[Dict[str, Any]] = {}) -> None:
+        from versionhq.storage.task_output_storage import TaskOutputStorageHandler
+
+        TaskOutputStorageHandler().update(task=self, inputs=inputs)
+
+
     # task execution
     def execute(
         self, type: TaskExecutionType = None, agent: Optional["vhq.Agent"] = None, context: Optional[Any] = None
-
+        ) -> TaskOutput | Future[TaskOutput]:
         """
         A main method to handle task execution. Build an agent when the agent is not given.
         """
@@ -632,27 +642,19 @@ Ref. Output image: {output_formats_to_follow}
     def _execute_async(self, agent, context: Optional[Any] = None) -> Future[TaskOutput]:
         """Executes the task asynchronously."""
         future: Future[TaskOutput] = Future()
-        threading.Thread(daemon=True, target=self._execute_task_async, args=(agent, context, future)).start()
-        return future

+        def _handle_task_async(self, agent, context: Optional[str], future: Future[TaskOutput]) -> None:
+            result = self._execute_core(agent, context)
+            future.set_result(result)

-
-
-        Executes the task asynchronously with context handling.
-        """
-        result = self._execute_core(agent, context)
-        future.set_result(result)
+        threading.Thread(daemon=True, target=_handle_task_async, args=(agent, context, future)).start()
+        return future


     def _execute_core(self, agent, context: Optional[Any]) -> TaskOutput:
         """
-        A core method
-        Handles 1. agent delegation, 2. tools, 3. context to add to the prompt, and 4. callbacks.
+        A core method to execute a task.
         """
-
-        from versionhq.agent.model import Agent
-        from versionhq.agent_network.model import AgentNetwork
-
         task_output: InstanceOf[TaskOutput] = None
         raw_output: str = None
         tool_output: str | list = None
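The reworked _execute_async() above is the standard "resolve a Future from a daemon thread" pattern. A self-contained sketch of the same idea, with run_core standing in for Task._execute_core():

import threading
from concurrent.futures import Future

def execute_async(run_core, *args) -> Future:
    future: Future = Future()

    def handle() -> None:
        # run the synchronous core and hand its result to the Future
        future.set_result(run_core(*args))

    threading.Thread(daemon=True, target=handle).start()
    return future

future = execute_async(lambda x: x * 2, 21)
print(future.result())  # blocks until the worker thread finishes -> 42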
@@ -669,12 +671,12 @@ Ref. Output image: {output_formats_to_follow}
             agent = agent_to_delegate
             self.delegations += 1

-
         if self.tool_res_as_final == True:
             started_at = datetime.datetime.now()
             tool_output = agent.execute_task(task=self, context=context, task_tools=task_tools)
+            raw_output = str(tool_output) if tool_output else ""
             ended_at = datetime.datetime.now()
-            task_output = TaskOutput(task_id=self.id, tool_output=tool_output, raw=
+            task_output = TaskOutput(task_id=self.id, tool_output=tool_output, raw=raw_output)

         else:
             started_at = datetime.datetime.now()
@@ -691,26 +693,13 @@ Ref. Output image: {output_formats_to_follow}
                 task_id=self.id,
                 raw=raw_output if raw_output is not None else "",
                 pydantic=pydantic_output,
-                json_dict=json_dict_output
+                json_dict=json_dict_output,
             )

-
-        task_output.
+        task_output.latency = round((ended_at - started_at).total_seconds() * 1000, 3)
+        task_output._tokens = self._tokens
         self.output = task_output
-        self.processed_agents.add(agent.
-
-        if self.should_evaluate and raw_output: # eval only when raw output exsits
-            task_output.evaluate(task=self)
-
-        self._create_short_and_long_term_memories(agent=agent, task_output=task_output)
-
-        if self.callback and isinstance(self.callback, Callable):
-            kwargs = { **self.callback_kwargs, **task_output.json_dict }
-            sig = inspect.signature(self.callback)
-            valid_keys = [param.name for param in sig.parameters.values() if param.kind == param.POSITIONAL_OR_KEYWORD]
-            valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
-            callback_res = self.callback(**valid_kwargs)
-            task_output.callback_output = callback_res
+        self.processed_agents.add(agent.key)

         # if self.output_file: ## disabled for now
         #    content = (
@@ -719,15 +708,25 @@ Ref. Output image: {output_formats_to_follow}
         #    else pydantic_output.model_dump_json() if pydantic_output else result
         # )
         # self._save_file(content)
-        return task_output


-
-
-
-
+        # successful output will be evaluated and stored in the logs
+        if raw_output:
+            if self.should_evaluate:
+                task_output.evaluate(task=self)
+            self._create_short_and_long_term_memories(agent=agent, task_output=task_output)

-
+            if self.callback and isinstance(self.callback, Callable):
+                kwargs = { **self.callback_kwargs, **task_output.json_dict }
+                sig = inspect.signature(self.callback)
+                valid_keys = [param.name for param in sig.parameters.values() if param.kind == param.POSITIONAL_OR_KEYWORD]
+                valid_kwargs = { k: kwargs[k] if k in kwargs else None for k in valid_keys }
+                callback_res = self.callback(**valid_kwargs)
+                task_output.callback_output = callback_res
+
+        self._store_logs()
+
+        return task_output


     @property
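The callback block that moved into the success branch above filters the merged kwargs down to the callback's own positional-or-keyword parameters before calling it. A standalone sketch of that filtering, with an illustrative callback and kwargs:

import inspect

def notify(summary, score):  # illustrative user callback
    return f"{summary} (score={score})"

kwargs = {"summary": "Launch email drafted", "score": 0.87, "extra": "dropped"}

sig = inspect.signature(notify)
valid_keys = [p.name for p in sig.parameters.values() if p.kind == p.POSITIONAL_OR_KEYWORD]
valid_kwargs = {k: kwargs.get(k) for k in valid_keys}  # missing keys fall back to None

print(notify(**valid_kwargs))  # keys the callback cannot accept are silently dropped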
versionhq/task_graph/draft.py CHANGED
@@ -90,7 +90,7 @@ def workflow(final_output: Type[BaseModel], context: Any = None, human: bool = F
         target = [v for v in task_graph.nodes.values() if v.task.name == target_task_name][0]
         dependency_type = dependency_types[i]
         task_graph.add_dependency(
-
+            source=source.identifier, target=target.identifier, dependency_type=dependency_type)


     task_graph.visualize()
|