unique_toolkit 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +51 -17
- {unique_toolkit-1.4.0.dist-info → unique_toolkit-1.4.1.dist-info}/METADATA +4 -1
- {unique_toolkit-1.4.0.dist-info → unique_toolkit-1.4.1.dist-info}/RECORD +5 -5
- {unique_toolkit-1.4.0.dist-info → unique_toolkit-1.4.1.dist-info}/LICENSE +0 -0
- {unique_toolkit-1.4.0.dist-info → unique_toolkit-1.4.1.dist-info}/WHEEL +0 -0
@@ -31,6 +31,9 @@ class _SubAgentToolInfo(TypedDict):
|
|
31
31
|
display_name: str
|
32
32
|
|
33
33
|
|
34
|
+
NO_ASSESSMENTS_FOUND = "NO_ASSESSMENTS_FOUND"
|
35
|
+
|
36
|
+
|
34
37
|
class SubAgentsEvaluation(Evaluation):
|
35
38
|
DISPLAY_NAME = "Sub Agents"
|
36
39
|
|
@@ -68,6 +71,7 @@ class SubAgentsEvaluation(Evaluation):
|
|
68
71
|
|
69
72
|
value = ChatMessageAssessmentLabel.GREEN
|
70
73
|
|
74
|
+
# Use a dict in order to compare labels (RED being the worst)
|
71
75
|
label_comparison_dict = defaultdict(
|
72
76
|
lambda: 3
|
73
77
|
) # Unknown labels are highest in the sorting
|
@@ -76,33 +80,56 @@ class SubAgentsEvaluation(Evaluation):
|
|
76
80
|
label_comparison_dict[ChatMessageAssessmentLabel.RED] = 0
|
77
81
|
|
78
82
|
for assistant_id, tool_info in self._assistant_id_to_tool_info.items():
|
79
|
-
assessments = tool_info["assessment"]
|
80
|
-
|
81
|
-
|
83
|
+
assessments = tool_info["assessment"] or []
|
84
|
+
valid_assessments = []
|
85
|
+
for assessment in assessments:
|
86
|
+
if (
|
87
|
+
assessment["label"] is None
|
88
|
+
or assessment["label"] not in ChatMessageAssessmentLabel
|
89
|
+
):
|
90
|
+
logger.warning(
|
91
|
+
"Unkown assistant label %s for assistant %s will be ignored",
|
92
|
+
assessment["label"],
|
93
|
+
assistant_id,
|
94
|
+
)
|
95
|
+
continue
|
96
|
+
if assessment["status"] != ChatMessageAssessmentStatus.DONE:
|
97
|
+
logger.warning(
|
98
|
+
"Assessment %s for assistant %s is not done (status: %s) will be ignored",
|
99
|
+
assessment["label"],
|
100
|
+
assistant_id,
|
101
|
+
)
|
102
|
+
continue
|
103
|
+
valid_assessments.append(assessment)
|
104
|
+
|
105
|
+
if len(valid_assessments) == 0:
|
106
|
+
logger.info("No valid assessment found for assistant %s", assistant_id)
|
82
107
|
continue
|
83
108
|
|
84
|
-
|
85
|
-
|
109
|
+
assessments = sorted(
|
110
|
+
valid_assessments, key=lambda x: label_comparison_dict[x["label"]]
|
86
111
|
)
|
87
112
|
|
88
113
|
for assessment in assessments:
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
"Unkown assistant label %s for assistant %s will be ignored",
|
93
|
-
label,
|
94
|
-
assistant_id,
|
95
|
-
)
|
96
|
-
continue
|
97
|
-
value = min(value, label, key=lambda x: label_comparison_dict[x])
|
114
|
+
value = min(
|
115
|
+
value, assessment["label"], key=lambda x: label_comparison_dict[x]
|
116
|
+
)
|
98
117
|
|
99
118
|
sub_agents_display_data.append(
|
100
119
|
{
|
101
120
|
"name": tool_info["display_name"],
|
102
|
-
"assessments":
|
121
|
+
"assessments": assessments,
|
103
122
|
}
|
104
123
|
)
|
105
124
|
|
125
|
+
if len(sub_agents_display_data) == 0:
|
126
|
+
logger.warning("No valid sub agent assessments found")
|
127
|
+
return EvaluationMetricResult(
|
128
|
+
name=self.get_name(),
|
129
|
+
value=NO_ASSESSMENTS_FOUND,
|
130
|
+
reason="No sub agents assessments found",
|
131
|
+
)
|
132
|
+
|
106
133
|
should_summarize = False
|
107
134
|
reason = ""
|
108
135
|
|
@@ -115,8 +142,6 @@ class SubAgentsEvaluation(Evaluation):
|
|
115
142
|
reason = (
|
116
143
|
sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
|
117
144
|
)
|
118
|
-
else:
|
119
|
-
assert False, "No sub agents assessments found"
|
120
145
|
|
121
146
|
if should_summarize:
|
122
147
|
messages = (
|
@@ -148,6 +173,15 @@ class SubAgentsEvaluation(Evaluation):
|
|
148
173
|
async def evaluation_metric_to_assessment(
|
149
174
|
self, evaluation_result: EvaluationMetricResult
|
150
175
|
) -> EvaluationAssessmentMessage:
|
176
|
+
if evaluation_result.value == NO_ASSESSMENTS_FOUND:
|
177
|
+
return EvaluationAssessmentMessage(
|
178
|
+
status=ChatMessageAssessmentStatus.DONE,
|
179
|
+
explanation="No valid sub agents assessments found to consolidate.",
|
180
|
+
title=self.DISPLAY_NAME,
|
181
|
+
label=ChatMessageAssessmentLabel.GREEN,
|
182
|
+
type=self.get_assessment_type(),
|
183
|
+
)
|
184
|
+
|
151
185
|
return EvaluationAssessmentMessage(
|
152
186
|
status=ChatMessageAssessmentStatus.DONE,
|
153
187
|
explanation=evaluation_result.reason,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: unique_toolkit
|
3
|
-
Version: 1.4.0
|
3
|
+
Version: 1.4.1
|
4
4
|
Summary:
|
5
5
|
License: Proprietary
|
6
6
|
Author: Cedric Klinkert
|
@@ -118,6 +118,9 @@ All notable changes to this project will be documented in this file.
|
|
118
118
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
119
119
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
120
120
|
|
121
|
+
## [1.4.1] - 2025-09-30
|
122
|
+
- Handle failed sub agent assessments better in the sub agent evaluator.
|
123
|
+
|
121
124
|
## [1.4.0] - 2025-09-29
|
122
125
|
- Add ability to consolidate sub agent's assessments.
|
123
126
|
|
@@ -54,7 +54,7 @@ unique_toolkit/agentic/tools/a2a/__init__.py,sha256=NdY0J33b1G4sbx6UWwNS74JVSAeE
|
|
54
54
|
unique_toolkit/agentic/tools/a2a/config.py,sha256=exKyR-RyQ3RDJcEAKwfOdyj1flfbBaRhcdn5ROnmNB4,1513
|
55
55
|
unique_toolkit/agentic/tools/a2a/evaluation/__init__.py,sha256=H9YhT22w8EadV9b-6IDqYqKQa41qcA3m6ADzmP7g6Cc,246
|
56
56
|
unique_toolkit/agentic/tools/a2a/evaluation/config.py,sha256=o1Xj2H4175C1ALT8-wIfks69Xez3pgY77PFyPBYS4Hs,1692
|
57
|
-
unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=
|
57
|
+
unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=V3y72yAZ5ynDnzp8V7UxMpnwa1Xyw7gvjPqfgHCeMkU,7660
|
58
58
|
unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2,sha256=acP1YqD_sCy6DT0V2EIfhQTmaUKeqpeWNJ7RGgceo8I,271
|
59
59
|
unique_toolkit/agentic/tools/a2a/manager.py,sha256=yuuQuBrAcsT3gAWEdxf6EvRnL_iWtvaK14lRs21w5PA,1665
|
60
60
|
unique_toolkit/agentic/tools/a2a/memory.py,sha256=4VFBzITCv5E_8YCc4iF4Y6FhzplS2C-FZaZHdeC7DyA,1028
|
@@ -137,7 +137,7 @@ unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJ
|
|
137
137
|
unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBuE9sI2o9Aajqjxg,8884
|
138
138
|
unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
139
|
unique_toolkit/smart_rules/compile.py,sha256=cxWjb2dxEI2HGsakKdVCkSNi7VK9mr08w5sDcFCQyWI,9553
|
140
|
-
unique_toolkit-1.4.
|
141
|
-
unique_toolkit-1.4.
|
142
|
-
unique_toolkit-1.4.
|
143
|
-
unique_toolkit-1.4.
|
140
|
+
unique_toolkit-1.4.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
141
|
+
unique_toolkit-1.4.1.dist-info/METADATA,sha256=XwdO5BNQidG6zSM_lKxs4SZQYeALlNREgBWrygFFw7o,33949
|
142
|
+
unique_toolkit-1.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
143
|
+
unique_toolkit-1.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|