unique_toolkit 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,9 @@ class _SubAgentToolInfo(TypedDict):
31
31
  display_name: str
32
32
 
33
33
 
34
+ NO_ASSESSMENTS_FOUND = "NO_ASSESSMENTS_FOUND"
35
+
36
+
34
37
  class SubAgentsEvaluation(Evaluation):
35
38
  DISPLAY_NAME = "Sub Agents"
36
39
 
@@ -68,6 +71,7 @@ class SubAgentsEvaluation(Evaluation):
68
71
 
69
72
  value = ChatMessageAssessmentLabel.GREEN
70
73
 
74
+ # Use a dict in order to compare labels (RED being the worst)
71
75
  label_comparison_dict = defaultdict(
72
76
  lambda: 3
73
77
  ) # Unknown labels are highest in the sorting
@@ -76,33 +80,56 @@ class SubAgentsEvaluation(Evaluation):
76
80
  label_comparison_dict[ChatMessageAssessmentLabel.RED] = 0
77
81
 
78
82
  for assistant_id, tool_info in self._assistant_id_to_tool_info.items():
79
- assessments = tool_info["assessment"]
80
- if assessments is None or len(assessments) == 0:
81
- logger.info("No assessment found for assistant %s", assistant_id)
83
+ assessments = tool_info["assessment"] or []
84
+ valid_assessments = []
85
+ for assessment in assessments:
86
+ if (
87
+ assessment["label"] is None
88
+ or assessment["label"] not in ChatMessageAssessmentLabel
89
+ ):
90
+ logger.warning(
91
+ "Unkown assistant label %s for assistant %s will be ignored",
92
+ assessment["label"],
93
+ assistant_id,
94
+ )
95
+ continue
96
+ if assessment["status"] != ChatMessageAssessmentStatus.DONE:
97
+ logger.warning(
98
+ "Assessment %s for assistant %s is not done (status: %s) will be ignored",
99
+ assessment["label"],
100
+ assistant_id,
101
+ )
102
+ continue
103
+ valid_assessments.append(assessment)
104
+
105
+ if len(valid_assessments) == 0:
106
+ logger.info("No valid assessment found for assistant %s", assistant_id)
82
107
  continue
83
108
 
84
- assessments_display_data = sorted(
85
- assessments, key=lambda x: label_comparison_dict[x["label"]]
109
+ assessments = sorted(
110
+ valid_assessments, key=lambda x: label_comparison_dict[x["label"]]
86
111
  )
87
112
 
88
113
  for assessment in assessments:
89
- if label := assessment["label"]:
90
- if label not in ChatMessageAssessmentLabel:
91
- logger.warning(
92
- "Unkown assistant label %s for assistant %s will be ignored",
93
- label,
94
- assistant_id,
95
- )
96
- continue
97
- value = min(value, label, key=lambda x: label_comparison_dict[x])
114
+ value = min(
115
+ value, assessment["label"], key=lambda x: label_comparison_dict[x]
116
+ )
98
117
 
99
118
  sub_agents_display_data.append(
100
119
  {
101
120
  "name": tool_info["display_name"],
102
- "assessments": assessments_display_data,
121
+ "assessments": assessments,
103
122
  }
104
123
  )
105
124
 
125
+ if len(sub_agents_display_data) == 0:
126
+ logger.warning("No valid sub agent assessments found")
127
+ return EvaluationMetricResult(
128
+ name=self.get_name(),
129
+ value=NO_ASSESSMENTS_FOUND,
130
+ reason="No sub agents assessments found",
131
+ )
132
+
106
133
  should_summarize = False
107
134
  reason = ""
108
135
 
@@ -115,8 +142,6 @@ class SubAgentsEvaluation(Evaluation):
115
142
  reason = (
116
143
  sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
117
144
  )
118
- else:
119
- assert False, "No sub agents assessments found"
120
145
 
121
146
  if should_summarize:
122
147
  messages = (
@@ -148,6 +173,15 @@ class SubAgentsEvaluation(Evaluation):
148
173
  async def evaluation_metric_to_assessment(
149
174
  self, evaluation_result: EvaluationMetricResult
150
175
  ) -> EvaluationAssessmentMessage:
176
+ if evaluation_result.value == NO_ASSESSMENTS_FOUND:
177
+ return EvaluationAssessmentMessage(
178
+ status=ChatMessageAssessmentStatus.DONE,
179
+ explanation="No valid sub agents assessments found to consolidate.",
180
+ title=self.DISPLAY_NAME,
181
+ label=ChatMessageAssessmentLabel.GREEN,
182
+ type=self.get_assessment_type(),
183
+ )
184
+
151
185
  return EvaluationAssessmentMessage(
152
186
  status=ChatMessageAssessmentStatus.DONE,
153
187
  explanation=evaluation_result.reason,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 1.4.0
3
+ Version: 1.4.1
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Cedric Klinkert
@@ -118,6 +118,9 @@ All notable changes to this project will be documented in this file.
118
118
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
119
119
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
120
120
 
121
+ ## [1.4.1] - 2025-09-30
122
+ - Handle failed sub-agent assessments more gracefully in the sub-agent evaluator.
123
+
121
124
  ## [1.4.0] - 2025-09-29
122
125
  - Add ability to consolidate sub agent's assessments.
123
126
 
@@ -54,7 +54,7 @@ unique_toolkit/agentic/tools/a2a/__init__.py,sha256=NdY0J33b1G4sbx6UWwNS74JVSAeE
54
54
  unique_toolkit/agentic/tools/a2a/config.py,sha256=exKyR-RyQ3RDJcEAKwfOdyj1flfbBaRhcdn5ROnmNB4,1513
55
55
  unique_toolkit/agentic/tools/a2a/evaluation/__init__.py,sha256=H9YhT22w8EadV9b-6IDqYqKQa41qcA3m6ADzmP7g6Cc,246
56
56
  unique_toolkit/agentic/tools/a2a/evaluation/config.py,sha256=o1Xj2H4175C1ALT8-wIfks69Xez3pgY77PFyPBYS4Hs,1692
57
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=1yg2I72ke3BefXwvzLdS74CKVCnUXTvz7ZlQkQR9Ttw,6351
57
+ unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=V3y72yAZ5ynDnzp8V7UxMpnwa1Xyw7gvjPqfgHCeMkU,7660
58
58
  unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2,sha256=acP1YqD_sCy6DT0V2EIfhQTmaUKeqpeWNJ7RGgceo8I,271
59
59
  unique_toolkit/agentic/tools/a2a/manager.py,sha256=yuuQuBrAcsT3gAWEdxf6EvRnL_iWtvaK14lRs21w5PA,1665
60
60
  unique_toolkit/agentic/tools/a2a/memory.py,sha256=4VFBzITCv5E_8YCc4iF4Y6FhzplS2C-FZaZHdeC7DyA,1028
@@ -137,7 +137,7 @@ unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJ
137
137
  unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBuE9sI2o9Aajqjxg,8884
138
138
  unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
139
  unique_toolkit/smart_rules/compile.py,sha256=cxWjb2dxEI2HGsakKdVCkSNi7VK9mr08w5sDcFCQyWI,9553
140
- unique_toolkit-1.4.0.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
141
- unique_toolkit-1.4.0.dist-info/METADATA,sha256=esZSnEGEAT6jSQPUb614RktEvSQWrf5O9ZLBiStfdyM,33855
142
- unique_toolkit-1.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
143
- unique_toolkit-1.4.0.dist-info/RECORD,,
140
+ unique_toolkit-1.4.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
141
+ unique_toolkit-1.4.1.dist-info/METADATA,sha256=XwdO5BNQidG6zSM_lKxs4SZQYeALlNREgBWrygFFw7o,33949
142
+ unique_toolkit-1.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
143
+ unique_toolkit-1.4.1.dist-info/RECORD,,