unique_toolkit 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,9 @@ class _SubAgentToolInfo(TypedDict):
31
31
  display_name: str
32
32
 
33
33
 
34
+ NO_ASSESSMENTS_FOUND = "NO_ASSESSMENTS_FOUND"
35
+
36
+
34
37
  class SubAgentsEvaluation(Evaluation):
35
38
  DISPLAY_NAME = "Sub Agents"
36
39
 
@@ -68,6 +71,7 @@ class SubAgentsEvaluation(Evaluation):
68
71
 
69
72
  value = ChatMessageAssessmentLabel.GREEN
70
73
 
74
+ # Use a dict in order to compare labels (RED being the worst)
71
75
  label_comparison_dict = defaultdict(
72
76
  lambda: 3
73
77
  ) # Unknown labels are highest in the sorting
@@ -76,33 +80,56 @@ class SubAgentsEvaluation(Evaluation):
76
80
  label_comparison_dict[ChatMessageAssessmentLabel.RED] = 0
77
81
 
78
82
  for assistant_id, tool_info in self._assistant_id_to_tool_info.items():
79
- assessments = tool_info["assessment"]
80
- if assessments is None or len(assessments) == 0:
81
- logger.info("No assessment found for assistant %s", assistant_id)
83
+ assessments = tool_info["assessment"] or []
84
+ valid_assessments = []
85
+ for assessment in assessments:
86
+ if (
87
+ assessment["label"] is None
88
+ or assessment["label"] not in ChatMessageAssessmentLabel
89
+ ):
90
+ logger.warning(
91
+ "Unkown assistant label %s for assistant %s will be ignored",
92
+ assessment["label"],
93
+ assistant_id,
94
+ )
95
+ continue
96
+ if assessment["status"] != ChatMessageAssessmentStatus.DONE:
97
+ logger.warning(
98
+ "Assessment %s for assistant %s is not done (status: %s) will be ignored",
99
+ assessment["label"],
100
+ assistant_id,
101
+ )
102
+ continue
103
+ valid_assessments.append(assessment)
104
+
105
+ if len(valid_assessments) == 0:
106
+ logger.info("No valid assessment found for assistant %s", assistant_id)
82
107
  continue
83
108
 
84
- assessments_display_data = sorted(
85
- assessments, key=lambda x: label_comparison_dict[x["label"]]
109
+ assessments = sorted(
110
+ valid_assessments, key=lambda x: label_comparison_dict[x["label"]]
86
111
  )
87
112
 
88
113
  for assessment in assessments:
89
- if label := assessment["label"]:
90
- if label not in ChatMessageAssessmentLabel:
91
- logger.warning(
92
- "Unkown assistant label %s for assistant %s will be ignored",
93
- label,
94
- assistant_id,
95
- )
96
- continue
97
- value = min(value, label, key=lambda x: label_comparison_dict[x])
114
+ value = min(
115
+ value, assessment["label"], key=lambda x: label_comparison_dict[x]
116
+ )
98
117
 
99
118
  sub_agents_display_data.append(
100
119
  {
101
120
  "name": tool_info["display_name"],
102
- "assessments": assessments_display_data,
121
+ "assessments": assessments,
103
122
  }
104
123
  )
105
124
 
125
+ if len(sub_agents_display_data) == 0:
126
+ logger.warning("No valid sub agent assessments found")
127
+ return EvaluationMetricResult(
128
+ name=self.get_name(),
129
+ value=NO_ASSESSMENTS_FOUND,
130
+ reason="No sub agents assessments found",
131
+ )
132
+
106
133
  should_summarize = False
107
134
  reason = ""
108
135
 
@@ -115,8 +142,6 @@ class SubAgentsEvaluation(Evaluation):
115
142
  reason = (
116
143
  sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
117
144
  )
118
- else:
119
- assert False, "No sub agents assessments found"
120
145
 
121
146
  if should_summarize:
122
147
  messages = (
@@ -148,6 +173,15 @@ class SubAgentsEvaluation(Evaluation):
148
173
  async def evaluation_metric_to_assessment(
149
174
  self, evaluation_result: EvaluationMetricResult
150
175
  ) -> EvaluationAssessmentMessage:
176
+ if evaluation_result.value == NO_ASSESSMENTS_FOUND:
177
+ return EvaluationAssessmentMessage(
178
+ status=ChatMessageAssessmentStatus.DONE,
179
+ explanation="No valid sub agents assessments found to consolidate.",
180
+ title=self.DISPLAY_NAME,
181
+ label=ChatMessageAssessmentLabel.GREEN,
182
+ type=self.get_assessment_type(),
183
+ )
184
+
151
185
  return EvaluationAssessmentMessage(
152
186
  status=ChatMessageAssessmentStatus.DONE,
153
187
  explanation=evaluation_result.reason,
@@ -37,7 +37,9 @@ class LanguageModelName(StrEnum):
37
37
  "litellm:anthropic-claude-3-7-sonnet-thinking"
38
38
  )
39
39
  ANTHROPIC_CLAUDE_SONNET_4 = "litellm:anthropic-claude-sonnet-4"
40
+ ANTHROPIC_CLAUDE_SONNET_4_5 = "litellm:anthropic-claude-sonnet-4-5"
40
41
  ANTHROPIC_CLAUDE_OPUS_4 = "litellm:anthropic-claude-opus-4"
42
+ ANTHROPIC_CLAUDE_OPUS_4_1 = "litellm:anthropic-claude-opus-4-1"
41
43
  GEMINI_2_0_FLASH = "litellm:gemini-2-0-flash"
42
44
  GEMINI_2_5_FLASH = "litellm:gemini-2-5-flash"
43
45
  GEMINI_2_5_FLASH_LITE_PREVIEW_0617 = "litellm:gemini-2-5-flash-lite-preview-06-17"
@@ -663,6 +665,26 @@ class LanguageModelInfo(BaseModel):
663
665
  info_cutoff_at=date(2025, 3, 1),
664
666
  published_at=date(2025, 5, 1),
665
667
  )
668
+ case LanguageModelName.ANTHROPIC_CLAUDE_SONNET_4_5:
669
+ return cls(
670
+ name=model_name,
671
+ capabilities=[
672
+ ModelCapabilities.FUNCTION_CALLING,
673
+ ModelCapabilities.STREAMING,
674
+ ModelCapabilities.VISION,
675
+ ModelCapabilities.REASONING,
676
+ ],
677
+ provider=LanguageModelProvider.LITELLM,
678
+ version="claude-sonnet-4-5",
679
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
680
+ token_limits=LanguageModelTokenLimits(
681
+ # Input limit is 200_000. NOTE(review): a 20_000-token buffer for tokenizer mismatch is described here, but token_limit_input below is the full 200_000 — confirm which is intended
682
+ token_limit_input=200_000,
683
+ token_limit_output=64_000,
684
+ ),
685
+ info_cutoff_at=date(2025, 7, 1),
686
+ published_at=date(2025, 9, 29),
687
+ )
666
688
  case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4:
667
689
  return cls(
668
690
  name=model_name,
@@ -683,6 +705,25 @@ class LanguageModelInfo(BaseModel):
683
705
  info_cutoff_at=date(2025, 3, 1),
684
706
  published_at=date(2025, 5, 1),
685
707
  )
708
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4_1:
709
+ return cls(
710
+ name=model_name,
711
+ capabilities=[
712
+ ModelCapabilities.FUNCTION_CALLING,
713
+ ModelCapabilities.STREAMING,
714
+ ModelCapabilities.VISION,
715
+ ModelCapabilities.REASONING,
716
+ ],
717
+ provider=LanguageModelProvider.LITELLM,
718
+ version="claude-opus-4",
719
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
720
+ token_limits=LanguageModelTokenLimits(
721
+ token_limit_input=200_000,
722
+ token_limit_output=32_000,
723
+ ),
724
+ info_cutoff_at=date(2025, 3, 1),
725
+ published_at=date(2025, 5, 1),
726
+ )
686
727
  case LanguageModelName.GEMINI_2_0_FLASH:
687
728
  return cls(
688
729
  name=model_name,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Cedric Klinkert
@@ -118,6 +118,12 @@ All notable changes to this project will be documented in this file.
118
118
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
119
119
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
120
120
 
121
+ ## [1.4.2] - 2025-09-30
122
+ - Adding litellm models `litellm:anthropic-claude-sonnet-4-5` and `litellm:anthropic-claude-opus-4-1`
123
+
124
+ ## [1.4.1] - 2025-09-30
125
+ - Handle sub agent failed assessments better in sub agent evaluator.
126
+
121
127
  ## [1.4.0] - 2025-09-29
122
128
  - Add ability to consolidate sub agent's assessments.
123
129
 
@@ -54,7 +54,7 @@ unique_toolkit/agentic/tools/a2a/__init__.py,sha256=NdY0J33b1G4sbx6UWwNS74JVSAeE
54
54
  unique_toolkit/agentic/tools/a2a/config.py,sha256=exKyR-RyQ3RDJcEAKwfOdyj1flfbBaRhcdn5ROnmNB4,1513
55
55
  unique_toolkit/agentic/tools/a2a/evaluation/__init__.py,sha256=H9YhT22w8EadV9b-6IDqYqKQa41qcA3m6ADzmP7g6Cc,246
56
56
  unique_toolkit/agentic/tools/a2a/evaluation/config.py,sha256=o1Xj2H4175C1ALT8-wIfks69Xez3pgY77PFyPBYS4Hs,1692
57
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=1yg2I72ke3BefXwvzLdS74CKVCnUXTvz7ZlQkQR9Ttw,6351
57
+ unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py,sha256=V3y72yAZ5ynDnzp8V7UxMpnwa1Xyw7gvjPqfgHCeMkU,7660
58
58
  unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2,sha256=acP1YqD_sCy6DT0V2EIfhQTmaUKeqpeWNJ7RGgceo8I,271
59
59
  unique_toolkit/agentic/tools/a2a/manager.py,sha256=yuuQuBrAcsT3gAWEdxf6EvRnL_iWtvaK14lRs21w5PA,1665
60
60
  unique_toolkit/agentic/tools/a2a/memory.py,sha256=4VFBzITCv5E_8YCc4iF4Y6FhzplS2C-FZaZHdeC7DyA,1028
@@ -123,7 +123,7 @@ unique_toolkit/language_model/__init__.py,sha256=lRQyLlbwHbNFf4-0foBU13UGb09lwEe
123
123
  unique_toolkit/language_model/builder.py,sha256=4OKfwJfj3TrgO1ezc_ewIue6W7BCQ2ZYQXUckWVPPTA,3369
124
124
  unique_toolkit/language_model/constants.py,sha256=B-topqW0r83dkC_25DeQfnPk3n53qzIHUCBS7YJ0-1U,119
125
125
  unique_toolkit/language_model/functions.py,sha256=PNCmbYovhgMSkY89p7-3DunG6jIekaZPvhh3iplG1Vg,16720
126
- unique_toolkit/language_model/infos.py,sha256=eHln--Y5f6znFxknV6A8m-fRaEpH5-kmRh9m-ZWqco4,57188
126
+ unique_toolkit/language_model/infos.py,sha256=jc53AfqUyhgDRSK-nIK2S8d1RsedTNyyE_QQgGG_RFk,59256
127
127
  unique_toolkit/language_model/prompt.py,sha256=JSawaLjQg3VR-E2fK8engFyJnNdk21zaO8pPIodzN4Q,3991
128
128
  unique_toolkit/language_model/reference.py,sha256=nkX2VFz-IrUz8yqyc3G5jUMNwrNpxITBrMEKkbqqYoI,8583
129
129
  unique_toolkit/language_model/schemas.py,sha256=w23zH2OAYkTsS-wAqelUdhO9TCgis0TbFa8PszmhZYY,16501
@@ -137,7 +137,7 @@ unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJ
137
137
  unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBuE9sI2o9Aajqjxg,8884
138
138
  unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
139
  unique_toolkit/smart_rules/compile.py,sha256=cxWjb2dxEI2HGsakKdVCkSNi7VK9mr08w5sDcFCQyWI,9553
140
- unique_toolkit-1.4.0.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
141
- unique_toolkit-1.4.0.dist-info/METADATA,sha256=esZSnEGEAT6jSQPUb614RktEvSQWrf5O9ZLBiStfdyM,33855
142
- unique_toolkit-1.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
143
- unique_toolkit-1.4.0.dist-info/RECORD,,
140
+ unique_toolkit-1.4.2.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
141
+ unique_toolkit-1.4.2.dist-info/METADATA,sha256=aIA-PciV6Tv8mVFR3uJFxGXqFAQ_-mtJ8-1X-he1ehQ,34076
142
+ unique_toolkit-1.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
143
+ unique_toolkit-1.4.2.dist-info/RECORD,,