unique_toolkit 0.8.23__py3-none-any.whl → 0.8.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -78,11 +78,9 @@ class EvaluationManager:
78
78
  self,
79
79
  logger: Logger,
80
80
  chat_service: ChatService,
81
- assistant_message_id: str,
82
81
  ):
83
82
  self._logger = logger
84
83
  self._chat_service = chat_service
85
- self._assistant_message_id = assistant_message_id
86
84
  self._evaluations: dict[EvaluationMetricName, Evaluation] = {}
87
85
  self._evaluation_passed: bool = True
88
86
 
@@ -96,6 +94,7 @@ class EvaluationManager:
96
94
  self,
97
95
  selected_evaluation_names: list[EvaluationMetricName],
98
96
  loop_response: LanguageModelStreamResponse,
97
+ assistant_message_id: str,
99
98
  ) -> list[EvaluationMetricResult]:
100
99
  task_executor = SafeTaskExecutor(
101
100
  logger=self._logger,
@@ -106,6 +105,7 @@ class EvaluationManager:
106
105
  self.execute_evaluation_call,
107
106
  loop_response=loop_response,
108
107
  evaluation_name=evaluation_name,
108
+ assistant_message_id=assistant_message_id,
109
109
  )
110
110
  for evaluation_name in selected_evaluation_names
111
111
  ]
@@ -126,6 +126,7 @@ class EvaluationManager:
126
126
  self,
127
127
  evaluation_name: EvaluationMetricName,
128
128
  loop_response: LanguageModelStreamResponse,
129
+ assistant_message_id: str,
129
130
  ) -> EvaluationMetricResult:
130
131
  self._logger.info(f"Processing tool call: {evaluation_name}")
131
132
 
@@ -133,13 +134,15 @@ class EvaluationManager:
133
134
 
134
135
  if evaluation_instance:
135
136
  # Execute the evaluation
136
- await self._create_assistant_message(evaluation_instance)
137
+ await self._create_assistant_message(
138
+ evaluation_instance, assistant_message_id
139
+ )
137
140
  evaluation_metric_result: EvaluationMetricResult = (
138
141
  await evaluation_instance.run(loop_response)
139
142
  )
140
143
  # show results to the user
141
144
  await self._show_message_assessment(
142
- evaluation_instance, evaluation_metric_result
145
+ evaluation_instance, evaluation_metric_result, assistant_message_id
143
146
  )
144
147
 
145
148
  return evaluation_metric_result
@@ -182,6 +185,7 @@ class EvaluationManager:
182
185
  self,
183
186
  evaluation_instance: Evaluation,
184
187
  evaluation_metric_result: EvaluationMetricResult,
188
+ assistant_message_id: str,
185
189
  ) -> None:
186
190
  evaluation_assessment_message = (
187
191
  await evaluation_instance.evaluation_metric_to_assessment(
@@ -189,7 +193,7 @@ class EvaluationManager:
189
193
  )
190
194
  )
191
195
  await self._chat_service.modify_message_assessment_async(
192
- assistant_message_id=self._assistant_message_id,
196
+ assistant_message_id=assistant_message_id,
193
197
  status=evaluation_assessment_message.status,
194
198
  title=evaluation_assessment_message.title,
195
199
  explanation=evaluation_assessment_message.explanation,
@@ -197,9 +201,11 @@ class EvaluationManager:
197
201
  type=evaluation_assessment_message.type,
198
202
  )
199
203
 
200
- async def _create_assistant_message(self, evaluation_instance: Evaluation):
204
+ async def _create_assistant_message(
205
+ self, evaluation_instance: Evaluation, assistant_message_id: str
206
+ ):
201
207
  await self._chat_service.create_message_assessment_async(
202
- assistant_message_id=self._assistant_message_id,
208
+ assistant_message_id=assistant_message_id,
203
209
  status=ChatMessageAssessmentStatus.PENDING,
204
210
  type=evaluation_instance.get_assessment_type(),
205
211
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 0.8.23
3
+ Version: 0.8.24
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Martin Fadler
@@ -114,9 +114,13 @@ All notable changes to this project will be documented in this file.
114
114
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
115
115
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
116
116
 
117
+ ## [0.8.24] - 2025-08-25
118
+ - Optimized hallucination manager
119
+
117
120
  ## [0.8.23] - 2025-08-27
118
121
  - Add MCP manager that handles MCP related logic
119
122
 
123
+
120
124
  ## [0.8.22] - 2025-08-25
121
125
  - Add DeepSeek-R1, DeepSeek-V3.1, Qwen3-235B-A22B and Qwen3-235B-A22B-Thinking-2507 to supported model list
122
126
 
@@ -40,7 +40,7 @@ unique_toolkit/evals/config.py,sha256=ywHIrJs5SFdKr1WXfrofWuFfzb0iPQw8iZDpq5oEug
40
40
  unique_toolkit/evals/context_relevancy/prompts.py,sha256=EdHFUOB581yVxcOL8482KUv_LzaRjuiem71EF8udYMc,1331
41
41
  unique_toolkit/evals/context_relevancy/schema.py,sha256=lm9x0jExOinUk9itqC8ZpgReC7yj1VDwEMppxlZGqpY,2923
42
42
  unique_toolkit/evals/context_relevancy/service.py,sha256=txTWIhV65QGFhxG1jCb5TTqZc_c7K9I8pi2HVDTAfm8,8384
43
- unique_toolkit/evals/evaluation_manager.py,sha256=luavQrMTrxEi7hRhpUSlGLYpEza7eQCg5Nk46ogbabw,7721
43
+ unique_toolkit/evals/evaluation_manager.py,sha256=g-8qa_6_p53C9Okx8iNkuoIXYSJrf-6sQ-xku7bo9kI,7895
44
44
  unique_toolkit/evals/exception.py,sha256=7lcVbCyoN4Md1chNJDFxpUYyWbVrcr9dcc3TxWykJTc,115
45
45
  unique_toolkit/evals/hallucination/constants.py,sha256=FLcXl5XU07jCvS8YPX9l6UjTaqyQ8YvnSKpx4Z6wZ2Y,1997
46
46
  unique_toolkit/evals/hallucination/hallucination_evaluation.py,sha256=TuZ88jeVn0tVr9d0GhWyJSxKNA16nhvr2xRPo-yK8OM,3063
@@ -112,7 +112,7 @@ unique_toolkit/tools/utils/execution/execution.py,sha256=vjG2Y6awsGNtlvyQAGCTthQ
112
112
  unique_toolkit/tools/utils/source_handling/schema.py,sha256=vzAyf6ZWNexjMO0OrnB8y2glGkvAilmGGQXd6zcDaKw,870
113
113
  unique_toolkit/tools/utils/source_handling/source_formatting.py,sha256=C7uayNbdkNVJdEARA5CENnHtNY1SU6etlaqbgHNyxaQ,9152
114
114
  unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py,sha256=oM5ZxEgzROrnX1229KViCAFjRxl9wCTzWZoinYSHleM,6979
115
- unique_toolkit-0.8.23.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
116
- unique_toolkit-0.8.23.dist-info/METADATA,sha256=e9_t3WfrtzAJirY80IWJtPgvk1shO1OIgR5wRlDw8O4,28527
117
- unique_toolkit-0.8.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
118
- unique_toolkit-0.8.23.dist-info/RECORD,,
115
+ unique_toolkit-0.8.24.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
116
+ unique_toolkit-0.8.24.dist-info/METADATA,sha256=0vvS5x-HsApDZlOrB7vnLQPal3u7Ucqxv7hZa35ebOo,28588
117
+ unique_toolkit-0.8.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
118
+ unique_toolkit-0.8.24.dist-info/RECORD,,