unique_toolkit 0.8.11__tar.gz → 0.8.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/CHANGELOG.md +12 -0
  2. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/PKG-INFO +13 -1
  3. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/pyproject.toml +1 -1
  4. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/_common/validators.py +37 -2
  5. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/service.py +15 -1
  6. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/service.py +11 -0
  7. unique_toolkit-0.8.13/unique_toolkit/evals/evaluation_manager.py +206 -0
  8. unique_toolkit-0.8.13/unique_toolkit/evals/schemas.py +100 -0
  9. unique_toolkit-0.8.13/unique_toolkit/evaluators/exception.py +5 -0
  10. unique_toolkit-0.8.13/unique_toolkit/history_manager/history_manager.py +261 -0
  11. unique_toolkit-0.8.13/unique_toolkit/history_manager/utils.py +174 -0
  12. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/service.py +11 -0
  13. unique_toolkit-0.8.13/unique_toolkit/postprocessor/postprocessor_manager.py +122 -0
  14. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/reference_manager/reference_manager.py +19 -0
  15. unique_toolkit-0.8.13/unique_toolkit/short_term_memory/persistent_short_term_memory_manager.py +140 -0
  16. unique_toolkit-0.8.13/unique_toolkit/thinking_manager/thinking_manager.py +102 -0
  17. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/schemas.py +0 -1
  18. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/tool.py +1 -1
  19. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/tool_manager.py +26 -10
  20. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/LICENSE +0 -0
  21. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/README.md +0 -0
  22. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/__init__.py +0 -0
  23. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/_common/_base_service.py +0 -0
  24. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/_common/_time_utils.py +0 -0
  25. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/_common/exception.py +0 -0
  26. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/_common/validate_required_values.py +0 -0
  27. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/__init__.py +0 -0
  28. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/dev_util.py +0 -0
  29. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/init_logging.py +0 -0
  30. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/init_sdk.py +0 -0
  31. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/performance/async_tasks.py +0 -0
  32. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/performance/async_wrapper.py +0 -0
  33. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/schemas.py +0 -0
  34. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/unique_settings.py +0 -0
  35. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/app/verification.py +0 -0
  36. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/__init__.py +0 -0
  37. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/constants.py +0 -0
  38. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/functions.py +0 -0
  39. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/schemas.py +0 -0
  40. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/service.py +0 -0
  41. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/state.py +0 -0
  42. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/chat/utils.py +0 -0
  43. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/__init__.py +0 -0
  44. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/constants.py +0 -0
  45. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/functions.py +0 -0
  46. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/schemas.py +0 -0
  47. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/content/utils.py +0 -0
  48. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/__init__.py +0 -0
  49. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/constants.py +0 -0
  50. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/functions.py +0 -0
  51. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/schemas.py +0 -0
  52. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/embedding/utils.py +0 -0
  53. {unique_toolkit-0.8.11/unique_toolkit/evaluators → unique_toolkit-0.8.13/unique_toolkit/evals}/exception.py +0 -0
  54. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/__init__.py +0 -0
  55. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/config.py +0 -0
  56. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/constants.py +0 -0
  57. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/context_relevancy/constants.py +0 -0
  58. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/context_relevancy/prompts.py +0 -0
  59. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/context_relevancy/service.py +0 -0
  60. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/context_relevancy/utils.py +0 -0
  61. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/hallucination/constants.py +0 -0
  62. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/hallucination/prompts.py +0 -0
  63. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/hallucination/service.py +0 -0
  64. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/hallucination/utils.py +0 -0
  65. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/output_parser.py +0 -0
  66. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/evaluators/schemas.py +0 -0
  67. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
  68. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
  69. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/framework_utilities/openai/client.py +0 -0
  70. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
  71. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/framework_utilities/utils.py +0 -0
  72. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/__init__.py +0 -0
  73. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/builder.py +0 -0
  74. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/constants.py +0 -0
  75. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/functions.py +0 -0
  76. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/infos.py +0 -0
  77. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/prompt.py +0 -0
  78. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/reference.py +0 -0
  79. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/schemas.py +0 -0
  80. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/language_model/utils.py +0 -0
  81. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/protocols/support.py +0 -0
  82. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/short_term_memory/__init__.py +0 -0
  83. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/short_term_memory/constants.py +0 -0
  84. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/short_term_memory/functions.py +0 -0
  85. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/short_term_memory/schemas.py +0 -0
  86. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/short_term_memory/service.py +0 -0
  87. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/smart_rules/__init__.py +0 -0
  88. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/smart_rules/compile.py +0 -0
  89. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/agent_chunks_handler.py +0 -0
  90. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/config.py +0 -0
  91. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/factory.py +0 -0
  92. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/test/test_tool_progress_reporter.py +0 -0
  93. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/tool_progress_reporter.py +0 -0
  94. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/utils/execution/execution.py +0 -0
  95. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/utils/source_handling/schema.py +0 -0
  96. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/utils/source_handling/source_formatting.py +0 -0
  97. {unique_toolkit-0.8.11 → unique_toolkit-0.8.13}/unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py +0 -0
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.8.12] - 2025-08-18
9
+ - Adding initial versions of
10
+ - Evaluation Manager
11
+ - History Manager
12
+ - Postprocessor Manager
13
+ - Thinking Manager
14
+ - Updated tool manager
15
+
16
+ ## [0.8.13] - 2025-08-18
17
+ - Fix no tool call response in ChatMessage -> OpenAI messages translation
18
+ - Add simple append method to OpenAIMessageBuilder
19
+
8
20
  ## [0.8.11] - 2025-08-15
9
21
  - Fix no tool call response in ChatMessage -> OpenAI messages translation
10
22
  - Add simple append method to OpenAIMessageBuilder
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 0.8.11
3
+ Version: 0.8.13
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Martin Fadler
@@ -114,6 +114,18 @@ All notable changes to this project will be documented in this file.
114
114
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
115
115
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
116
116
 
117
+ ## [0.8.12] - 2025-08-18
118
+ - Adding initial versions of
119
+ - Evaluation Manager
120
+ - History Manager
121
+ - Postprocessor Manager
122
+ - Thinking Manager
123
+ - Updated tool manager
124
+
125
+ ## [0.8.13] - 2025-08-18
126
+ - Fix no tool call response in ChatMessage -> OpenAI messages translation
127
+ - Add simple append method to OpenAIMessageBuilder
128
+
117
129
  ## [0.8.11] - 2025-08-15
118
130
  - Fix no tool call response in ChatMessage -> OpenAI messages translation
119
131
  - Add simple append method to OpenAIMessageBuilder
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "unique_toolkit"
3
- version = "0.8.11"
3
+ version = "0.8.13"
4
4
  description = ""
5
5
  authors = [
6
6
  "Martin Fadler <martin.fadler@unique.ch>",
@@ -1,6 +1,7 @@
1
- from typing import Annotated
1
+ import logging
2
+ from typing import Annotated, Any
2
3
 
3
- from pydantic import BeforeValidator, PlainSerializer
4
+ from pydantic import BeforeValidator, Field, PlainSerializer, ValidationInfo
4
5
 
5
6
  from unique_toolkit.language_model import LanguageModelName
6
7
  from unique_toolkit.language_model.infos import (
@@ -8,6 +9,11 @@ from unique_toolkit.language_model.infos import (
8
9
  LanguageModelProvider,
9
10
  )
10
11
 
12
+ from pydantic.fields import FieldInfo
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
11
17
  # TODO @klcd: Inform on deprecation of str as input
12
18
  LMI = Annotated[
13
19
  LanguageModelInfo,
@@ -55,3 +61,32 @@ def validate_and_init_language_model_info(
55
61
  )
56
62
 
57
63
  return v
64
+
65
+
66
def ClipInt(*, min_value: int, max_value: int) -> tuple[BeforeValidator, FieldInfo]:
    """Build pydantic metadata that clamps an integer field into a closed range.

    The validator coerces the incoming value to ``int`` and, when it lies
    outside ``[min_value, max_value]``, logs a warning and replaces it with the
    nearest bound. The accompanying ``Field`` declares the same bounds through
    ``ge``/``le``.

    Presumably the returned tuple is unpacked into an ``Annotated`` type, e.g.
    ``Annotated[int, *ClipInt(min_value=1, max_value=10)]`` - confirm against
    the call sites.

    Args:
        min_value: Smallest value the field may hold (inclusive).
        max_value: Largest value the field may hold (inclusive).

    Returns:
        A ``(BeforeValidator, FieldInfo)`` tuple.

    Raises:
        ValueError: If ``min_value`` is greater than ``max_value``; such a
            range can never be satisfied.
    """
    if min_value > max_value:
        raise ValueError(
            f"min_value ({min_value}) must not exceed max_value ({max_value})"
        )

    def _validator(value: Any, info: ValidationInfo) -> Any:
        if not isinstance(value, int):
            try:
                value = int(value)
            except (TypeError, ValueError):
                # Not coercible (e.g. None or "abc"). int() raises TypeError
                # for None, which pydantic v2 does not turn into a
                # ValidationError, so pass the raw value on and let pydantic's
                # own integer validation report the problem.
                return value

        field_name = info.field_name
        if value < min_value:
            logger.warning(
                "Field %s is below the allowed minimum of %s. It will be set to %s.",
                field_name,
                min_value,
                min_value,
            )
            return min_value

        if value > max_value:
            logger.warning(
                "Field %s is above the allowed maximum of %s. It will be set to %s.",
                field_name,
                max_value,
                max_value,
            )
            return max_value

        return value

    return (BeforeValidator(_validator), Field(ge=min_value, le=max_value))
@@ -8,6 +8,7 @@ from typing_extensions import deprecated
8
8
 
9
9
  from unique_toolkit._common.validate_required_values import validate_required_values
10
10
  from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
11
+ from unique_toolkit.app.unique_settings import UniqueSettings
11
12
  from unique_toolkit.content import DOMAIN_NAME
12
13
  from unique_toolkit.content.constants import DEFAULT_SEARCH_LANGUAGE
13
14
  from unique_toolkit.content.functions import (
@@ -53,7 +54,7 @@ class ContentService:
53
54
  *,
54
55
  company_id: str,
55
56
  user_id: str,
56
- chat_id: str | None,
57
+ chat_id: str | None = None,
57
58
  metadata_filter: dict | None = None,
58
59
  ): ...
59
60
 
@@ -107,6 +108,19 @@ class ContentService:
107
108
  metadata_filter=metadata_filter,
108
109
  )
109
110
 
111
@classmethod
def from_settings(
    cls, settings: UniqueSettings, metadata_filter: dict | None = None
):
    """Create a ContentService from a ``UniqueSettings`` object.

    The company id and user id are read from ``settings.auth`` and unwrapped
    with ``get_secret_value()``. No ``chat_id`` is passed to the constructor.

    Args:
        settings: Settings object providing ``auth.company_id`` and
            ``auth.user_id``.
        metadata_filter: Optional filter forwarded unchanged to the
            constructor.
    """
    company_id = settings.auth.company_id.get_secret_value()
    user_id = settings.auth.user_id.get_secret_value()
    return cls(
        company_id=company_id,
        user_id=user_id,
        metadata_filter=metadata_filter,
    )
123
+
110
124
  @property
111
125
  @deprecated(
112
126
  "The event property is deprecated and will be removed in a future version."
@@ -5,6 +5,7 @@ from typing_extensions import deprecated
5
5
  from unique_toolkit._common._base_service import BaseService
6
6
  from unique_toolkit._common.validate_required_values import validate_required_values
7
7
  from unique_toolkit.app.schemas import BaseEvent, Event
8
+ from unique_toolkit.app.unique_settings import UniqueSettings
8
9
  from unique_toolkit.embedding.constants import DEFAULT_TIMEOUT
9
10
  from unique_toolkit.embedding.functions import embed_texts, embed_texts_async
10
11
  from unique_toolkit.embedding.schemas import Embeddings
@@ -54,6 +55,16 @@ class EmbeddingService(BaseService):
54
55
  """
55
56
  return cls(company_id=event.company_id, user_id=event.user_id)
56
57
 
58
@classmethod
def from_settings(cls, settings: UniqueSettings):
    """Create an EmbeddingService from a ``UniqueSettings`` object.

    The company id and user id are read from ``settings.auth`` and unwrapped
    with ``get_secret_value()`` before being handed to the constructor.
    """
    company_id = settings.auth.company_id.get_secret_value()
    user_id = settings.auth.user_id.get_secret_value()
    return cls(company_id=company_id, user_id=user_id)
67
+
57
68
  @property
58
69
  @deprecated(
59
70
  "The event property is deprecated and will be removed in a future version."
@@ -0,0 +1,206 @@
1
+ from abc import ABC
2
+ import asyncio
3
+
4
+
5
+ from unique_toolkit.tools.utils.execution.execution import Result, SafeTaskExecutor
6
+ from logging import Logger
7
+ from unique_toolkit.evals.schemas import (
8
+ EvaluationAssessmentMessage,
9
+ EvaluationMetricName,
10
+ EvaluationMetricResult,
11
+ )
12
+ from unique_toolkit.chat.schemas import (
13
+ ChatMessageAssessmentStatus,
14
+ ChatMessageAssessmentType,
15
+ )
16
+ from unique_toolkit.chat.service import ChatService
17
+ from unique_toolkit.language_model.schemas import (
18
+ LanguageModelStreamResponse,
19
+ )
20
+
21
+
22
class Evaluation(ABC):
    """Base class for a single evaluation metric.

    Typical use cases include:
    - hallucination checking
    - compliance checking

    Subclasses are expected to override ``get_assessment_type``, ``run`` and
    ``evaluation_metric_to_assessment``; the implementations here only raise
    ``NotImplementedError``.
    """

    def __init__(self, name: EvaluationMetricName):
        # The name is the key under which a manager can register this evaluation.
        self.name = name

    def get_name(self) -> EvaluationMetricName:
        """Return the metric name given at construction time."""
        return self.name

    def get_assessment_type(self) -> ChatMessageAssessmentType:
        """Return the assessment type of this evaluation (must be overridden)."""
        message = "Subclasses must implement this method to return the assessment type."
        raise NotImplementedError(message)

    async def run(
        self, loop_response: LanguageModelStreamResponse
    ) -> EvaluationMetricResult:
        """Evaluate ``loop_response`` and return the result (must be overridden)."""
        message = "Subclasses must implement this method."
        raise NotImplementedError(message)

    async def evaluation_metric_to_assessment(
        self, evaluation_result: EvaluationMetricResult
    ) -> EvaluationAssessmentMessage:
        """Convert ``evaluation_result`` into an assessment message (must be overridden)."""
        message = "Subclasses must implement this method to convert evaluation results to assessment messages."
        raise NotImplementedError(message)
54
+
55
+
56
class EvaluationManager:
    """Registers evaluations and runs them against a language model response.

    What this class does:
    - Keeps a per-instance registry of ``Evaluation`` objects keyed by their
      metric name.
    - Runs the selected evaluations concurrently via ``asyncio.gather``, each
      one wrapped in a ``SafeTaskExecutor`` call.
    - Creates a pending message assessment through the chat service before an
      evaluation runs and updates it with the outcome afterwards.
    - Records in ``_evaluation_passed`` whether any result came back with a
      falsy ``is_positive``.
    - Turns unknown evaluation names and unsuccessful executor results into
      fallback ``EvaluationMetricResult`` objects.
    """

    # Only declared here; the registry itself is created per instance in
    # __init__ so that managers do not share (and leak) registered evaluations
    # through a class-level mutable dict.
    _evaluations: dict[EvaluationMetricName, Evaluation]
    _evaluation_passed: bool = True

    def __init__(
        self,
        logger: Logger,
        chat_service: ChatService,
        assistant_message_id: str,
    ):
        """Store the collaborators and start with an empty registry.

        Args:
            logger: Logger used for progress messages and by the task executor.
            chat_service: Service used to create and modify message assessments.
            assistant_message_id: Id of the assistant message the assessments
                are attached to.
        """
        self._logger = logger
        self._chat_service = chat_service
        self._assistant_message_id = assistant_message_id
        self._evaluations = {}
        self._evaluation_passed = True

    def add_evaluation(self, evaluation: Evaluation):
        """Register ``evaluation`` under its name, replacing any previous entry."""
        self._evaluations[evaluation.get_name()] = evaluation

    def get_evaluation_by_name(self, name: EvaluationMetricName) -> Evaluation | None:
        """Return the evaluation registered under ``name``, or ``None``."""
        return self._evaluations.get(name)

    async def run_evaluations(
        self,
        selected_evaluation_names: list[EvaluationMetricName],
        loop_response: LanguageModelStreamResponse,
    ) -> list[EvaluationMetricResult]:
        """Run the selected evaluations concurrently and collect their results.

        Args:
            selected_evaluation_names: Names of the evaluations to execute.
            loop_response: The language model response to evaluate.

        Returns:
            One ``EvaluationMetricResult`` per requested name, in the same
            order as ``selected_evaluation_names``.
        """
        task_executor = SafeTaskExecutor(
            logger=self._logger,
        )

        tasks = [
            task_executor.execute_async(
                self.execute_evaluation_call,
                loop_response=loop_response,
                evaluation_name=evaluation_name,
            )
            for evaluation_name in selected_evaluation_names
        ]
        # gather() returns results in the order of the awaitables, so they can
        # be paired with the requested names positionally.
        evaluation_results = await asyncio.gather(*tasks)

        evaluation_results_unpacked: list[EvaluationMetricResult] = []
        for evaluation_name, result in zip(
            selected_evaluation_names, evaluation_results
        ):
            unpacked_evaluation_result = self._create_evaluation_metric_result(
                result, evaluation_name
            )
            if not unpacked_evaluation_result.is_positive:
                self._evaluation_passed = False
            evaluation_results_unpacked.append(unpacked_evaluation_result)

        return evaluation_results_unpacked

    async def execute_evaluation_call(
        self,
        evaluation_name: EvaluationMetricName,
        loop_response: LanguageModelStreamResponse,
    ) -> EvaluationMetricResult:
        """Run one evaluation and publish its assessment to the chat.

        Args:
            evaluation_name: Name of the registered evaluation to run.
            loop_response: The language model response to evaluate.

        Returns:
            The result produced by the evaluation, or a fallback result when
            no evaluation is registered under ``evaluation_name``.
        """
        self._logger.info(f"Processing evaluation: {evaluation_name}")

        evaluation_instance = self.get_evaluation_by_name(evaluation_name)
        if evaluation_instance is None:
            not_found = f"Evaluation named {evaluation_name} not found"
            return self._fallback_result(evaluation_name, not_found, not_found)

        # Show a pending assessment first, then run and publish the outcome.
        await self._create_assistant_message(evaluation_instance)
        evaluation_metric_result: EvaluationMetricResult = (
            await evaluation_instance.run(loop_response)
        )
        await self._show_message_assessment(
            evaluation_instance, evaluation_metric_result
        )

        return evaluation_metric_result

    @staticmethod
    def _fallback_result(
        evaluation_name: EvaluationMetricName,
        reason: str,
        error_message: str,
    ) -> EvaluationMetricResult:
        """Build the result used when an evaluation could not produce one."""
        # NOTE(review): fallbacks carry is_positive=True together with
        # value="RED", so they never flip _evaluation_passed in
        # run_evaluations - confirm this is intended.
        return EvaluationMetricResult(
            name=evaluation_name,
            is_positive=True,
            value="RED",
            reason=reason,
            error=Exception(error_message),
        )

    def _create_evaluation_metric_result(
        self,
        result: Result[EvaluationMetricResult],
        evaluation_name: EvaluationMetricName,
    ) -> EvaluationMetricResult:
        """Unwrap an executor ``Result`` into an ``EvaluationMetricResult``.

        Unsuccessful results and payloads of an unexpected type are replaced
        by a fallback result for ``evaluation_name``.
        """
        if not result.success:
            return self._fallback_result(
                evaluation_name,
                str(result.exception),
                "Evaluation result is not successful",
            )

        unpacked = result.unpack()
        if not isinstance(unpacked, EvaluationMetricResult):
            wrong_type = "Evaluation result is not of type EvaluationMetricResult"
            return self._fallback_result(evaluation_name, wrong_type, wrong_type)

        return unpacked

    async def _show_message_assessment(
        self,
        evaluation_instance: Evaluation,
        evaluation_metric_result: EvaluationMetricResult,
    ) -> None:
        """Convert the result to an assessment and update it in the chat."""
        evaluation_assessment_message = (
            await evaluation_instance.evaluation_metric_to_assessment(
                evaluation_metric_result
            )
        )
        await self._chat_service.modify_message_assessment_async(
            assistant_message_id=self._assistant_message_id,
            status=evaluation_assessment_message.status,
            title=evaluation_assessment_message.title,
            explanation=evaluation_assessment_message.explanation,
            label=evaluation_assessment_message.label,
            type=evaluation_assessment_message.type,
        )

    async def _create_assistant_message(self, evaluation_instance: Evaluation):
        """Create a PENDING message assessment for the given evaluation."""
        await self._chat_service.create_message_assessment_async(
            assistant_message_id=self._assistant_message_id,
            status=ChatMessageAssessmentStatus.PENDING,
            type=evaluation_instance.get_assessment_type(),
        )
@@ -0,0 +1,100 @@
1
+ from enum import StrEnum
2
+ from typing import Optional
3
+
4
+ from pydantic import BaseModel, ConfigDict, Field
5
+ from unique_toolkit.chat import ChatMessage
6
+ from unique_toolkit.evals.exception import EvaluatorException
7
+ from unique_toolkit.chat.schemas import (
8
+ ChatMessageAssessmentLabel,
9
+ ChatMessageAssessmentStatus,
10
+ ChatMessageAssessmentType,
11
+ )
12
+
13
+
14
+ class EvaluationMetricName(StrEnum):
15
+ HALLUCINATION = "hallucination"
16
+ CONTEXT_RELEVANCY = "relevancy"
17
+
18
+
19
+ class EvaluationMetricInputFieldName(StrEnum):
20
+ INPUT_TEXT = "input_text"
21
+ CONTEXT_TEXTS = "context_texts"
22
+ HISTORY_MESSAGES = "history_messages"
23
+ OUTPUT_TEXT = "output_text"
24
+
25
+
26
class EvaluationMetricInput(BaseModel):
    """Input for any metric evaluation.

    Every field is optional because different metrics need different inputs;
    use ``validate_required_fields`` to enforce what a given metric requires.
    """

    input_text: Optional[str] = None
    context_texts: Optional[list[str]] = None
    history_messages: Optional[list[ChatMessage]] = None
    output_text: Optional[str] = None

    def get_joined_context_texts(self, tag_name: str = "reference") -> str:
        """Return the context texts, each wrapped in a numbered tag.

        Tags are numbered from 1 (``<reference-1>...</reference-1>``) and the
        wrapped texts are joined with newlines. A placeholder string is
        returned when there are no context texts.
        """
        if not self.context_texts:
            return f"<No {tag_name} texts provided>"

        wrapped_texts = []
        for position, text in enumerate(self.context_texts, start=1):
            wrapped_texts.append(
                f"<{tag_name}-{position}>{text}</{tag_name}-{position}>"
            )
        return "\n".join(wrapped_texts)

    def get_history_message_text(self, chat_message: ChatMessage):
        """Format one chat message as ``"<role>: <content>"``."""
        role = chat_message.role.value
        return f"{role}: {chat_message.content}"

    def get_history_message_texts(self) -> list[str]:
        """Return every history message formatted as text (empty list if none)."""
        if self.history_messages:
            return [
                self.get_history_message_text(message)
                for message in self.history_messages
            ]
        return []

    def get_joined_history_texts(self, tag_name: str = "conversation") -> str:
        """Return the formatted history messages joined with newlines.

        A placeholder string is returned when there are no history messages.
        """
        if self.history_messages:
            return "\n".join(self.get_history_message_texts())
        return f"<No {tag_name} texts provided>"

    def validate_required_fields(
        self, required_fields: list[EvaluationMetricInputFieldName]
    ):
        """Check that every required input field is set.

        Raises:
            EvaluatorException: For the first field in ``required_fields``
                whose value is ``None``.
        """
        for field in required_fields:
            if getattr(self, field) is not None:
                continue
            error_message = f"Missing required input field: {field}"
            raise EvaluatorException(
                user_message=error_message,
                error_message=error_message,
            )
81
+
82
+
83
class EvaluationMetricResult(BaseModel):
    """Outcome of running one evaluation metric."""

    # Needed because ``error`` holds a plain ``Exception`` instance.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: EvaluationMetricName
    value: str
    reason: str
    is_positive: Optional[bool] = None
    user_info: Optional[str] = None
    error: Optional[Exception] = None
    # Each instance gets its own empty list by default.
    fact_list: list[str] = Field(default_factory=list)
93
+
94
+
95
class EvaluationAssessmentMessage(BaseModel):
    """Assessment data derived from an evaluation result.

    The fields mirror the keyword arguments used when a message assessment is
    modified through the chat service.
    """

    status: ChatMessageAssessmentStatus
    explanation: str
    title: str
    label: ChatMessageAssessmentLabel
    type: ChatMessageAssessmentType
@@ -0,0 +1,5 @@
1
+ from unique_toolkit._common.exception import CommonException
2
+
3
+
4
class EvaluatorException(CommonException):
    """Exception raised by evaluators; adds nothing to ``CommonException``."""