unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unique_toolkit might be problematic. Click here for more details.

Files changed (166) hide show
  1. unique_toolkit/__init__.py +28 -1
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +252 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +305 -0
  16. unique_toolkit/_common/endpoint_requestor.py +430 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/feature_flags/schema.py +9 -0
  19. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  20. unique_toolkit/_common/pydantic_helpers.py +154 -0
  21. unique_toolkit/_common/referencing.py +53 -0
  22. unique_toolkit/_common/string_utilities.py +140 -0
  23. unique_toolkit/_common/tests/test_referencing.py +521 -0
  24. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  25. unique_toolkit/_common/token/image_token_counting.py +67 -0
  26. unique_toolkit/_common/token/token_counting.py +204 -0
  27. unique_toolkit/_common/utils/__init__.py +1 -0
  28. unique_toolkit/_common/utils/files.py +43 -0
  29. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  30. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  31. unique_toolkit/_common/utils/write_configuration.py +51 -0
  32. unique_toolkit/_common/validators.py +101 -4
  33. unique_toolkit/agentic/__init__.py +1 -0
  34. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  35. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  36. unique_toolkit/agentic/evaluation/config.py +36 -0
  37. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  38. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  39. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  40. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  41. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  42. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
  43. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  44. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
  45. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
  46. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  47. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  48. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  49. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  50. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
  51. unique_toolkit/agentic/history_manager/history_manager.py +242 -0
  52. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  53. unique_toolkit/agentic/history_manager/utils.py +96 -0
  54. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  55. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  56. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  57. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
  58. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
  59. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  60. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  61. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  62. unique_toolkit/agentic/tools/__init__.py +1 -0
  63. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  64. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  65. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  66. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  67. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  68. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  69. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  70. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  71. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  72. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
  73. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
  74. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
  75. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
  76. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  77. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
  78. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  79. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  80. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  81. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  82. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  83. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  84. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  85. unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
  86. unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
  87. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  88. unique_toolkit/agentic/tools/config.py +167 -0
  89. unique_toolkit/agentic/tools/factory.py +44 -0
  90. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  91. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  92. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  93. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  94. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  95. unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
  96. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  97. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
  98. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
  99. unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
  100. unique_toolkit/agentic/tools/schemas.py +141 -0
  101. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  102. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  103. unique_toolkit/agentic/tools/tool.py +183 -0
  104. unique_toolkit/agentic/tools/tool_manager.py +523 -0
  105. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  106. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  107. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  108. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  109. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  110. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  111. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  112. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  113. unique_toolkit/app/__init__.py +6 -0
  114. unique_toolkit/app/dev_util.py +180 -0
  115. unique_toolkit/app/init_sdk.py +32 -1
  116. unique_toolkit/app/schemas.py +198 -31
  117. unique_toolkit/app/unique_settings.py +367 -0
  118. unique_toolkit/chat/__init__.py +8 -1
  119. unique_toolkit/chat/deprecated/service.py +232 -0
  120. unique_toolkit/chat/functions.py +642 -77
  121. unique_toolkit/chat/rendering.py +34 -0
  122. unique_toolkit/chat/responses_api.py +461 -0
  123. unique_toolkit/chat/schemas.py +133 -2
  124. unique_toolkit/chat/service.py +115 -767
  125. unique_toolkit/content/functions.py +153 -4
  126. unique_toolkit/content/schemas.py +122 -15
  127. unique_toolkit/content/service.py +278 -44
  128. unique_toolkit/content/smart_rules.py +301 -0
  129. unique_toolkit/content/utils.py +8 -3
  130. unique_toolkit/embedding/service.py +102 -11
  131. unique_toolkit/framework_utilities/__init__.py +1 -0
  132. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  133. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  134. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  135. unique_toolkit/framework_utilities/openai/client.py +83 -0
  136. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  137. unique_toolkit/framework_utilities/utils.py +23 -0
  138. unique_toolkit/language_model/__init__.py +3 -0
  139. unique_toolkit/language_model/builder.py +27 -11
  140. unique_toolkit/language_model/default_language_model.py +3 -0
  141. unique_toolkit/language_model/functions.py +327 -43
  142. unique_toolkit/language_model/infos.py +992 -50
  143. unique_toolkit/language_model/reference.py +242 -0
  144. unique_toolkit/language_model/schemas.py +475 -48
  145. unique_toolkit/language_model/service.py +228 -27
  146. unique_toolkit/protocols/support.py +145 -0
  147. unique_toolkit/services/__init__.py +7 -0
  148. unique_toolkit/services/chat_service.py +1630 -0
  149. unique_toolkit/services/knowledge_base.py +861 -0
  150. unique_toolkit/short_term_memory/service.py +178 -41
  151. unique_toolkit/smart_rules/__init__.py +0 -0
  152. unique_toolkit/smart_rules/compile.py +56 -0
  153. unique_toolkit/test_utilities/events.py +197 -0
  154. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
  155. unique_toolkit-1.23.0.dist-info/RECORD +182 -0
  156. unique_toolkit/evaluators/__init__.py +0 -1
  157. unique_toolkit/evaluators/config.py +0 -35
  158. unique_toolkit/evaluators/constants.py +0 -1
  159. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  160. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  161. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  162. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  163. unique_toolkit-0.7.7.dist-info/RECORD +0 -64
  164. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  165. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
  166. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,141 @@
1
+ import base64
2
+ import zlib
3
+ from logging import getLogger
4
+ from typing import Generic, Type, TypeVar
5
+
6
+ from pydantic import BaseModel
7
+
8
+ from unique_toolkit.agentic.tools.utils.execution.execution import SafeTaskExecutor
9
+ from unique_toolkit.short_term_memory.schemas import ShortTermMemory
10
+ from unique_toolkit.short_term_memory.service import ShortTermMemoryService
11
+
12
+ TSchema = TypeVar("TSchema", bound=BaseModel)
13
+
14
+
15
+ logger = getLogger(__name__)
16
+
17
+
18
+ def _default_short_term_memory_name(schema: type[BaseModel]) -> str:
19
+ return f"{schema.__name__}Key"
20
+
21
+
22
+ def _compress_data_zlib_base64(data: str) -> str:
23
+ """Compress data using ZLIB and encode as base64 string."""
24
+ compressed = zlib.compress(data.encode("utf-8"))
25
+ return base64.b64encode(compressed).decode("utf-8")
26
+
27
+
28
+ def _decompress_data_zlib_base64(compressed_data: str) -> str:
29
+ """Decompress base64 encoded ZLIB data."""
30
+ decoded = base64.b64decode(compressed_data.encode("utf-8"))
31
+ return zlib.decompress(decoded).decode("utf-8")
32
+
33
+
34
class PersistentShortMemoryManager(Generic[TSchema]):
    """
    Persist one schema-typed short-term memory entry through a ShortTermMemoryService.

    Saving serializes the schema instance to JSON, compresses it with zlib and
    stores the resulting base64 text under a single memory key. Loading looks
    up the latest entry for that key and validates it back into the schema;
    entries whose data is already a ``dict`` are validated directly without
    decompression.

    Every operation is available in a synchronous and an asynchronous variant.
    """

    def __init__(
        self,
        short_term_memory_service: ShortTermMemoryService,
        short_term_memory_schema: Type[TSchema],
        short_term_memory_name: str | None = None,
    ) -> None:
        """
        Args:
            short_term_memory_service: Service used to create and look up memories.
            short_term_memory_schema: Model class the stored data is validated into.
            short_term_memory_name: Memory key. When falsy (``None`` or ``""``)
                the key defaults to ``"<SchemaClassName>Key"``.
        """
        self._short_term_memory_name = (
            short_term_memory_name
            or _default_short_term_memory_name(short_term_memory_schema)
        )
        self._short_term_memory_schema = short_term_memory_schema
        self._short_term_memory_service = short_term_memory_service

        # NOTE(review): presumably the executor captures exceptions raised by
        # the wrapped call and reports them via ``result.success`` -- confirm
        # against SafeTaskExecutor. Exception logging is switched off here.
        self._executor = SafeTaskExecutor(
            log_exceptions=False,
        )

    def _log_not_found(self) -> None:
        """Warn that the lookup for this chat/key did not succeed."""
        logger.warning(
            "No short term memory found for chat %s and key %s",
            self._short_term_memory_service.chat_id,
            self._short_term_memory_name,
        )

    def _log_found(self) -> None:
        """Record (debug level) that the lookup for this chat/key succeeded."""
        logger.debug(
            "Short term memory found for chat %s and key %s",
            self._short_term_memory_service.chat_id,
            self._short_term_memory_name,
        )

    def _log_lookup_outcome(self, success: bool) -> None:
        """Log the lookup result; any unsuccessful lookup is reported as 'not found'."""
        if success:
            self._log_found()
        else:
            self._log_not_found()

    def _find_latest_memory_sync(self) -> ShortTermMemory | None:
        """Look up the latest memory for the configured key; ``None`` on failure."""
        result = self._executor.execute(
            self._short_term_memory_service.find_latest_memory,
            self._short_term_memory_name,
        )
        self._log_lookup_outcome(result.success)
        return result.unpack(default=None)

    async def _find_latest_memory_async(self) -> ShortTermMemory | None:
        """Async counterpart of ``_find_latest_memory_sync``."""
        result = await self._executor.execute_async(
            self._short_term_memory_service.find_latest_memory_async,
            self._short_term_memory_name,
        )
        self._log_lookup_outcome(result.success)
        return result.unpack(default=None)

    def _serialize(self, short_term_memory: TSchema) -> str:
        """Dump ``short_term_memory`` to JSON, compress it and log both sizes."""
        json_data = short_term_memory.model_dump_json()
        compressed_data = _compress_data_zlib_base64(json_data)
        logger.info(
            "Saving memory with %s characters compressed from %s characters for memory %s",
            len(compressed_data),
            len(json_data),
            self._short_term_memory_name,
        )
        return compressed_data

    def save_sync(self, short_term_memory: TSchema) -> None:
        """Compress ``short_term_memory`` and store it under the configured key."""
        self._short_term_memory_service.create_memory(
            key=self._short_term_memory_name,
            value=self._serialize(short_term_memory),
        )

    async def save_async(self, short_term_memory: TSchema) -> None:
        """Async counterpart of ``save_sync``."""
        await self._short_term_memory_service.create_memory_async(
            key=self._short_term_memory_name,
            value=self._serialize(short_term_memory),
        )

    def _process_compressed_memory(
        self, memory: ShortTermMemory | None
    ) -> TSchema | None:
        """
        Convert a stored memory back into the schema type.

        Returns ``None`` when there is no memory, it has no data, or the data
        is neither a ``str`` nor a ``dict``. String data is treated as the
        base64/zlib payload written by the save methods; dict data is
        validated as-is.
        """
        if memory is None or memory.data is None:
            return None

        data = memory.data
        if isinstance(data, str):
            return self._short_term_memory_schema.model_validate_json(
                _decompress_data_zlib_base64(data)
            )
        if isinstance(data, dict):
            return self._short_term_memory_schema.model_validate(data)
        return None

    def load_sync(self) -> TSchema | None:
        """Load and validate the latest stored memory, or ``None`` if unavailable."""
        memory: ShortTermMemory | None = self._find_latest_memory_sync()
        return self._process_compressed_memory(memory)

    async def load_async(self) -> TSchema | None:
        """Async counterpart of ``load_sync``."""
        memory: ShortTermMemory | None = await self._find_latest_memory_async()
        return self._process_compressed_memory(memory)
@@ -0,0 +1,103 @@
1
+ from logging import Logger
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_toolkit.agentic.tools.tool_progress_reporter import (
6
+ ToolProgressReporter,
7
+ )
8
+ from unique_toolkit.chat.service import ChatService
9
+ from unique_toolkit.language_model.schemas import (
10
+ LanguageModelStreamResponse,
11
+ )
12
+
13
+
14
# Settings consumed by ThinkingManager; currently a single display toggle.
class ThinkingManagerConfig(BaseModel):
    # Enabled by default; ThinkingManager checks this flag before touching
    # the start text, the progress reporter or the assistant message.
    thinking_steps_display: bool = Field(
        description="Whether to display thinking steps in the chat.",
        default=True,
    )
18
+
19
+
20
class ThinkingManager:
    """
    Accumulate numbered "thinking steps" and render them in a collapsible block.

    The steps are collected as an HTML-like string wrapped in a
    ``<details open>`` element that is prepended to the response start text.
    Once generation is finished the element can be collapsed
    (``<details>``) and the assistant message is updated through the chat
    service. All mutating methods are no-ops when
    ``config.thinking_steps_display`` is false.
    """

    def __init__(
        self,
        logger: Logger,
        config: ThinkingManagerConfig,
        tool_progress_reporter: ToolProgressReporter,
        chat_service: ChatService,
    ):
        """
        Args:
            logger: Accepted for interface compatibility; not used by this class.
            config: Controls whether thinking steps are displayed.
            tool_progress_reporter: Reporter whose start text and tool statuses
                are reset when the model produces new text.
            chat_service: Used to modify the assistant message when the
                thinking section is closed.
        """
        self._chat_service = chat_service
        self._config = config
        self._thinking_steps = ""
        self._thinking_step_number = 1
        self._tool_progress_reporter = tool_progress_reporter

    def thinking_is_displayed(self) -> bool:
        """Return the configured ``thinking_steps_display`` flag."""
        return self._config.thinking_steps_display

    def update_tool_progress_reporter(
        self, loop_response: LanguageModelStreamResponse
    ) -> None:
        """Reset the reporter when the response text differs from its start text."""
        if not self._config.thinking_steps_display:
            return

        latest_text = loop_response.message.text
        if latest_text != self._tool_progress_reporter._progress_start_text:
            self._tool_progress_reporter.tool_statuses = {}
            self._tool_progress_reporter._progress_start_text = latest_text

    def update_start_text(
        self, start_text: str, loop_response: LanguageModelStreamResponse
    ) -> str:
        """
        Append the response's original text as the next thinking step.

        Returns ``start_text`` unchanged when display is disabled or the
        response has no original text (``None`` or empty). Otherwise returns
        a new start text containing all steps collected so far.
        """
        if not self._config.thinking_steps_display:
            return start_text

        update_message = loop_response.message.original_text
        # A single falsy check covers both None and the empty string.
        if not update_message:
            return start_text

        if start_text == "":
            # An empty start text begins a fresh section and discards earlier steps.
            # NOTE(review): the label is hard-coded to "Step 1" while the
            # counter keeps running -- confirm this is intended if the start
            # text can be empty after the first step.
            self._thinking_steps = f"\n<i><b>Step 1:</b>\n{update_message}</i>\n"
            start_text = f"""<details open>\n<summary><b>Thinking steps</b></summary>\n{self._thinking_steps}\n</details>\n\n---\n\n"""
        else:
            self._thinking_steps += f"\n\n<i><b>Step {self._thinking_step_number}:</b>\n{update_message}</i>\n\n"
            start_text = f"""<details open>\n<summary><b>Thinking steps</b></summary>\n<i>{self._thinking_steps}\n\n</i>\n</details>\n\n---\n\n"""

        self._thinking_step_number += 1
        return start_text

    def close_thinking_steps(
        self, loop_response: LanguageModelStreamResponse
    ) -> None:
        """
        Collapse the thinking section and push the updated message to the chat.

        Does nothing unless display is enabled, at least one step was
        recorded, and the response text starts with ``<details open>``.
        Mutates ``loop_response.message.text`` in place.
        """
        if not self._config.thinking_steps_display:
            return
        if not self._thinking_steps:
            return

        message_text = loop_response.message.text
        if not message_text or not message_text.startswith("<details open>"):
            return

        loop_response.message.text = message_text.replace(
            "<details open>", "<details>"
        )
        self._chat_service.modify_assistant_message(content=loop_response.message.text)
@@ -0,0 +1 @@
1
+ """Tools module for the Unique Toolkit."""
@@ -0,0 +1,36 @@
1
+ from unique_toolkit.agentic.tools.a2a.config import ExtendedSubAgentToolConfig
2
+ from unique_toolkit.agentic.tools.a2a.evaluation import (
3
+ SubAgentEvaluationService,
4
+ SubAgentEvaluationServiceConfig,
5
+ SubAgentEvaluationSpec,
6
+ )
7
+ from unique_toolkit.agentic.tools.a2a.manager import A2AManager
8
+ from unique_toolkit.agentic.tools.a2a.postprocessing import (
9
+ SubAgentDisplaySpec,
10
+ SubAgentReferencesPostprocessor,
11
+ SubAgentResponsesDisplayPostprocessor,
12
+ SubAgentResponsesPostprocessorConfig,
13
+ )
14
+ from unique_toolkit.agentic.tools.a2a.prompts import (
15
+ REFERENCING_INSTRUCTIONS_FOR_SYSTEM_PROMPT,
16
+ REFERENCING_INSTRUCTIONS_FOR_USER_PROMPT,
17
+ )
18
+ from unique_toolkit.agentic.tools.a2a.response_watcher import SubAgentResponseWatcher
19
+ from unique_toolkit.agentic.tools.a2a.tool import SubAgentTool, SubAgentToolConfig
20
+
21
+ __all__ = [
22
+ "SubAgentToolConfig",
23
+ "SubAgentTool",
24
+ "SubAgentResponsesDisplayPostprocessor",
25
+ "SubAgentResponsesPostprocessorConfig",
26
+ "SubAgentDisplaySpec",
27
+ "A2AManager",
28
+ "ExtendedSubAgentToolConfig",
29
+ "SubAgentEvaluationServiceConfig",
30
+ "SubAgentEvaluationService",
31
+ "REFERENCING_INSTRUCTIONS_FOR_SYSTEM_PROMPT",
32
+ "REFERENCING_INSTRUCTIONS_FOR_USER_PROMPT",
33
+ "SubAgentResponseWatcher",
34
+ "SubAgentReferencesPostprocessor",
35
+ "SubAgentEvaluationSpec",
36
+ ]
@@ -0,0 +1,17 @@
1
+ from pydantic import Field
2
+
3
+ from unique_toolkit.agentic.tools.a2a.evaluation import SubAgentEvaluationConfig
4
+ from unique_toolkit.agentic.tools.a2a.postprocessing import SubAgentDisplayConfig
5
+ from unique_toolkit.agentic.tools.a2a.tool import SubAgentToolConfig
6
+
7
+
8
# Extends SubAgentToolConfig with two nested settings groups: one for how the
# sub-agent response is displayed and one for how its assessments are handled.
class ExtendedSubAgentToolConfig(SubAgentToolConfig):
    # Built from SubAgentDisplayConfig() when not supplied.
    response_display_config: SubAgentDisplayConfig = Field(
        description="Configuration for how to display the sub-agent response.",
        default_factory=SubAgentDisplayConfig,
    )
    # Built from SubAgentEvaluationConfig() when not supplied.
    evaluation_config: SubAgentEvaluationConfig = Field(
        description="Configuration for handling assessments of the sub-agent response.",
        default_factory=SubAgentEvaluationConfig,
    )
@@ -0,0 +1,15 @@
1
+ from unique_toolkit.agentic.tools.a2a.evaluation.config import (
2
+ SubAgentEvaluationConfig,
3
+ SubAgentEvaluationServiceConfig,
4
+ )
5
+ from unique_toolkit.agentic.tools.a2a.evaluation.evaluator import (
6
+ SubAgentEvaluationService,
7
+ SubAgentEvaluationSpec,
8
+ )
9
+
10
+ __all__ = [
11
+ "SubAgentEvaluationService",
12
+ "SubAgentEvaluationServiceConfig",
13
+ "SubAgentEvaluationConfig",
14
+ "SubAgentEvaluationSpec",
15
+ ]
@@ -0,0 +1,66 @@
1
+ import logging
2
+
3
+ import unique_sdk
4
+
5
+ from unique_toolkit.chat.schemas import (
6
+ ChatMessageAssessmentLabel,
7
+ ChatMessageAssessmentStatus,
8
+ )
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
# Severity rank per assessment label: a lower number is a worse result.
# Used as the sort/min key by sort_assessments and get_worst_label, so RED
# sorts first and wins "worst label" comparisons.
_ASSESSMENT_LABEL_COMPARISON_DICT: dict[str, int] = {
    ChatMessageAssessmentLabel.RED: 0,
    ChatMessageAssessmentLabel.YELLOW: 1,
    ChatMessageAssessmentLabel.GREEN: 2,
}
17
+
18
+
19
def sort_assessments(
    assessments: list[unique_sdk.Space.Assessment],
) -> list[unique_sdk.Space.Assessment]:
    """
    Return a new list of ``assessments`` ordered from worst to best label.

    The input list is left untouched and ties keep their original order.
    Every assessment must carry a label present in the comparison table;
    otherwise the lookup raises ``KeyError``.
    """

    def _severity(assessment: unique_sdk.Space.Assessment) -> int:
        return _ASSESSMENT_LABEL_COMPARISON_DICT[assessment["label"]]  # type: ignore (should be checked before sorting)

    ordered = list(assessments)
    ordered.sort(key=_severity)
    return ordered
26
+
27
+
28
def get_worst_label(
    *labels: str,
) -> str:
    """
    Return the label with the lowest severity rank among ``labels``.

    When several labels share the lowest rank the first one passed wins.
    Raises ``ValueError`` when called without labels and ``KeyError`` for a
    label missing from the comparison table.
    """
    rank_of = _ASSESSMENT_LABEL_COMPARISON_DICT.__getitem__
    return min(labels, key=rank_of)
35
+
36
+
37
def _is_known_assessment_label(label: object) -> bool:
    """Return True when ``label`` is a ChatMessageAssessmentLabel member or member value."""
    # Looking the value up through the enum constructor behaves the same on
    # every Python version, whereas ``value in EnumClass`` raises TypeError
    # for plain (non-member) values before Python 3.12.
    try:
        ChatMessageAssessmentLabel(label)
    except ValueError:
        return False
    return True


def get_valid_assessments(
    assessments: list[unique_sdk.Space.Assessment],
    display_name: str,
    sequence_number: int,
) -> list[unique_sdk.Space.Assessment]:
    """
    Keep only the assessments that have a known label and are finished.

    An assessment is dropped, with a warning, when its label is ``None`` or
    not a ChatMessageAssessmentLabel value, or when its status is not
    ``ChatMessageAssessmentStatus.DONE``.

    Args:
        assessments: Assessments to filter; the list itself is not modified.
        display_name: Assistant name, used only in the warning messages.
        sequence_number: Sequence number, used only in the warning messages.

    Returns:
        The accepted assessments in their original order.
    """
    valid_assessments = []
    for assessment in assessments:
        label = assessment["label"]
        if label is None or not _is_known_assessment_label(label):
            logger.warning(
                "Unknown assessment label %s for assistant %s (sequence number: %s) will be ignored",
                label,
                display_name,
                sequence_number,
            )
            continue

        status = assessment["status"]
        if status != ChatMessageAssessmentStatus.DONE:
            logger.warning(
                "Assessment %s for assistant %s (sequence number: %s) is not done (status: %s) will be ignored",
                label,
                display_name,
                sequence_number,
                status,
            )
            continue

        valid_assessments.append(assessment)

    return valid_assessments
@@ -0,0 +1,55 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic import AliasChoices, BaseModel, Field
4
+
5
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
6
+ from unique_toolkit._common.validators import LMI, get_LMI_default_field
7
+ from unique_toolkit.chat.schemas import (
8
+ ChatMessageAssessmentType,
9
+ )
10
+ from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
11
+
12
# Default system prompt for the model that summarizes sub-agent assessments.
# Surrounding whitespace is stripped so the prompt starts and ends on text.
DEFAULT_EVALUATION_SYSTEM_MESSAGE_TEMPLATE = """
You are a thorough and precise summarization model.
You will receive a list of "assessments" of one or more agent(s) response(s).
Your task is to give a brief summary (1-10 sentences) of the received assessments, following the following guidelines:
1. You must NOT in ANY case state a fact that is not stated in the given assessments.
2. You must focus first and foremost on the failing assessments, labeled `RED` below.
3. You must mention each agent's name when summarizing its list of assessments.
4. You must NOT use any markdown formatting in your response as this will FAIL to render in the chat frontend.
""".strip()
21
+
22
# Default user-message template, read at import time from the .j2 file that
# sits next to this module. The encoding is pinned to UTF-8 so the result
# does not depend on the platform's locale default.
DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE = (
    (Path(__file__).parent / "summarization_user_message.j2")
    .read_text(encoding="utf-8")
    .strip()
)
24
+
25
+
26
# Settings for the sub-agent evaluation service: which assessment type is
# shown and which model and prompts are used to summarize the assessments.
class SubAgentEvaluationServiceConfig(BaseModel):
    model_config = get_configuration_dict()

    # Defaults to the COMPLIANCE assessment type.
    assessment_type: ChatMessageAssessmentType = Field(
        description="The type of assessment to use in the display.",
        default=ChatMessageAssessmentType.COMPLIANCE,
    )
    # Language model used for summarization; defaults to DEFAULT_GPT_4o.
    summarization_model: LMI = get_LMI_default_field(DEFAULT_GPT_4o)
    # System prompt; defaults to the module-level template constant.
    summarization_system_message: str = Field(
        description="The system message template for the summarization model.",
        default=DEFAULT_EVALUATION_SYSTEM_MESSAGE_TEMPLATE,
    )
    # User prompt template; the default is loaded from the bundled .j2 file.
    summarization_user_message_template: str = Field(
        description="The user message template for the summarization model.",
        default=DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE,
    )
42
+
43
+
44
# Per-sub-agent switch controlling whether its evaluation is included.
class SubAgentEvaluationConfig(BaseModel):
    model_config = get_configuration_dict()

    # Besides "includeEvaluation", the misspelled legacy keys are still
    # accepted on input so configurations written with the old name load.
    include_evaluation: bool = Field(
        description="Whether to include the evaluation in the response.",
        validation_alias=AliasChoices(
            "includeEvaluation",
            "displayEvalution",  # typo in old config name
            "display_evalution",
        ),
        default=True,
    )