unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unique_toolkit might be problematic. Click here for more details.

Files changed (166)
  1. unique_toolkit/__init__.py +28 -1
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +252 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +305 -0
  16. unique_toolkit/_common/endpoint_requestor.py +430 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/feature_flags/schema.py +9 -0
  19. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  20. unique_toolkit/_common/pydantic_helpers.py +154 -0
  21. unique_toolkit/_common/referencing.py +53 -0
  22. unique_toolkit/_common/string_utilities.py +140 -0
  23. unique_toolkit/_common/tests/test_referencing.py +521 -0
  24. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  25. unique_toolkit/_common/token/image_token_counting.py +67 -0
  26. unique_toolkit/_common/token/token_counting.py +204 -0
  27. unique_toolkit/_common/utils/__init__.py +1 -0
  28. unique_toolkit/_common/utils/files.py +43 -0
  29. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  30. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  31. unique_toolkit/_common/utils/write_configuration.py +51 -0
  32. unique_toolkit/_common/validators.py +101 -4
  33. unique_toolkit/agentic/__init__.py +1 -0
  34. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  35. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  36. unique_toolkit/agentic/evaluation/config.py +36 -0
  37. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  38. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  39. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  40. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  41. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  42. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
  43. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  44. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
  45. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
  46. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  47. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  48. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  49. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  50. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
  51. unique_toolkit/agentic/history_manager/history_manager.py +242 -0
  52. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  53. unique_toolkit/agentic/history_manager/utils.py +96 -0
  54. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  55. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  56. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  57. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
  58. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
  59. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  60. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  61. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  62. unique_toolkit/agentic/tools/__init__.py +1 -0
  63. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  64. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  65. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  66. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  67. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  68. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  69. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  70. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  71. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  72. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
  73. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
  74. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
  75. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
  76. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  77. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
  78. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  79. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  80. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  81. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  82. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  83. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  84. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  85. unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
  86. unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
  87. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  88. unique_toolkit/agentic/tools/config.py +167 -0
  89. unique_toolkit/agentic/tools/factory.py +44 -0
  90. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  91. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  92. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  93. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  94. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  95. unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
  96. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  97. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
  98. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
  99. unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
  100. unique_toolkit/agentic/tools/schemas.py +141 -0
  101. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  102. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  103. unique_toolkit/agentic/tools/tool.py +183 -0
  104. unique_toolkit/agentic/tools/tool_manager.py +523 -0
  105. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  106. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  107. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  108. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  109. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  110. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  111. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  112. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  113. unique_toolkit/app/__init__.py +6 -0
  114. unique_toolkit/app/dev_util.py +180 -0
  115. unique_toolkit/app/init_sdk.py +32 -1
  116. unique_toolkit/app/schemas.py +198 -31
  117. unique_toolkit/app/unique_settings.py +367 -0
  118. unique_toolkit/chat/__init__.py +8 -1
  119. unique_toolkit/chat/deprecated/service.py +232 -0
  120. unique_toolkit/chat/functions.py +642 -77
  121. unique_toolkit/chat/rendering.py +34 -0
  122. unique_toolkit/chat/responses_api.py +461 -0
  123. unique_toolkit/chat/schemas.py +133 -2
  124. unique_toolkit/chat/service.py +115 -767
  125. unique_toolkit/content/functions.py +153 -4
  126. unique_toolkit/content/schemas.py +122 -15
  127. unique_toolkit/content/service.py +278 -44
  128. unique_toolkit/content/smart_rules.py +301 -0
  129. unique_toolkit/content/utils.py +8 -3
  130. unique_toolkit/embedding/service.py +102 -11
  131. unique_toolkit/framework_utilities/__init__.py +1 -0
  132. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  133. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  134. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  135. unique_toolkit/framework_utilities/openai/client.py +83 -0
  136. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  137. unique_toolkit/framework_utilities/utils.py +23 -0
  138. unique_toolkit/language_model/__init__.py +3 -0
  139. unique_toolkit/language_model/builder.py +27 -11
  140. unique_toolkit/language_model/default_language_model.py +3 -0
  141. unique_toolkit/language_model/functions.py +327 -43
  142. unique_toolkit/language_model/infos.py +992 -50
  143. unique_toolkit/language_model/reference.py +242 -0
  144. unique_toolkit/language_model/schemas.py +475 -48
  145. unique_toolkit/language_model/service.py +228 -27
  146. unique_toolkit/protocols/support.py +145 -0
  147. unique_toolkit/services/__init__.py +7 -0
  148. unique_toolkit/services/chat_service.py +1630 -0
  149. unique_toolkit/services/knowledge_base.py +861 -0
  150. unique_toolkit/short_term_memory/service.py +178 -41
  151. unique_toolkit/smart_rules/__init__.py +0 -0
  152. unique_toolkit/smart_rules/compile.py +56 -0
  153. unique_toolkit/test_utilities/events.py +197 -0
  154. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
  155. unique_toolkit-1.23.0.dist-info/RECORD +182 -0
  156. unique_toolkit/evaluators/__init__.py +0 -1
  157. unique_toolkit/evaluators/config.py +0 -35
  158. unique_toolkit/evaluators/constants.py +0 -1
  159. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  160. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  161. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  162. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  163. unique_toolkit-0.7.7.dist-info/RECORD +0 -64
  164. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  165. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
  166. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,278 @@
1
+ """
2
+ Test suite for DebugInfoManager class.
3
+
4
+ This test suite validates the DebugInfoManager's ability to:
5
+ 1. Initialize with empty debug info
6
+ 2. Extract tool debug info from ToolCallResponse objects
7
+ 3. Handle loop iteration indices
8
+ 4. Add arbitrary key-value pairs to debug info
9
+ 5. Retrieve the complete debug info dictionary
10
+ """
11
+
12
+ from unique_toolkit.agentic.debug_info_manager.debug_info_manager import (
13
+ DebugInfoManager,
14
+ )
15
+ from unique_toolkit.agentic.tools.schemas import ToolCallResponse
16
+
17
+
18
class TestDebugInfoManager:
    """Test suite for DebugInfoManager functionality.

    Covers the three public entry points exercised below: construction,
    ``extract_tool_debug_info`` (with and without a loop iteration index),
    ``add`` and ``get``.
    """

    def test_init__initializes_empty_debug_info__on_creation(self):
        """Test that DebugInfoManager initializes with empty tools list."""
        manager = DebugInfoManager()

        assert manager.debug_info == {"tools": []}
        assert manager.get() == {"tools": []}

    def test_extract_tool_debug_info__adds_single_tool__with_valid_response(self):
        """Test extracting debug info from a single ToolCallResponse."""
        manager = DebugInfoManager()
        tool_call_response = ToolCallResponse(
            id="tool_1",
            name="TestTool",
            debug_info={"execution_time": "100ms", "status": "success"},
        )

        manager.extract_tool_debug_info([tool_call_response])

        debug_info = manager.get()
        assert len(debug_info["tools"]) == 1
        assert debug_info["tools"][0]["name"] == "TestTool"
        assert debug_info["tools"][0]["info"]["execution_time"] == "100ms"
        assert debug_info["tools"][0]["info"]["status"] == "success"

    def test_extract_tool_debug_info__adds_multiple_tools__with_multiple_responses(
        self,
    ):
        """Test extracting debug info from multiple ToolCallResponse objects."""
        manager = DebugInfoManager()
        tool_call_responses = [
            ToolCallResponse(
                id="tool_1",
                name="SearchTool",
                debug_info={"query": "test query", "results": 5},
            ),
            ToolCallResponse(
                id="tool_2",
                name="CalculatorTool",
                debug_info={"operation": "add", "result": 42},
            ),
            ToolCallResponse(
                id="tool_3",
                name="WeatherTool",
                debug_info={"location": "New York", "temperature": "72F"},
            ),
        ]

        manager.extract_tool_debug_info(tool_call_responses)

        debug_info = manager.get()
        assert len(debug_info["tools"]) == 3
        assert debug_info["tools"][0]["name"] == "SearchTool"
        assert debug_info["tools"][1]["name"] == "CalculatorTool"
        assert debug_info["tools"][2]["name"] == "WeatherTool"

    def test_extract_tool_debug_info__preserves_order__with_sequential_calls(self):
        """Test that multiple calls to extract_tool_debug_info preserve order."""
        manager = DebugInfoManager()

        # First call
        manager.extract_tool_debug_info(
            [ToolCallResponse(id="tool_1", name="Tool1", debug_info={"step": 1})]
        )

        # Second call
        manager.extract_tool_debug_info(
            [ToolCallResponse(id="tool_2", name="Tool2", debug_info={"step": 2})]
        )

        # Third call
        manager.extract_tool_debug_info(
            [ToolCallResponse(id="tool_3", name="Tool3", debug_info={"step": 3})]
        )

        debug_info = manager.get()
        assert len(debug_info["tools"]) == 3
        assert debug_info["tools"][0]["info"]["step"] == 1
        assert debug_info["tools"][1]["info"]["step"] == 2
        assert debug_info["tools"][2]["info"]["step"] == 3

    def test_extract_tool_debug_info__adds_loop_iteration__when_index_provided(self):
        """Test that loop_iteration_index is added to debug info when provided."""
        manager = DebugInfoManager()
        tool_call_response = ToolCallResponse(
            id="tool_1", name="IterativeTool", debug_info={"status": "processing"}
        )

        manager.extract_tool_debug_info([tool_call_response], loop_iteration_index=3)

        debug_info = manager.get()
        assert debug_info["tools"][0]["info"]["loop_iteration"] == 3
        assert debug_info["tools"][0]["info"]["status"] == "processing"

    def test_extract_tool_debug_info__omits_loop_iteration__when_index_is_none(self):
        """Test that loop_iteration is not added when index is None."""
        manager = DebugInfoManager()
        tool_call_response = ToolCallResponse(
            id="tool_1", name="SingleRunTool", debug_info={"status": "complete"}
        )

        manager.extract_tool_debug_info([tool_call_response], loop_iteration_index=None)

        debug_info = manager.get()
        assert "loop_iteration" not in debug_info["tools"][0]["info"]
        assert debug_info["tools"][0]["info"]["status"] == "complete"

    def test_extract_tool_debug_info__handles_empty_debug_info__gracefully(self):
        """Test extracting from ToolCallResponse with empty debug_info dict."""
        manager = DebugInfoManager()
        tool_call_response = ToolCallResponse(
            id="tool_1", name="MinimalTool", debug_info={}
        )

        manager.extract_tool_debug_info([tool_call_response])

        debug_info = manager.get()
        assert len(debug_info["tools"]) == 1
        assert debug_info["tools"][0]["name"] == "MinimalTool"
        assert debug_info["tools"][0]["info"] == {}

    def test_extract_tool_debug_info__handles_empty_list__without_error(self):
        """Test that passing an empty list doesn't cause errors."""
        manager = DebugInfoManager()

        manager.extract_tool_debug_info([])

        debug_info = manager.get()
        assert debug_info["tools"] == []

    def test_add__adds_new_key_value_pair__to_debug_info(self):
        """Test adding a new key-value pair to debug_info."""
        manager = DebugInfoManager()

        manager.add("execution_summary", {"total_time": "500ms", "total_calls": 5})

        debug_info = manager.get()
        assert "execution_summary" in debug_info
        assert debug_info["execution_summary"]["total_time"] == "500ms"
        assert debug_info["execution_summary"]["total_calls"] == 5

    def test_add__preserves_tools_list__when_adding_new_keys(self):
        """Test that add() preserves the tools list."""
        manager = DebugInfoManager()
        manager.extract_tool_debug_info(
            [
                ToolCallResponse(
                    id="tool_1", name="TestTool", debug_info={"test": "data"}
                )
            ]
        )

        manager.add("metadata", {"version": "1.0"})

        debug_info = manager.get()
        assert len(debug_info["tools"]) == 1
        assert debug_info["tools"][0]["name"] == "TestTool"
        assert debug_info["metadata"]["version"] == "1.0"

    def test_add__overwrites_existing_key__when_key_exists(self):
        """Test that add() overwrites an existing key."""
        manager = DebugInfoManager()
        manager.add("status", "in_progress")
        manager.add("status", "completed")

        debug_info = manager.get()
        assert debug_info["status"] == "completed"

    def test_add__adds_multiple_keys__with_sequential_calls(self):
        """Test adding multiple key-value pairs with sequential calls."""
        manager = DebugInfoManager()

        manager.add("key1", "value1")
        manager.add("key2", {"nested": "value2"})
        manager.add("key3", [1, 2, 3])

        debug_info = manager.get()
        assert debug_info["key1"] == "value1"
        assert debug_info["key2"]["nested"] == "value2"
        assert debug_info["key3"] == [1, 2, 3]

    def test_get__returns_complete_debug_info__with_mixed_data(self):
        """Test get() returns complete debug info with tools and custom keys."""
        manager = DebugInfoManager()

        # Add tool debug info
        manager.extract_tool_debug_info(
            [ToolCallResponse(id="tool_1", name="Tool1", debug_info={"data": "test"})],
            loop_iteration_index=0,
        )

        # Add custom keys
        manager.add("start_time", "2025-10-16T10:00:00")
        manager.add("end_time", "2025-10-16T10:01:00")

        debug_info = manager.get()

        assert "tools" in debug_info
        assert "start_time" in debug_info
        assert "end_time" in debug_info
        assert len(debug_info["tools"]) == 1
        assert debug_info["start_time"] == "2025-10-16T10:00:00"
        # Verify the values that were set above, not only that the keys exist.
        assert debug_info["end_time"] == "2025-10-16T10:01:00"
        # Index 0 is falsy; it must still be recorded as a loop iteration.
        assert debug_info["tools"][0]["info"]["loop_iteration"] == 0

    def test_integration__complete_workflow__with_all_operations(self):
        """Integration test: complete workflow using all DebugInfoManager methods."""
        manager = DebugInfoManager()

        # Initial state
        assert manager.get() == {"tools": []}

        # Add some metadata
        manager.add("session_id", "abc-123")
        manager.add("user_id", "user-456")

        # First tool call (loop iteration 0)
        manager.extract_tool_debug_info(
            [
                ToolCallResponse(
                    id="tool_1",
                    name="SearchTool",
                    debug_info={"query": "AI research", "hits": 100},
                )
            ],
            loop_iteration_index=0,
        )

        # Second tool call (loop iteration 1)
        manager.extract_tool_debug_info(
            [
                ToolCallResponse(
                    id="tool_2",
                    name="AnalysisTool",
                    debug_info={"processed": 50, "relevant": 10},
                ),
                ToolCallResponse(
                    id="tool_3",
                    name="SummaryTool",
                    debug_info={"paragraphs": 3, "words": 250},
                ),
            ],
            loop_iteration_index=1,
        )

        # Add final summary
        manager.add("summary", {"total_tools": 3, "total_iterations": 2})

        # Verify complete debug info
        debug_info = manager.get()

        assert debug_info["session_id"] == "abc-123"
        assert debug_info["user_id"] == "user-456"
        assert len(debug_info["tools"]) == 3
        assert debug_info["tools"][0]["name"] == "SearchTool"
        assert debug_info["tools"][0]["info"]["loop_iteration"] == 0
        assert debug_info["tools"][1]["name"] == "AnalysisTool"
        assert debug_info["tools"][1]["info"]["loop_iteration"] == 1
        assert debug_info["tools"][2]["name"] == "SummaryTool"
        assert debug_info["tools"][2]["info"]["loop_iteration"] == 1
        assert debug_info["summary"]["total_tools"] == 3
@@ -0,0 +1,36 @@
1
+ from typing import Any
2
+
3
+ from humps import camelize
4
+ from pydantic import BaseModel, ConfigDict, Field
5
+
6
+ from unique_toolkit._common.validators import LMI
7
+ from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
8
+ from unique_toolkit.language_model.infos import LanguageModelInfo
9
+
10
+ from .schemas import (
11
+ EvaluationMetricName,
12
+ )
13
+
14
# Pydantic configuration assigned to EvaluationMetricConfig below.
model_config = ConfigDict(
    alias_generator=camelize,  # field aliases are produced by humps.camelize
    populate_by_name=True,  # accept the Python field name as well as the alias
    arbitrary_types_allowed=True,
    validate_default=True,  # default values are validated too
)
20
+
21
+
22
class EvaluationMetricConfig(BaseModel):
    # Settings for one evaluation metric: whether it is enabled, which metric
    # it is, the language model used to run it, and optional prompt/label
    # overrides. Documented with comments rather than a class docstring so the
    # generated JSON schema description is left untouched.
    model_config = model_config

    enabled: bool = False
    name: EvaluationMetricName
    # Default model info is resolved from DEFAULT_GPT_4o.
    language_model: LMI = LanguageModelInfo.from_name(DEFAULT_GPT_4o)
    additional_llm_options: dict[str, Any] = Field(
        description="Additional options to pass to the language model.",
        default={},
    )
    # Optional overrides; all empty by default.
    # NOTE(review): the key conventions of these mappings are defined by the
    # consumers of this config and are not visible here.
    custom_prompts: dict[str, str] = {}
    score_to_label: dict[str, str] = {}
    score_to_title: dict[str, str] = {}
@@ -14,6 +14,17 @@ Your answer must be in JSON format:
14
14
  }
15
15
  """
16
16
 
17
# System prompt for the context relevancy metric, structured-output variant:
# the output format is delegated to the provided data schema, and the model is
# also asked to extract relevant facts from the contexts.
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT = """
You will receive an input and a set of contexts.
Your task is to evaluate how relevant the contexts are to the input text.
Further you should extract relevant facts from the contexts.

# Output Format
- Generate data according to the provided data schema.
- Ensure the output adheres to the format required by the pydantic object.
- All necessary fields should be populated as per the data schema guidelines.
"""
27
+
17
28
  CONTEXT_RELEVANCY_METRIC_USER_MSG = """
18
29
  Here is the data:
19
30
 
@@ -29,3 +40,17 @@ $context_texts
29
40
 
30
41
  Answer as JSON:
31
42
  """
43
+
44
# User prompt template for the structured-output variant of the context
# relevancy metric. `$input_text` and `$context_texts` are placeholders
# (presumably filled by string.Template-style substitution; confirm at the
# call site).
CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT = """
Here is the data:

Input:
'''
$input_text
'''

Contexts:
'''
$context_texts
'''
"""
@@ -0,0 +1,80 @@
1
+ from pydantic import BaseModel, Field, create_model
2
+ from pydantic.json_schema import SkipJsonSchema
3
+
4
+ from unique_toolkit._common.utils.structured_output.schema import StructuredOutputModel
5
+ from unique_toolkit.agentic.tools.config import get_configuration_dict
6
+
7
+
8
class StructuredOutputConfig(BaseModel):
    # Switches and field descriptions used when the evaluation is run with
    # structured output. The *_description values are attached to the fields
    # of the schema built by
    # EvaluationSchemaStructuredOutput.get_with_descriptions.
    # Documented with comments rather than a class docstring so the generated
    # JSON schema description is left untouched.

    # NOTE(review): get_configuration_dict() is defined in
    # unique_toolkit.agentic.tools.config; its exact settings are not visible here.
    model_config = get_configuration_dict()

    enabled: bool = Field(
        default=False,
        description="Whether to use structured output for the evaluation.",
    )
    extract_fact_list: bool = Field(
        default=False,
        description="Whether to extract a list of relevant facts from context chunks with structured output.",
    )
    reason_description: str = Field(
        default="A brief explanation justifying your evaluation decision.",
        description="The description of the reason field for structured output.",
    )
    value_description: str = Field(
        default="Assessment of how relevant the facts are to the query. Must be one of: ['low', 'medium', 'high'].",
        description="The description of the value field for structured output.",
    )

    # Grammar of the default corrected: this text is used as the description
    # of the `fact` field in the generated schema.
    fact_description: str = Field(
        default="A fact is a piece of information that directly answers the user's query. Make sure to emphasize the important information from the fact with bold text.",
        description="The description of the fact field for structured output.",
    )
    fact_list_description: str = Field(
        default="A list of relevant facts extracted from the source that supports or answers the user's query.",
        description="The description of the fact list field for structured output.",
    )
36
+
37
+
38
class Fact(StructuredOutputModel):
    # A single extracted fact; used as the element type of
    # EvaluationSchemaStructuredOutput.fact_list.
    fact: str
40
+
41
+
42
class EvaluationSchemaStructuredOutput(StructuredOutputModel):
    # Result schema of a structured-output evaluation: a free-text reason, the
    # assessed value and, optionally, a list of extracted facts.
    # Documented with comments rather than a class docstring so the generated
    # JSON schema description is left untouched.
    reason: str
    value: str

    fact_list: list[Fact] = Field(default_factory=list[Fact])

    @classmethod
    def get_with_descriptions(cls, config: StructuredOutputConfig):
        """Create a model derived from this schema with configured descriptions.

        Args:
            config: Supplies the description text for each field and the
                ``extract_fact_list`` switch.

        Returns:
            A dynamically created model class named
            ``EvaluationSchemaStructuredOutputWithDescription`` with ``cls`` as
            its base. When ``config.extract_fact_list`` is true, ``fact_list``
            holds a described ``Fact`` variant; otherwise ``fact_list`` is
            wrapped in ``SkipJsonSchema`` and defaults to an empty list.
        """
        if not config.extract_fact_list:
            # Facts are not requested: keep the field on the model but leave
            # it out of the JSON schema.
            fact_list_definition = (
                SkipJsonSchema[list[Fact]],
                Field(default_factory=list[Fact]),
            )
        else:
            # Derive a Fact model whose `fact` field carries the description.
            described_fact = create_model(
                "Fact",
                __base__=Fact,
                fact=(str, Field(..., description=config.fact_description)),
            )
            fact_list_definition = (
                list[described_fact],
                Field(description=config.fact_list_description),
            )

        field_definitions = {
            "reason": (str, Field(..., description=config.reason_description)),
            "value": (str, Field(..., description=config.value_description)),
            "fact_list": fact_list_definition,
        }
        return create_model(
            "EvaluationSchemaStructuredOutputWithDescription",
            __base__=cls,
            **field_definitions,
        )