unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/__init__.py +28 -1
- unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +252 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +305 -0
- unique_toolkit/_common/endpoint_requestor.py +430 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +154 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
- unique_toolkit/agentic/history_manager/history_manager.py +242 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +167 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
- unique_toolkit/agentic/tools/schemas.py +141 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +183 -0
- unique_toolkit/agentic/tools/tool_manager.py +523 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +6 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +198 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +642 -77
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +133 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +153 -4
- unique_toolkit/content/schemas.py +122 -15
- unique_toolkit/content/service.py +278 -44
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/embedding/service.py +102 -11
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +83 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +327 -43
- unique_toolkit/language_model/infos.py +992 -50
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +475 -48
- unique_toolkit/language_model/service.py +228 -27
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1630 -0
- unique_toolkit/services/knowledge_base.py +861 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
- unique_toolkit-1.23.0.dist-info/RECORD +182 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.7.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
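Note the `{evaluators → agentic/evaluation}` rename entries: code that imported from `unique_toolkit.evaluators` in 0.7.x must switch to the new package path in 1.23.0. A rough before/after sketch (whether 1.x additionally re-exports the old paths is not visible in this file list):

# 0.7.x (removed):
#   from unique_toolkit.evaluators.schemas import EvaluationMetricName
# 1.23.0 (per the rename entries above):
from unique_toolkit.agentic.evaluation.schemas import EvaluationMetricName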
unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py

@@ -0,0 +1,278 @@
+"""
+Test suite for DebugInfoManager class.
+
+This test suite validates the DebugInfoManager's ability to:
+1. Initialize with empty debug info
+2. Extract tool debug info from ToolCallResponse objects
+3. Handle loop iteration indices
+4. Add arbitrary key-value pairs to debug info
+5. Retrieve the complete debug info dictionary
+"""
+
+from unique_toolkit.agentic.debug_info_manager.debug_info_manager import (
+    DebugInfoManager,
+)
+from unique_toolkit.agentic.tools.schemas import ToolCallResponse
+
+
+class TestDebugInfoManager:
+    """Test suite for DebugInfoManager functionality."""
+
+    def test_init__initializes_empty_debug_info__on_creation(self):
+        """Test that DebugInfoManager initializes with empty tools list."""
+        manager = DebugInfoManager()
+
+        assert manager.debug_info == {"tools": []}
+        assert manager.get() == {"tools": []}
+
+    def test_extract_tool_debug_info__adds_single_tool__with_valid_response(self):
+        """Test extracting debug info from a single ToolCallResponse."""
+        manager = DebugInfoManager()
+        tool_call_response = ToolCallResponse(
+            id="tool_1",
+            name="TestTool",
+            debug_info={"execution_time": "100ms", "status": "success"},
+        )
+
+        manager.extract_tool_debug_info([tool_call_response])
+
+        debug_info = manager.get()
+        assert len(debug_info["tools"]) == 1
+        assert debug_info["tools"][0]["name"] == "TestTool"
+        assert debug_info["tools"][0]["info"]["execution_time"] == "100ms"
+        assert debug_info["tools"][0]["info"]["status"] == "success"
+
+    def test_extract_tool_debug_info__adds_multiple_tools__with_multiple_responses(
+        self,
+    ):
+        """Test extracting debug info from multiple ToolCallResponse objects."""
+        manager = DebugInfoManager()
+        tool_call_responses = [
+            ToolCallResponse(
+                id="tool_1",
+                name="SearchTool",
+                debug_info={"query": "test query", "results": 5},
+            ),
+            ToolCallResponse(
+                id="tool_2",
+                name="CalculatorTool",
+                debug_info={"operation": "add", "result": 42},
+            ),
+            ToolCallResponse(
+                id="tool_3",
+                name="WeatherTool",
+                debug_info={"location": "New York", "temperature": "72F"},
+            ),
+        ]
+
+        manager.extract_tool_debug_info(tool_call_responses)
+
+        debug_info = manager.get()
+        assert len(debug_info["tools"]) == 3
+        assert debug_info["tools"][0]["name"] == "SearchTool"
+        assert debug_info["tools"][1]["name"] == "CalculatorTool"
+        assert debug_info["tools"][2]["name"] == "WeatherTool"
+
+    def test_extract_tool_debug_info__preserves_order__with_sequential_calls(self):
+        """Test that multiple calls to extract_tool_debug_info preserve order."""
+        manager = DebugInfoManager()
+
+        # First call
+        manager.extract_tool_debug_info(
+            [ToolCallResponse(id="tool_1", name="Tool1", debug_info={"step": 1})]
+        )
+
+        # Second call
+        manager.extract_tool_debug_info(
+            [ToolCallResponse(id="tool_2", name="Tool2", debug_info={"step": 2})]
+        )
+
+        # Third call
+        manager.extract_tool_debug_info(
+            [ToolCallResponse(id="tool_3", name="Tool3", debug_info={"step": 3})]
+        )
+
+        debug_info = manager.get()
+        assert len(debug_info["tools"]) == 3
+        assert debug_info["tools"][0]["info"]["step"] == 1
+        assert debug_info["tools"][1]["info"]["step"] == 2
+        assert debug_info["tools"][2]["info"]["step"] == 3
+
+    def test_extract_tool_debug_info__adds_loop_iteration__when_index_provided(self):
+        """Test that loop_iteration_index is added to debug info when provided."""
+        manager = DebugInfoManager()
+        tool_call_response = ToolCallResponse(
+            id="tool_1", name="IterativeTool", debug_info={"status": "processing"}
+        )
+
+        manager.extract_tool_debug_info([tool_call_response], loop_iteration_index=3)
+
+        debug_info = manager.get()
+        assert debug_info["tools"][0]["info"]["loop_iteration"] == 3
+        assert debug_info["tools"][0]["info"]["status"] == "processing"
+
+    def test_extract_tool_debug_info__omits_loop_iteration__when_index_is_none(self):
+        """Test that loop_iteration is not added when index is None."""
+        manager = DebugInfoManager()
+        tool_call_response = ToolCallResponse(
+            id="tool_1", name="SingleRunTool", debug_info={"status": "complete"}
+        )
+
+        manager.extract_tool_debug_info([tool_call_response], loop_iteration_index=None)
+
+        debug_info = manager.get()
+        assert "loop_iteration" not in debug_info["tools"][0]["info"]
+        assert debug_info["tools"][0]["info"]["status"] == "complete"
+
+    def test_extract_tool_debug_info__handles_empty_debug_info__gracefully(self):
+        """Test extracting from ToolCallResponse with empty debug_info dict."""
+        manager = DebugInfoManager()
+        tool_call_response = ToolCallResponse(
+            id="tool_1", name="MinimalTool", debug_info={}
+        )
+
+        manager.extract_tool_debug_info([tool_call_response])
+
+        debug_info = manager.get()
+        assert len(debug_info["tools"]) == 1
+        assert debug_info["tools"][0]["name"] == "MinimalTool"
+        assert debug_info["tools"][0]["info"] == {}
+
+    def test_extract_tool_debug_info__handles_empty_list__without_error(self):
+        """Test that passing an empty list doesn't cause errors."""
+        manager = DebugInfoManager()
+
+        manager.extract_tool_debug_info([])
+
+        debug_info = manager.get()
+        assert debug_info["tools"] == []
+
+    def test_add__adds_new_key_value_pair__to_debug_info(self):
+        """Test adding a new key-value pair to debug_info."""
+        manager = DebugInfoManager()
+
+        manager.add("execution_summary", {"total_time": "500ms", "total_calls": 5})
+
+        debug_info = manager.get()
+        assert "execution_summary" in debug_info
+        assert debug_info["execution_summary"]["total_time"] == "500ms"
+        assert debug_info["execution_summary"]["total_calls"] == 5
+
+    def test_add__preserves_tools_list__when_adding_new_keys(self):
+        """Test that add() preserves the tools list."""
+        manager = DebugInfoManager()
+        manager.extract_tool_debug_info(
+            [
+                ToolCallResponse(
+                    id="tool_1", name="TestTool", debug_info={"test": "data"}
+                )
+            ]
+        )
+
+        manager.add("metadata", {"version": "1.0"})
+
+        debug_info = manager.get()
+        assert len(debug_info["tools"]) == 1
+        assert debug_info["tools"][0]["name"] == "TestTool"
+        assert debug_info["metadata"]["version"] == "1.0"
+
+    def test_add__overwrites_existing_key__when_key_exists(self):
+        """Test that add() overwrites an existing key."""
+        manager = DebugInfoManager()
+        manager.add("status", "in_progress")
+        manager.add("status", "completed")
+
+        debug_info = manager.get()
+        assert debug_info["status"] == "completed"
+
+    def test_add__adds_multiple_keys__with_sequential_calls(self):
+        """Test adding multiple key-value pairs with sequential calls."""
+        manager = DebugInfoManager()
+
+        manager.add("key1", "value1")
+        manager.add("key2", {"nested": "value2"})
+        manager.add("key3", [1, 2, 3])
+
+        debug_info = manager.get()
+        assert debug_info["key1"] == "value1"
+        assert debug_info["key2"]["nested"] == "value2"
+        assert debug_info["key3"] == [1, 2, 3]
+
+    def test_get__returns_complete_debug_info__with_mixed_data(self):
+        """Test get() returns complete debug info with tools and custom keys."""
+        manager = DebugInfoManager()
+
+        # Add tool debug info
+        manager.extract_tool_debug_info(
+            [ToolCallResponse(id="tool_1", name="Tool1", debug_info={"data": "test"})],
+            loop_iteration_index=0,
+        )
+
+        # Add custom keys
+        manager.add("start_time", "2025-10-16T10:00:00")
+        manager.add("end_time", "2025-10-16T10:01:00")
+
+        debug_info = manager.get()
+
+        assert "tools" in debug_info
+        assert "start_time" in debug_info
+        assert "end_time" in debug_info
+        assert len(debug_info["tools"]) == 1
+        assert debug_info["start_time"] == "2025-10-16T10:00:00"
+
+    def test_integration__complete_workflow__with_all_operations(self):
+        """Integration test: complete workflow using all DebugInfoManager methods."""
+        manager = DebugInfoManager()
+
+        # Initial state
+        assert manager.get() == {"tools": []}
+
+        # Add some metadata
+        manager.add("session_id", "abc-123")
+        manager.add("user_id", "user-456")
+
+        # First tool call (loop iteration 0)
+        manager.extract_tool_debug_info(
+            [
+                ToolCallResponse(
+                    id="tool_1",
+                    name="SearchTool",
+                    debug_info={"query": "AI research", "hits": 100},
+                )
+            ],
+            loop_iteration_index=0,
+        )
+
+        # Second tool call (loop iteration 1)
+        manager.extract_tool_debug_info(
+            [
+                ToolCallResponse(
+                    id="tool_2",
+                    name="AnalysisTool",
+                    debug_info={"processed": 50, "relevant": 10},
+                ),
+                ToolCallResponse(
+                    id="tool_3",
+                    name="SummaryTool",
+                    debug_info={"paragraphs": 3, "words": 250},
+                ),
+            ],
+            loop_iteration_index=1,
+        )
+
+        # Add final summary
+        manager.add("summary", {"total_tools": 3, "total_iterations": 2})
+
+        # Verify complete debug info
+        debug_info = manager.get()
+
+        assert debug_info["session_id"] == "abc-123"
+        assert debug_info["user_id"] == "user-456"
+        assert len(debug_info["tools"]) == 3
+        assert debug_info["tools"][0]["name"] == "SearchTool"
+        assert debug_info["tools"][0]["info"]["loop_iteration"] == 0
+        assert debug_info["tools"][1]["name"] == "AnalysisTool"
+        assert debug_info["tools"][1]["info"]["loop_iteration"] == 1
+        assert debug_info["tools"][2]["name"] == "SummaryTool"
+        assert debug_info["tools"][2]["info"]["loop_iteration"] == 1
+        assert debug_info["summary"]["total_tools"] == 3
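The 28-line debug_info_manager.py implementation itself is not expanded in this diff, so here is a minimal sketch of the interface the tests above exercise. Field and method names are inferred from the assertions; the shipped code may differ in detail.

from typing import Any

from unique_toolkit.agentic.tools.schemas import ToolCallResponse


class DebugInfoManager:
    """Sketch only: collects per-tool debug payloads plus arbitrary keys."""

    def __init__(self) -> None:
        # The tests assert the initial state is {"tools": []}.
        self.debug_info: dict[str, Any] = {"tools": []}

    def extract_tool_debug_info(
        self,
        tool_call_responses: list[ToolCallResponse],
        loop_iteration_index: int | None = None,
    ) -> None:
        # One {"name": ..., "info": ...} entry per response, appended in
        # order; the loop iteration is tagged only when an index is given.
        for response in tool_call_responses:
            info = dict(response.debug_info)
            if loop_iteration_index is not None:
                info["loop_iteration"] = loop_iteration_index
            self.debug_info["tools"].append({"name": response.name, "info": info})

    def add(self, key: str, value: Any) -> None:
        # Arbitrary key-value pairs live next to "tools"; re-adding a key
        # overwrites the previous value.
        self.debug_info[key] = value

    def get(self) -> dict[str, Any]:
        return self.debug_info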
unique_toolkit/agentic/evaluation/config.py

@@ -0,0 +1,36 @@
+from typing import Any
+
+from humps import camelize
+from pydantic import BaseModel, ConfigDict, Field
+
+from unique_toolkit._common.validators import LMI
+from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
+from unique_toolkit.language_model.infos import LanguageModelInfo
+
+from .schemas import (
+    EvaluationMetricName,
+)
+
+model_config = ConfigDict(
+    alias_generator=camelize,
+    populate_by_name=True,
+    arbitrary_types_allowed=True,
+    validate_default=True,
+)
+
+
+class EvaluationMetricConfig(BaseModel):
+    model_config = model_config
+
+    enabled: bool = False
+    name: EvaluationMetricName
+    language_model: LMI = LanguageModelInfo.from_name(
+        DEFAULT_GPT_4o,
+    )
+    additional_llm_options: dict[str, Any] = Field(
+        default={},
+        description="Additional options to pass to the language model.",
+    )
+    custom_prompts: dict[str, str] = {}
+    score_to_label: dict[str, str] = {}
+    score_to_title: dict[str, str] = {}
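Because the shared ConfigDict sets alias_generator=camelize together with populate_by_name=True, this model accepts camelCase payloads (e.g. from a JSON assistant configuration) as well as snake_case names. A hedged usage sketch; the EvaluationMetricName.HALLUCINATION member is an assumption based on the hallucination evaluator files in this release, since the enum's members are not shown in this diff:

from unique_toolkit.agentic.evaluation.config import EvaluationMetricConfig
from unique_toolkit.agentic.evaluation.schemas import EvaluationMetricName

# camelCase keys validate via the generated aliases; snake_case attribute
# access still works on the resulting model.
config = EvaluationMetricConfig.model_validate(
    {
        "enabled": True,
        "name": EvaluationMetricName.HALLUCINATION,  # assumed member
        "additionalLlmOptions": {"temperature": 0},
        "scoreToLabel": {"low": "RED", "high": "GREEN"},
    }
)
assert config.additional_llm_options == {"temperature": 0}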
unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py

@@ -14,6 +14,17 @@ Your answer must be in JSON format:
 }
 """

+CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT = """
+You will receive an input and a set of contexts.
+Your task is to evaluate how relevant the contexts are to the input text.
+Further you should extract relevant facts from the contexts.
+
+# Output Format
+- Generate data according to the provided data schema.
+- Ensure the output adheres to the format required by the pydantic object.
+- All necessary fields should be populated as per the data schema guidelines.
+"""
+
 CONTEXT_RELEVANCY_METRIC_USER_MSG = """
 Here is the data:

@@ -29,3 +40,17 @@ $context_texts

 Answer as JSON:
 """
+
+CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT = """
+Here is the data:
+
+Input:
+'''
+$input_text
+'''
+
+Contexts:
+'''
+$context_texts
+'''
+"""
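The $input_text and $context_texts placeholders follow string.Template syntax. How the service actually renders the prompt is not visible in this hunk, so the following substitution is an illustrative sketch with made-up input values:

from string import Template

from unique_toolkit.agentic.evaluation.context_relevancy.prompts import (
    CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT,
)

# Fill the $-placeholders; substitute() raises KeyError if one is missing.
user_msg = Template(CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT).substitute(
    input_text="What was revenue growth in 2023?",
    context_texts="[source 1] Revenue grew 12% year over year in 2023.",
)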
unique_toolkit/agentic/evaluation/context_relevancy/schema.py

@@ -0,0 +1,80 @@
+from pydantic import BaseModel, Field, create_model
+from pydantic.json_schema import SkipJsonSchema
+
+from unique_toolkit._common.utils.structured_output.schema import StructuredOutputModel
+from unique_toolkit.agentic.tools.config import get_configuration_dict
+
+
+class StructuredOutputConfig(BaseModel):
+    model_config = get_configuration_dict()
+
+    enabled: bool = Field(
+        default=False,
+        description="Whether to use structured output for the evaluation.",
+    )
+    extract_fact_list: bool = Field(
+        default=False,
+        description="Whether to extract a list of relevant facts from context chunks with structured output.",
+    )
+    reason_description: str = Field(
+        default="A brief explanation justifying your evaluation decision.",
+        description="The description of the reason field for structured output.",
+    )
+    value_description: str = Field(
+        default="Assessment of how relevant the facts are to the query. Must be one of: ['low', 'medium', 'high'].",
+        description="The description of the value field for structured output.",
+    )
+
+    fact_description: str = Field(
+        default="A fact is a piece of information that directly answers the user's query. Make sure to emphasize the important information from the fact with bold text.",
+        description="The description of the fact field for structured output.",
+    )
+    fact_list_description: str = Field(
+        default="A list of relevant facts extracted from the source that supports or answers the user's query.",
+        description="The description of the fact list field for structured output.",
+    )
+
+
+class Fact(StructuredOutputModel):
+    fact: str
+
+
+class EvaluationSchemaStructuredOutput(StructuredOutputModel):
+    reason: str
+    value: str
+
+    fact_list: list[Fact] = Field(default_factory=list[Fact])
+
+    @classmethod
+    def get_with_descriptions(cls, config: StructuredOutputConfig):
+        if config.extract_fact_list:
+            FactWithDescription = create_model(
+                "Fact",
+                fact=(str, Field(..., description=config.fact_description)),
+                __base__=Fact,
+            )
+            fact_list_field = (
+                list[FactWithDescription],
+                Field(
+                    description=config.fact_list_description,
+                ),
+            )
+        else:
+            fact_list_field = (
+                SkipJsonSchema[list[Fact]],
+                Field(default_factory=list[Fact]),
+            )
+
+        return create_model(
+            "EvaluationSchemaStructuredOutputWithDescription",
+            reason=(
+                str,
+                Field(..., description=config.reason_description),
+            ),
+            value=(
+                str,
+                Field(..., description=config.value_description),
+            ),
+            fact_list=fact_list_field,
+            __base__=cls,
+        )