unique_toolkit 0.8.30__py3-none-any.whl → 0.8.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +45 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +372 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +273 -0
- unique_toolkit/_common/feature_flags/schema.py +13 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/evals/context_relevancy/schema.py +2 -5
- unique_toolkit/evals/context_relevancy/service.py +42 -10
- unique_toolkit/language_model/infos.py +156 -0
- unique_toolkit/tools/a2a/config.py +1 -3
- unique_toolkit/tools/a2a/manager.py +1 -2
- unique_toolkit/tools/a2a/memory.py +0 -1
- unique_toolkit/tools/a2a/service.py +6 -8
- unique_toolkit/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/tools/mcp/models.py +1 -0
- unique_toolkit/tools/test/test_mcp_manager.py +10 -19
- unique_toolkit/tools/tool.py +2 -0
- unique_toolkit/tools/tool_manager.py +1 -3
- {unique_toolkit-0.8.30.dist-info → unique_toolkit-0.8.32.dist-info}/METADATA +17 -1
- {unique_toolkit-0.8.30.dist-info → unique_toolkit-0.8.32.dist-info}/RECORD +23 -15
- {unique_toolkit-0.8.30.dist-info → unique_toolkit-0.8.32.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.8.30.dist-info → unique_toolkit-0.8.32.dist-info}/WHEEL +0 -0
unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py
@@ -0,0 +1,273 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from unique_toolkit._common.chunk_relevancy_sorter.config import (
+    ChunkRelevancySortConfig,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.exception import (
+    ChunkRelevancySorterException,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
+    ChunkRelevancy,
+    ChunkRelevancySorterResult,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter
+from unique_toolkit._common.default_language_model import (
+    DEFAULT_GPT_35_TURBO,
+    DEFAULT_GPT_4o,
+)
+from unique_toolkit.app.schemas import ChatEvent
+from unique_toolkit.content.schemas import ContentChunk
+from unique_toolkit.evals.context_relevancy.schema import StructuredOutputConfig
+from unique_toolkit.evals.schemas import EvaluationMetricName, EvaluationMetricResult
+from unique_toolkit.language_model.infos import LanguageModelInfo
+
+
+@pytest.fixture
+def event():
+    event = MagicMock(spec=ChatEvent)
+    event.payload = MagicMock()
+    event.payload.user_message = MagicMock()
+    event.payload.user_message.text = "Test query"
+    event.user_id = "user_0"
+    event.company_id = "company_0"
+    return event
+
+
+@pytest.fixture
+def mock_chunks():
+    return [
+        ContentChunk(
+            id=f"chunk_{i}",
+            order=i,
+            chunk_id=f"chunk_{i}",
+            text=f"Test content {i}",
+        )
+        for i in range(3)
+    ]
+
+
+@pytest.fixture
+def config():
+    return ChunkRelevancySortConfig(
+        enabled=True,
+        relevancy_levels_to_consider=["high", "medium", "low"],
+        relevancy_level_order={"high": 0, "medium": 1, "low": 2},
+        language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
+        fallback_language_model=LanguageModelInfo.from_name(DEFAULT_GPT_35_TURBO),
+        structured_output_config=StructuredOutputConfig(
+            enabled=False,
+            extract_fact_list=False,
+        ),
+    )
+
+
+@pytest.fixture
+def chunk_relevancy_sorter(event):
+    return ChunkRelevancySorter(event)
+
+
+@pytest.mark.asyncio
+async def test_run_disabled_config(chunk_relevancy_sorter, mock_chunks, config):
+    config.enabled = False
+    result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+    assert isinstance(result, ChunkRelevancySorterResult)
+    assert result.content_chunks == mock_chunks
+    assert len(result.content_chunks) == len(mock_chunks)
+
+
+@pytest.mark.asyncio
+async def test_run_enabled_config(chunk_relevancy_sorter, mock_chunks, config):
+    with patch.object(chunk_relevancy_sorter, "_run_chunk_relevancy_sort") as mock_sort:
+        mock_sort.return_value = ChunkRelevancySorterResult.from_chunks(mock_chunks)
+
+        result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+        assert isinstance(result, ChunkRelevancySorterResult)
+        assert result.content_chunks == mock_chunks
+        mock_sort.assert_called_once_with("test input", mock_chunks, config)
+
+
+@pytest.mark.asyncio
+async def test_evaluate_chunks_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+    mock_relevancy = EvaluationMetricResult(
+        value="high",
+        name=EvaluationMetricName.CONTEXT_RELEVANCY,
+        reason="Test reason",
+    )
+
+    with patch.object(
+        chunk_relevancy_sorter, "_process_relevancy_evaluation"
+    ) as mock_process:
+        mock_process.return_value = ChunkRelevancy(
+            chunk=mock_chunks[0], relevancy=mock_relevancy
+        )
+
+        result = await chunk_relevancy_sorter._evaluate_chunks_relevancy(
+            "test input", mock_chunks, config
+        )
+
+        assert len(result) == len(mock_chunks)
+        assert all(isinstance(r, ChunkRelevancy) for r in result)
+        assert mock_process.call_count == len(mock_chunks)
+
+
+@pytest.mark.asyncio
+async def test_evaluate_chunk_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+    with patch(
+        "unique_toolkit._common.chunk_relevancy_sorter.service.ContextRelevancyEvaluator.analyze"
+    ) as mock_analyze:
+        mock_analyze.return_value = EvaluationMetricResult(
+            value="high",
+            name=EvaluationMetricName.CONTEXT_RELEVANCY,
+            reason="Test reason",
+        )
+
+        result = await chunk_relevancy_sorter._evaluate_chunk_relevancy(
+            input_text="test input",
+            chunk=mock_chunks[0],
+            langugage_model=config.language_model,
+            structured_output_config=config.structured_output_config,
+            additional_llm_options=config.additional_llm_options,
+        )
+
+        assert isinstance(result, EvaluationMetricResult)
+        assert result.value == "high"
+        mock_analyze.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_process_relevancy_evaluation_success(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    with patch.object(
+        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+    ) as mock_evaluate:
+        mock_evaluate.return_value = EvaluationMetricResult(
+            value="high",
+            name=EvaluationMetricName.CONTEXT_RELEVANCY,
+            reason="Test reason",
+        )
+
+        result = await chunk_relevancy_sorter._process_relevancy_evaluation(
+            "test input", mock_chunks[0], config
+        )
+
+        assert isinstance(result, ChunkRelevancy)
+        assert result.chunk == mock_chunks[0]
+        assert result.relevancy is not None
+        assert result.relevancy.value == "high"
+
+
+@pytest.mark.asyncio
+async def test_process_relevancy_evaluation_fallback(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    with patch.object(
+        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+    ) as mock_evaluate:
+        # First call raises exception, second call succeeds
+        mock_evaluate.side_effect = [
+            Exception("Test error"),
+            EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        ]
+
+        with pytest.raises(ChunkRelevancySorterException):
+            await chunk_relevancy_sorter._process_relevancy_evaluation(
+                "test input", mock_chunks[0], config
+            )
+
+
+@pytest.mark.asyncio
+async def test_validate_and_sort_relevant_chunks(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    chunk_relevancies = [
+        ChunkRelevancy(
+            chunk=mock_chunks[0],
+            relevancy=EvaluationMetricResult(
+                value="low",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    ]
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[1],
+            relevancy=EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[2],
+            relevancy=EvaluationMetricResult(
+                value="high",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+
+    result = await chunk_relevancy_sorter._validate_and_sort_relevant_chunks(
+        config, chunk_relevancies
+    )
+
+    assert isinstance(result, list)
+    assert len(result) == len(mock_chunks)
+    assert all(isinstance(relevancy.chunk, ContentChunk) for relevancy in result)
+    assert result[0].chunk == mock_chunks[2]
+    assert result[1].chunk == mock_chunks[1]
+    assert result[2].chunk == mock_chunks[0]
+
+
+@pytest.mark.asyncio
+async def test_validate_chunk_relevancies_invalid(chunk_relevancy_sorter):
+    invalid_relevancies = [
+        ChunkRelevancy(
+            chunk=ContentChunk(chunk_id="test", text="test", id="test", order=0),
+            relevancy=None,
+        )
+    ]
+
+    with pytest.raises(ChunkRelevancySorterException):
+        await chunk_relevancy_sorter._validate_chunk_relevancies(invalid_relevancies)
+
+
+def test_count_distinct_values(chunk_relevancy_sorter, mock_chunks):
+    chunk_relevancies = [
+        ChunkRelevancy(
+            chunk=chunk,
+            relevancy=EvaluationMetricResult(
+                value="high",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+        for chunk in mock_chunks[:2]
+    ]
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[2],
+            relevancy=EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+
+    value_counts = chunk_relevancy_sorter._count_distinct_values(chunk_relevancies)
+
+    assert value_counts["high"] == 2
+    assert value_counts["medium"] == 1
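The tests above pin down the public surface of the new sorter. Below is a minimal usage sketch, assuming only the constructor and `run` signature shown in the fixtures; anything beyond that (where the chunks come from, which config fields have defaults) is not part of this diff and is marked as an assumption.

```python
# Sketch only: mirrors how the tests above drive ChunkRelevancySorter.
from unique_toolkit._common.chunk_relevancy_sorter.config import ChunkRelevancySortConfig
from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter


async def sort_chunks(event, query, chunks):
    # Assumption: fields not set here fall back to the config's defaults.
    config = ChunkRelevancySortConfig(enabled=True)
    sorter = ChunkRelevancySorter(event)
    result = await sorter.run(query, chunks, config)
    # result.content_chunks holds the (possibly re-ordered) chunk list.
    return result.content_chunks
```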
unique_toolkit/_common/feature_flags/schema.py
@@ -0,0 +1,13 @@
+from pydantic import BaseModel, Field
+
+from unique_toolkit.tools.config import get_configuration_dict
+
+
+class FeatureExtendedSourceSerialization(BaseModel):
+    """Mixin for experimental feature in Source serialization"""
+
+    model_config = get_configuration_dict()
+    full_sources_serialize_dump: bool = Field(
+        default=False,
+        description="Whether to include the full source object in the tool response. If True, includes the full Source object. If False, uses the old format with only source_number and content.",
+    )
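The new flag is declared as a mixin, so a tool config would pick it up by inheritance. A sketch of that pattern, assuming a hypothetical `MyToolConfig` class (not part of the package):

```python
# Sketch: a config class opting into the experimental serialization flag.
from unique_toolkit._common.feature_flags.schema import FeatureExtendedSourceSerialization


class MyToolConfig(FeatureExtendedSourceSerialization):
    """Hypothetical example config inheriting the feature-flag field."""


default_cfg = MyToolConfig()  # full_sources_serialize_dump defaults to False (old format)
full_cfg = MyToolConfig(full_sources_serialize_dump=True)  # include full Source objects
```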
unique_toolkit/evals/context_relevancy/schema.py
@@ -1,13 +1,10 @@
-from pydantic import BaseModel,
+from pydantic import BaseModel, Field, create_model
 from pydantic.json_schema import SkipJsonSchema
 
+from unique_toolkit._common.utils.structured_output.schema import StructuredOutputModel
 from unique_toolkit.tools.config import get_configuration_dict
 
 
-class StructuredOutputModel(BaseModel):
-    model_config = ConfigDict(extra="forbid")
-
-
 class StructuredOutputConfig(BaseModel):
     model_config = get_configuration_dict()
 
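This hunk removes the locally defined `StructuredOutputModel` and imports it from the new `unique_toolkit/_common/utils/structured_output/schema.py` module (listed above as +5 lines). The new file's content is not shown in this diff; based on the removed definition, it presumably looks roughly like the sketch below, which is an assumption, not the verified source.

```python
# Assumed content of unique_toolkit/_common/utils/structured_output/schema.py,
# reconstructed from the definition removed above; the actual file may differ.
from pydantic import BaseModel, ConfigDict


class StructuredOutputModel(BaseModel):
    model_config = ConfigDict(extra="forbid")
```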
unique_toolkit/evals/context_relevancy/service.py
@@ -1,9 +1,14 @@
 import logging
+from typing import overload
 
 from pydantic import BaseModel, ValidationError
+from typing_extensions import deprecated
 
-from unique_toolkit.
-from unique_toolkit.
+from unique_toolkit._common.default_language_model import DEFAULT_GPT_35_TURBO
+from unique_toolkit._common.validate_required_values import (
+    validate_required_values,
+)
+from unique_toolkit.app.schemas import BaseEvent, ChatEvent
 from unique_toolkit.evals.config import EvaluationMetricConfig
 from unique_toolkit.evals.context_relevancy.schema import (
     EvaluationSchemaStructuredOutput,
@@ -21,7 +26,6 @@ from unique_toolkit.evals.schemas import (
 )
 from unique_toolkit.language_model.infos import (
     LanguageModelInfo,
-    LanguageModelName,
     ModelCapabilities,
 )
 from unique_toolkit.language_model.prompt import Prompt
@@ -45,9 +49,7 @@ USER_MSG_KEY = "userPrompt"
 default_config = EvaluationMetricConfig(
     enabled=False,
     name=EvaluationMetricName.CONTEXT_RELEVANCY,
-    language_model=LanguageModelInfo.from_name(
-        LanguageModelName.AZURE_GPT_4o_2024_1120
-    ),
+    language_model=LanguageModelInfo.from_name(DEFAULT_GPT_35_TURBO),
     custom_prompts={
         SYSTEM_MSG_KEY: CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
         USER_MSG_KEY: CONTEXT_RELEVANCY_METRIC_USER_MSG,
@@ -61,13 +63,42 @@
 
 
 class ContextRelevancyEvaluator:
+    @deprecated(
+        "Use __init__ with company_id and user_id instead or use the classmethod `from_event`"
+    )
+    @overload
+    def __init__(self, event: ChatEvent | BaseEvent):
+        """
+        Initialize the ContextRelevancyEvaluator with an event (deprecated)
+        """
+
+    @overload
+    def __init__(self, *, company_id: str, user_id: str):
+        """
+        Initialize the ContextRelevancyEvaluator with a company_id and user_id
+        """
+
     def __init__(
         self,
-        event: ChatEvent,
+        event: ChatEvent | BaseEvent | None = None,
+        company_id: str | None = None,
+        user_id: str | None = None,
     ):
-
-
-
+        if isinstance(event, (ChatEvent, BaseEvent)):
+            self.language_model_service = LanguageModelService.from_event(event)
+        else:
+            [company_id, user_id] = validate_required_values([company_id, user_id])
+            self.language_model_service = LanguageModelService(
+                company_id=company_id, user_id=user_id
+            )
+
+        # Setup the logger
+        module_name = "ContextRelevancyEvaluator"
+        self.logger = logging.getLogger(f"{module_name}.{__name__}")
+
+    @classmethod
+    def from_event(cls, event: ChatEvent | BaseEvent):
+        return cls(company_id=event.company_id, user_id=event.user_id)
 
     async def analyze(
         self,
@@ -95,6 +126,7 @@ class ContextRelevancyEvaluator:
 
         input.validate_required_fields(relevancy_required_input_fields)
 
+        # TODO: Was already there in monorepo
         if len(input.context_texts) == 0:  # type: ignore
             error_message = "No context texts provided."
             raise EvaluatorException(
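The evaluator now supports two construction paths, with the event-based constructor kept only for backwards compatibility. A short sketch grounded directly in the hunks above (`event` stands for an existing `ChatEvent`/`BaseEvent`):

```python
# Construction paths introduced by this diff.
from unique_toolkit.evals.context_relevancy.service import ContextRelevancyEvaluator

# Preferred: explicit identifiers (keyword-only per the new overload).
evaluator = ContextRelevancyEvaluator(company_id="company_0", user_id="user_0")

# Or build from an incoming event via the new classmethod.
evaluator = ContextRelevancyEvaluator.from_event(event)

# Deprecated but still accepted: passing the event directly.
evaluator = ContextRelevancyEvaluator(event)
```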
unique_toolkit/language_model/infos.py
@@ -48,6 +48,14 @@ class LanguageModelName(StrEnum):
     LITELLM_OPENAI_GPT_5_MINI = "litellm:openai-gpt-5-mini"
     LITELLM_OPENAI_GPT_5_NANO = "litellm:openai-gpt-5-nano"
     LITELLM_OPENAI_GPT_5_CHAT = "litellm:openai-gpt-5-chat"
+    LITELLM_OPENAI_O1 = "litellm:openai-o1"
+    LITELLM_OPENAI_O3 = "litellm:openai-o3"
+    LITELLM_OPENAI_O3_DEEP_RESEARCH = "litellm:openai-o3-deep-research"
+    LITELLM_OPENAI_O3_PRO = "litellm:openai-o3-pro"
+    LITELLM_OPENAI_O4_MINI = "litellm:openai-o4-mini"
+    LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH = "litellm:openai-o4-mini-deep-research"
+    LITELLM_OPENAI_GPT_4_1_MINI = "litellm:openai-gpt-4-1-mini"
+    LITELLM_OPENAI_GPT_4_1_NANO = "litellm:openai-gpt-4-1-nano"
     LITELLM_DEEPSEEK_R1 = "litellm:deepseek-r1"
     LITELLM_DEEPSEEK_V3 = "litellm:deepseek-v3-1"
     LITELLM_QWEN_3 = "litellm:qwen-3-235B-A22B"
@@ -83,6 +91,14 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
            | LMN.LITELLM_OPENAI_GPT_5_MINI
            | LMN.LITELLM_OPENAI_GPT_5_NANO
            | LMN.LITELLM_OPENAI_GPT_5_CHAT
+            | LMN.LITELLM_OPENAI_O1
+            | LMN.LITELLM_OPENAI_O3
+            | LMN.LITELLM_OPENAI_O3_DEEP_RESEARCH
+            | LMN.LITELLM_OPENAI_O4_MINI
+            | LMN.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH
+            | LMN.LITELLM_OPENAI_GPT_4_1_MINI
+            | LMN.LITELLM_OPENAI_GPT_4_1_NANO
+            | LMN.LITELLM_OPENAI_O3_PRO
         ):
             return EncoderName.O200K_BASE
         case _:
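All of the new LiteLLM o-series and GPT-4.1 names are routed to the `O200K_BASE` encoder. A quick check, assuming `get_encoder_name`, `LanguageModelName`, and `EncoderName` are all importable from the same module, as the hunk header suggests:

```python
# Sanity check for the new encoder routing added in this release.
from unique_toolkit.language_model.infos import (
    EncoderName,
    LanguageModelName,
    get_encoder_name,
)

assert get_encoder_name(LanguageModelName.LITELLM_OPENAI_O3) == EncoderName.O200K_BASE
assert get_encoder_name(LanguageModelName.LITELLM_OPENAI_GPT_4_1_MINI) == EncoderName.O200K_BASE
```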
@@ -879,6 +895,146 @@ class LanguageModelInfo(BaseModel):
                     deprecated_at=date(2026, 8, 7),
                     retirement_at=date(2026, 8, 7),
                 )
+            case LanguageModelName.LITELLM_OPENAI_O1:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2024-12-17",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.VISION,
+                        ModelCapabilities.REASONING,
+                    ],
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    info_cutoff_at=date(2023, 10, 1),
+                    published_at=date(2024, 12, 17),
+                    temperature_bounds=TemperatureBounds(
+                        min_temperature=1.0, max_temperature=1.0
+                    ),
+                )
+            case LanguageModelName.LITELLM_OPENAI_O3:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-04-16",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.REASONING,
+                    ],
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    temperature_bounds=TemperatureBounds(
+                        min_temperature=1.0, max_temperature=1.0
+                    ),
+                    published_at=date(2025, 4, 16),
+                    info_cutoff_at=date(2024, 6, 1),
+                )
+            case LanguageModelName.LITELLM_OPENAI_O3_DEEP_RESEARCH:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-06-26",
+                    encoder_name=EncoderName.O200K_BASE,
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    published_at=date(2025, 4, 16),
+                    capabilities=[ModelCapabilities.STREAMING],
+                    info_cutoff_at=date(2024, 6, 1),
+                )
+            case LanguageModelName.LITELLM_OPENAI_O3_PRO:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-06-10",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.REASONING,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    published_at=date(2025, 6, 10),
+                    info_cutoff_at=date(2024, 6, 1),
+                )
+            case LanguageModelName.LITELLM_OPENAI_O4_MINI:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-04-16",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    published_at=date(2025, 4, 16),
+                    info_cutoff_at=date(2024, 6, 1),
+                    temperature_bounds=TemperatureBounds(
+                        min_temperature=1.0, max_temperature=1.0
+                    ),
+                )
+            case LanguageModelName.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-06-26",
+                    encoder_name=EncoderName.O200K_BASE,
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=200_000, token_limit_output=100_000
+                    ),
+                    published_at=date(2025, 4, 16),
+                    capabilities=[ModelCapabilities.STREAMING],
+                    info_cutoff_at=date(2024, 6, 1),
+                )
+            case LanguageModelName.LITELLM_OPENAI_GPT_4_1_MINI:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-04-14",
+                    encoder_name=EncoderName.O200K_BASE,
+                    published_at=date(2025, 4, 14),
+                    info_cutoff_at=date(2024, 6, 1),
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=1_047_576, token_limit_output=32_768
+                    ),
+                    capabilities=[
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                )
+            case LanguageModelName.LITELLM_OPENAI_GPT_4_1_NANO:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.LITELLM,
+                    version="2025-04-14",
+                    encoder_name=EncoderName.O200K_BASE,
+                    published_at=date(2025, 4, 14),
+                    info_cutoff_at=date(2024, 6, 1),
+                    token_limits=LanguageModelTokenLimits(
+                        token_limit_input=1_047_576, token_limit_output=32_768
+                    ),
+                    capabilities=[
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                )
             case LanguageModelName.LITELLM_DEEPSEEK_R1:
                 return cls(
                     name=model_name,
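Each new case returns a fully populated `LanguageModelInfo` via the existing `from_name` factory. A short usage sketch, with values read directly from the new `LITELLM_OPENAI_O4_MINI` case above:

```python
# Resolving one of the newly registered model names.
from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName

info = LanguageModelInfo.from_name(LanguageModelName.LITELLM_OPENAI_O4_MINI)
print(info.version)                         # "2025-04-16"
print(info.token_limits.token_limit_input)  # 200000
print(info.encoder_name)                    # EncoderName.O200K_BASE
```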
unique_toolkit/tools/a2a/config.py
@@ -1,7 +1,5 @@
-from unique_toolkit.tools.schemas import BaseToolConfig
-
 from unique_toolkit.tools.config import get_configuration_dict
-
+from unique_toolkit.tools.schemas import BaseToolConfig
 
 DEFAULT_PARAM_DESCRIPTION_SUB_AGENT_USER_MESSAGE = """
 This is the message that will be sent to the sub-agent.
unique_toolkit/tools/a2a/manager.py
@@ -1,9 +1,8 @@
 from logging import Logger
 
 from unique_toolkit.app.schemas import ChatEvent
-from unique_toolkit.tools.a2a.service import SubAgentTool, ToolProgressReporter
-
 from unique_toolkit.tools.a2a.config import SubAgentToolConfig
+from unique_toolkit.tools.a2a.service import SubAgentTool, ToolProgressReporter
 from unique_toolkit.tools.config import ToolBuildConfig
 from unique_toolkit.tools.schemas import BaseToolConfig
 from unique_toolkit.tools.tool import Tool
unique_toolkit/tools/a2a/service.py
@@ -1,25 +1,23 @@
 from pydantic import Field, create_model
 from unique_sdk.utils.chat_in_space import send_message_and_wait_for_completion
+
 from unique_toolkit.app import ChatEvent
+from unique_toolkit.evaluators.schemas import EvaluationMetricName
 from unique_toolkit.language_model import (
     LanguageModelFunction,
     LanguageModelMessage,
     LanguageModelToolDescription,
 )
-
-from unique_toolkit.evaluators.schemas import EvaluationMetricName
-from unique_toolkit.tools.schemas import ToolCallResponse
 from unique_toolkit.tools.a2a.config import SubAgentToolConfig
 from unique_toolkit.tools.a2a.memory import (
     get_sub_agent_short_term_memory_manager,
 )
 from unique_toolkit.tools.a2a.schema import (
     SubAgentShortTermMemorySchema,
-    SubAgentToolCallResponse,
     SubAgentToolInput,
 )
+from unique_toolkit.tools.schemas import ToolCallResponse
 from unique_toolkit.tools.tool import Tool
-
 from unique_toolkit.tools.tool_progress_reporter import (
     ProgressState,
     ToolProgressReporter,
@@ -148,7 +146,7 @@ class SubAgentTool(Tool[SubAgentToolConfig]):
         tool_response: ToolCallResponse,
     ) -> LanguageModelMessage:
         return ToolCallResponse(
-            id=
-            name=
-            content=
+            id=tool_response.id,
+            name=tool_response.name,
+            content=tool_response["content"],
         )