unique_toolkit 0.8.31__py3-none-any.whl → 0.8.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +45 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +372 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +273 -0
- unique_toolkit/_common/feature_flags/schema.py +13 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/evals/context_relevancy/schema.py +2 -5
- unique_toolkit/evals/context_relevancy/service.py +42 -10
- unique_toolkit/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/tools/tool.py +5 -2
- {unique_toolkit-0.8.31.dist-info → unique_toolkit-0.8.33.dist-info}/METADATA +9 -1
- {unique_toolkit-0.8.31.dist-info → unique_toolkit-0.8.33.dist-info}/RECORD +15 -7
- {unique_toolkit-0.8.31.dist-info → unique_toolkit-0.8.33.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.8.31.dist-info → unique_toolkit-0.8.33.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from pydantic.json_schema import SkipJsonSchema
|
|
5
|
+
|
|
6
|
+
from unique_toolkit._common.default_language_model import DEFAULT_GPT_35_TURBO
|
|
7
|
+
from unique_toolkit._common.validators import LMI, get_LMI_default_field
|
|
8
|
+
from unique_toolkit.evals.context_relevancy.schema import StructuredOutputConfig
|
|
9
|
+
from unique_toolkit.tools.config import get_configuration_dict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ChunkRelevancySortConfig(BaseModel):
    # Settings consumed by the chunk relevancy sorter. Documented with `#`
    # comments on purpose: a class docstring would be picked up by pydantic as
    # the schema description.
    model_config = get_configuration_dict()

    # Master switch for the feature.
    enabled: bool = Field(
        description="Whether to enable the chunk relevancy sort.",
        default=False,
    )

    # Relevancy labels that are kept when filtering evaluated chunks.
    relevancy_levels_to_consider: list[str] = Field(
        description="The relevancy levels to consider.",
        default=["high", "medium", "low"],
    )

    # Sort rank per relevancy label; a lower number sorts first.
    relevancy_level_order: dict[str, int] = Field(
        description="The relevancy level order.",
        default={"high": 0, "medium": 1, "low": 2},
    )

    # Model used for the first evaluation attempt of each chunk.
    language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_35_TURBO,
        description="The language model to use for the chunk relevancy sort.",
    )

    # Model used when the first attempt fails.
    fallback_language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_35_TURBO,
        description="The language model to use as a fallback.",
    )

    # Extra keyword options forwarded with the evaluation request.
    additional_llm_options: dict[str, Any] = Field(
        description="Additional options to pass to the language model.",
        default={},
    )

    # Controls whether/how structured output is requested from the model.
    structured_output_config: StructuredOutputConfig = Field(
        description="The configuration for the structured output.",
        default_factory=StructuredOutputConfig,
    )

    # Upper bound on concurrently running evaluation tasks. `None` is accepted
    # at runtime but hidden from the generated JSON schema.
    # NOTE(review): presumably `None` means "no limit" - confirm against
    # run_async_tasks_parallel.
    max_tasks: int | SkipJsonSchema[None] = Field(
        description="The maximum number of tasks to run in parallel.",
        default=1000,
    )
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from unique_toolkit.content.schemas import ContentChunk
|
|
6
|
+
from unique_toolkit.evals.schemas import EvaluationMetricResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ChunkRelevancy(BaseModel):
    # Pairs a content chunk with the (optional) result of its relevancy
    # evaluation. `relevancy` stays None until an evaluation has been attached.
    chunk: ContentChunk
    relevancy: EvaluationMetricResult | None = None

    def get_document_name(self) -> str:
        """Return a display name for the chunk's document.

        Uses the chunk's ``key`` if set, otherwise its ``title``, otherwise
        the literal ``"Unknown"``. Only the part before the first ``":"`` is
        returned.
        """
        # Fixed the misspelled fallback label ("Unkown").
        title = self.chunk.key or self.chunk.title or "Unknown"
        return title.split(":")[0]

    def get_page_number(self) -> str:
        """Return the chunk's page reference as a string.

        Returns:
            ``"Unknown Page"`` when neither bound is known, the single page
            number when only one bound is known or both are equal, and
            ``"<start>-<end>"`` otherwise.
        """
        start_page = self.chunk.start_page
        end_page = self.chunk.end_page

        # Explicit None checks so that a legitimate page number of 0 is not
        # mistaken for "missing", and so every branch returns a string (the
        # previous version leaked a raw int when only one bound was set).
        if start_page is None and end_page is None:
            return "Unknown Page"
        if start_page is None:
            return str(end_page)
        if end_page is None or start_page == end_page:
            return str(start_page)
        return f"{start_page}-{end_page}"

    def get_facts(self):
        """Return the evaluation's fact list, or an empty list if unevaluated."""
        if self.relevancy is None:
            return []
        return self.relevancy.fact_list
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ChunkRelevancySorterResult(BaseModel):
    # Outcome of a relevancy sort: the chunk/relevancy pairs in result order
    # plus an optional message intended for the user.
    relevancies: list[ChunkRelevancy]
    user_message: Optional[str] = None

    @staticmethod
    def from_chunks(chunks: list[ContentChunk]):
        """Wrap plain chunks in a result without any relevancy information.

        Each chunk becomes a ``ChunkRelevancy`` whose ``relevancy`` is left at
        its default; the input order is kept.
        """
        wrapped = []
        for content_chunk in chunks:
            wrapped.append(ChunkRelevancy(chunk=content_chunk))
        return ChunkRelevancySorterResult(relevancies=wrapped)

    @property
    def content_chunks(self):
        """The bare content chunks, in the same order as ``relevancies``."""
        extracted = []
        for entry in self.relevancies:
            extracted.append(entry.chunk)
        return extracted
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from typing import Any, overload
|
|
5
|
+
|
|
6
|
+
from typing_extensions import deprecated
|
|
7
|
+
|
|
8
|
+
from unique_toolkit._common.chunk_relevancy_sorter.config import (
|
|
9
|
+
ChunkRelevancySortConfig,
|
|
10
|
+
)
|
|
11
|
+
from unique_toolkit._common.chunk_relevancy_sorter.exception import (
|
|
12
|
+
ChunkRelevancySorterException,
|
|
13
|
+
)
|
|
14
|
+
from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
|
|
15
|
+
ChunkRelevancy,
|
|
16
|
+
ChunkRelevancySorterResult,
|
|
17
|
+
)
|
|
18
|
+
from unique_toolkit._common.validate_required_values import validate_required_values
|
|
19
|
+
from unique_toolkit.app.performance.async_tasks import run_async_tasks_parallel
|
|
20
|
+
from unique_toolkit.app.schemas import BaseEvent, ChatEvent
|
|
21
|
+
from unique_toolkit.content.schemas import ContentChunk
|
|
22
|
+
from unique_toolkit.evals.config import EvaluationMetricConfig
|
|
23
|
+
from unique_toolkit.evals.context_relevancy.schema import (
|
|
24
|
+
EvaluationSchemaStructuredOutput,
|
|
25
|
+
StructuredOutputConfig,
|
|
26
|
+
)
|
|
27
|
+
from unique_toolkit.evals.context_relevancy.service import ContextRelevancyEvaluator
|
|
28
|
+
from unique_toolkit.evals.exception import EvaluatorException
|
|
29
|
+
from unique_toolkit.evals.schemas import (
|
|
30
|
+
EvaluationMetricInput,
|
|
31
|
+
EvaluationMetricName,
|
|
32
|
+
EvaluationMetricResult,
|
|
33
|
+
)
|
|
34
|
+
from unique_toolkit.language_model.infos import LanguageModelInfo
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ChunkRelevancySorter:
    """Filter and re-order search chunks by their evaluated relevancy.

    Each chunk is evaluated against the input text with a
    ``ContextRelevancyEvaluator``; the resulting relevancy labels are then
    used to filter the chunks and to sort them according to the configured
    level order.
    """

    @deprecated(
        "Use __init__ with company_id and user_id instead or use the classmethod `from_event`"
    )
    @overload
    def __init__(self, event: ChatEvent | BaseEvent):
        """
        Initialize the ChunkRelevancySorter with an event (deprecated)
        """

    @overload
    def __init__(self, *, company_id: str, user_id: str):
        """
        Initialize the ChunkRelevancySorter with a company_id and user_id
        """

    def __init__(
        self,
        event: ChatEvent | BaseEvent | None = None,
        company_id: str | None = None,
        user_id: str | None = None,
    ):
        """Create the sorter and its underlying relevancy evaluator.

        Args:
            event: Deprecated. When given, the evaluator is built from it.
            company_id: Required when no event is passed.
            user_id: Required when no event is passed.
        """
        if isinstance(event, (ChatEvent, BaseEvent)):
            self.chunk_relevancy_evaluator = ContextRelevancyEvaluator.from_event(event)
        else:
            # Without an event both ids are mandatory; the helper validates
            # them and hands them back.
            [company_id, user_id] = validate_required_values([company_id, user_id])
            self.chunk_relevancy_evaluator = ContextRelevancyEvaluator(
                company_id=company_id, user_id=user_id
            )
        module_name = "ChunkRelevancySorter"
        self.logger = logging.getLogger(f"{module_name}.{__name__}")

    @classmethod
    def from_event(cls, event: ChatEvent | BaseEvent):
        """Build a sorter from the ``company_id`` and ``user_id`` of an event."""
        return cls(company_id=event.company_id, user_id=event.user_id)

    async def run(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> ChunkRelevancySorterResult:
        """
        Resorts the search chunks by classifying each chunk into High, Medium, Low depending on the relevancy to the user input, then
        sorts the chunks based on the classification while preserving the original order.

        Args:
            input_text (str): The text the chunks are evaluated against.
            chunks (list[ContentChunk]): The list of search chunks to be reranked.
            config (ChunkRelevancySortConfig): The sorter configuration. When
                ``config.enabled`` is false the chunks are returned as-is.

        Returns:
            ChunkRelevancySorterResult: The result of the chunk relevancy sort.

        Raises:
            ChunkRelevancySorterException: If an error occurs while sorting the chunks.
        """

        if not config.enabled:
            self.logger.info("Chunk relevancy sort is disabled.")
            return ChunkRelevancySorterResult.from_chunks(chunks)

        self.logger.info("Running chunk relevancy sort.")
        return await self._run_chunk_relevancy_sort(input_text, chunks, config)

    async def _run_chunk_relevancy_sort(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> ChunkRelevancySorterResult:
        """Evaluate, validate, filter and sort the chunks, timing the run.

        Raises:
            ChunkRelevancySorterException: Re-raised as-is when raised by a
                step; any other exception is wrapped in one.
        """
        start_time = time.time()

        resorted_relevancies = []

        try:
            self.logger.info(f"Resorting {len(chunks)} chunks based on relevancy...")
            chunk_relevancies = await self._evaluate_chunks_relevancy(
                input_text,
                chunks,
                config,
            )
            resorted_relevancies = await self._validate_and_sort_relevant_chunks(
                config,
                chunk_relevancies,
            )
        except ChunkRelevancySorterException as e:
            self.logger.error(e.error_message)
            raise
        except Exception as e:
            unknown_error_msg = "Unknown error occurred while resorting search results."
            # Chain the cause so the original traceback is not lost.
            raise ChunkRelevancySorterException(
                user_message=f"{unknown_error_msg}. Fallback to original search results.",
                error_message=f"{unknown_error_msg}: {e}",
            ) from e
        finally:
            # Runs on success and on failure; on failure the count is 0.
            end_time = time.time()
            duration = end_time - start_time
            total_chunks = len(resorted_relevancies)
            success_msg = f"Resorted {total_chunks} chunks in {duration:.2f} seconds."
            self.logger.info(success_msg)

        # Deliberately placed after the `try` statement rather than inside
        # `finally`: a `return` inside `finally` would swallow the exceptions
        # raised above.
        return ChunkRelevancySorterResult(
            relevancies=resorted_relevancies,
            user_message=success_msg,
        )

    async def _evaluate_chunks_relevancy(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> list[ChunkRelevancy]:
        """
        Evaluates the relevancy of the chunks.

        One evaluation task is created per chunk and the tasks are run via
        ``run_async_tasks_parallel`` with ``config.max_tasks``.

        Raises:
            ChunkRelevancySorterException: If any task result is an exception.
        """
        self.logger.info(
            f"Processing chunk relevancy for {len(chunks)} chunks with {config.language_model.name}. "
            f"(Structured output: {config.structured_output_config.enabled}. Extract fact list: {config.structured_output_config.extract_fact_list})",
        )

        # Evaluate the relevancy of each chunk
        tasks = [
            self._process_relevancy_evaluation(input_text, chunk=chunk, config=config)
            for chunk in chunks
        ]
        task_results = await run_async_tasks_parallel(
            tasks=tasks,
            max_tasks=config.max_tasks,
            logger=self.logger,
        )

        chunk_relevancies: list[ChunkRelevancy] = []
        for task_result in task_results:
            # Failed tasks come back as exception objects; the first one
            # aborts the whole sort.
            if isinstance(task_result, Exception):
                error_msg = "Error occurred while evaluating context relevancy of a specific chunk"
                raise ChunkRelevancySorterException(
                    user_message=f"{error_msg}. Fallback to original search results.",
                    error_message=f"{error_msg}: {task_result}",
                    exception=task_result,
                )

            # The type check is needed because run_async_tasks_parallel does
            # not enforce the return type.
            # TODO fix return type in run_async_tasks_parallel
            if isinstance(task_result, ChunkRelevancy):
                chunk_relevancies.append(task_result)
            else:
                # Still skipped as before, but no longer silently.
                self.logger.warning(
                    "Ignoring unexpected chunk relevancy result of type %s.",
                    type(task_result).__name__,
                )

        return chunk_relevancies

    async def _evaluate_chunk_relevancy(
        self,
        input_text: str,
        langugage_model: LanguageModelInfo,
        chunk: ContentChunk,
        structured_output_config: StructuredOutputConfig,
        additional_llm_options: dict[str, Any],
    ) -> EvaluationMetricResult | None:
        """
        Gets the relevancy score of the chunk compared to the user message txt.

        Note: the misspelled parameter name ``langugage_model`` is kept
        because callers pass it by keyword.
        """
        # Only request a structured output schema when it is enabled.
        structured_output_schema = (
            EvaluationSchemaStructuredOutput.get_with_descriptions(
                structured_output_config
            )
            if structured_output_config.enabled
            else None
        )

        metric_config = EvaluationMetricConfig(
            enabled=True,
            name=EvaluationMetricName.CONTEXT_RELEVANCY,
            language_model=langugage_model,
            additional_llm_options=additional_llm_options,
        )
        relevancy_input = EvaluationMetricInput(
            input_text=input_text,
            context_texts=[chunk.text],
        )

        return await self.chunk_relevancy_evaluator.analyze(
            input=relevancy_input,
            config=metric_config,
            structured_output_schema=structured_output_schema,
        )

    async def _process_relevancy_evaluation(
        self,
        input_text: str,
        chunk: ContentChunk,
        config: ChunkRelevancySortConfig,
    ):
        """Evaluate one chunk, retrying once with the fallback model.

        The fallback is attempted only for an ``EvaluatorException`` that
        carries an underlying ``exception``.

        Returns:
            ChunkRelevancy: The chunk with the relevancy from whichever model
            succeeded.

        Raises:
            EvaluatorException: When raised without an underlying exception.
            ChunkRelevancySorterException: For any other error of the first
                attempt.
        """
        model = config.language_model
        fallback_model = config.fallback_language_model
        try:
            relevancy = await self._evaluate_chunk_relevancy(
                input_text=input_text,
                langugage_model=model,
                chunk=chunk,
                structured_output_config=config.structured_output_config,
                additional_llm_options=config.additional_llm_options,
            )
            return ChunkRelevancy(
                chunk=chunk,
                relevancy=relevancy,
            )
        except EvaluatorException as e:
            if not e.exception:
                raise
            # Fix: the third placeholder names the fallback model; the
            # underlying error now has its own slot instead of occupying it.
            self.logger.warning(
                "Error evaluating chunk ID %s with model %s. Trying fallback model %s. Error: %s",
                chunk.chunk_id,
                model.name,
                fallback_model.name,
                e.exception,
            )
            relevancy = await self._evaluate_chunk_relevancy(
                input_text=input_text,
                langugage_model=fallback_model,
                chunk=chunk,
                structured_output_config=config.structured_output_config,
                additional_llm_options=config.additional_llm_options,
            )
            # Fix: the fallback result used to be computed and then dropped,
            # so the method returned None and the chunk vanished downstream.
            return ChunkRelevancy(
                chunk=chunk,
                relevancy=relevancy,
            )
        except Exception as e:
            raise ChunkRelevancySorterException(
                user_message="Error occurred while evaluating context relevancy of a specific chunk.",
                error_message=f"Error in _process_relevancy_evaluation: {e}",
                exception=e,
            ) from e

    async def _validate_and_sort_relevant_chunks(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ) -> list[ChunkRelevancy]:
        """
        Checks for error or no value in chunk relevancy, then filters and
        sorts the chunk relevancies.
        """

        # Check that all chunk relevancies have a relevancy level
        await self._validate_chunk_relevancies(chunk_relevancies)

        # Filter the chunks based on the relevancy levels to consider
        chunk_relevancies = await self._filter_chunks_by_relevancy_levels(
            config,
            chunk_relevancies,
        )

        # Sort the chunks based on the relevancy levels
        sorted_chunks = await self._sort_chunk_relevancies_by_relevancy_and_chunk(
            config,
            chunk_relevancies,
        )

        return sorted_chunks

    async def _validate_chunk_relevancies(
        self,
        chunk_relevancies: list[ChunkRelevancy],
    ):
        """Ensure every entry carries a non-empty relevancy value.

        Raises:
            ChunkRelevancySorterException: For the first entry without one.
        """
        for chunk_relevancy in chunk_relevancies:
            if not chunk_relevancy.relevancy or not chunk_relevancy.relevancy.value:
                raise ChunkRelevancySorterException(
                    user_message="Error occurred while evaluating chunk relevancy.",
                    error_message=f"No relevancy level returned for chunk ID {chunk_relevancy.chunk.chunk_id}.",
                )

    async def _sort_chunk_relevancies_by_relevancy_and_chunk(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ):
        """Sort by configured relevancy rank, keeping input order within a rank.

        Raises:
            KeyError: If a relevancy value has no entry in
                ``config.relevancy_level_order``.
        """
        # Lower-case the configured keys because the looked-up value is
        # lower-cased too.
        level_rank = {
            level.lower(): rank
            for level, rank in config.relevancy_level_order.items()
        }

        # `sorted` is stable, so entries with the same rank keep their
        # original relative order. The former tie-breaker looked positions up
        # by chunk_id, which reordered entries sharing an id.
        sorted_chunk_relevancies = sorted(
            chunk_relevancies,
            key=lambda obj: level_rank[obj.relevancy.value.lower()],  # type: ignore
        )

        # Count and log the distinct values of relevancy
        self._count_distinct_values(sorted_chunk_relevancies)

        return sorted_chunk_relevancies

    async def _filter_chunks_by_relevancy_levels(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ) -> list[ChunkRelevancy]:
        """Keep only entries whose relevancy is in the configured levels.

        The comparison is case-insensitive. An empty level list disables
        filtering and returns the input unchanged.
        """
        levels_to_consider = [
            relevancy_level.lower()
            for relevancy_level in config.relevancy_levels_to_consider
        ]
        if not levels_to_consider:
            self.logger.warning("No relevancy levels defined, including all levels.")
            return chunk_relevancies

        self.logger.info(
            "Filtering chunks by relevancy levels: %s.", levels_to_consider
        )
        return [
            chunk_relevancy
            for chunk_relevancy in chunk_relevancies
            if chunk_relevancy.relevancy.value.lower() in levels_to_consider  # type: ignore
        ]

    def _count_distinct_values(self, chunk_relevancies: list[ChunkRelevancy]):
        """Log and return how often each relevancy value occurs.

        Entries without a relevancy or with an empty value are not counted.

        Returns:
            Counter: Mapping of relevancy value to number of occurrences.
        """
        # Extract the values from the relevancy field
        values = [
            cr.relevancy.value
            for cr in chunk_relevancies
            if cr.relevancy and cr.relevancy.value
        ]

        # Use Counter to count occurrences
        value_counts = Counter(values)

        self.logger.info("Count of distinct relevancy values:")
        for value, count in value_counts.items():
            self.logger.info(f"Relevancy: {value}, Count: {count}")

        return value_counts
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from unique_toolkit._common.chunk_relevancy_sorter.config import (
|
|
6
|
+
ChunkRelevancySortConfig,
|
|
7
|
+
)
|
|
8
|
+
from unique_toolkit._common.chunk_relevancy_sorter.exception import (
|
|
9
|
+
ChunkRelevancySorterException,
|
|
10
|
+
)
|
|
11
|
+
from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
|
|
12
|
+
ChunkRelevancy,
|
|
13
|
+
ChunkRelevancySorterResult,
|
|
14
|
+
)
|
|
15
|
+
from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter
|
|
16
|
+
from unique_toolkit._common.default_language_model import (
|
|
17
|
+
DEFAULT_GPT_35_TURBO,
|
|
18
|
+
DEFAULT_GPT_4o,
|
|
19
|
+
)
|
|
20
|
+
from unique_toolkit.app.schemas import ChatEvent
|
|
21
|
+
from unique_toolkit.content.schemas import ContentChunk
|
|
22
|
+
from unique_toolkit.evals.context_relevancy.schema import StructuredOutputConfig
|
|
23
|
+
from unique_toolkit.evals.schemas import EvaluationMetricName, EvaluationMetricResult
|
|
24
|
+
from unique_toolkit.language_model.infos import LanguageModelInfo
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@pytest.fixture
def event():
    """Provide a ChatEvent-spec'd mock with a user message and ids."""
    chat_event = MagicMock(spec=ChatEvent)

    # Build the payload separately, then attach it to the event.
    payload = MagicMock()
    payload.user_message = MagicMock()
    payload.user_message.text = "Test query"
    chat_event.payload = payload

    chat_event.user_id = "user_0"
    chat_event.company_id = "company_0"
    return chat_event
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@pytest.fixture
def mock_chunks():
    """Provide three content chunks numbered 0..2."""
    chunks = []
    for index in range(3):
        chunks.append(
            ContentChunk(
                id=f"chunk_{index}",
                order=index,
                chunk_id=f"chunk_{index}",
                text=f"Test content {index}",
            )
        )
    return chunks
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.fixture
def config():
    """Provide an enabled sort config with structured output switched off."""
    primary_model = LanguageModelInfo.from_name(DEFAULT_GPT_4o)
    fallback_model = LanguageModelInfo.from_name(DEFAULT_GPT_35_TURBO)
    plain_output = StructuredOutputConfig(
        enabled=False,
        extract_fact_list=False,
    )

    return ChunkRelevancySortConfig(
        enabled=True,
        relevancy_levels_to_consider=["high", "medium", "low"],
        relevancy_level_order={"high": 0, "medium": 1, "low": 2},
        language_model=primary_model,
        fallback_language_model=fallback_model,
        structured_output_config=plain_output,
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.fixture
def chunk_relevancy_sorter(event):
    """Provide a sorter constructed from the mocked event."""
    sorter = ChunkRelevancySorter(event)
    return sorter
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@pytest.mark.asyncio
async def test_run_disabled_config(chunk_relevancy_sorter, mock_chunks, config):
    """A disabled config hands the chunks back unchanged."""
    config.enabled = False

    outcome = await chunk_relevancy_sorter.run("test input", mock_chunks, config)

    assert isinstance(outcome, ChunkRelevancySorterResult)
    returned_chunks = outcome.content_chunks
    assert returned_chunks == mock_chunks
    assert len(returned_chunks) == len(mock_chunks)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@pytest.mark.asyncio
async def test_run_enabled_config(chunk_relevancy_sorter, mock_chunks, config):
    """An enabled config delegates to the internal sort routine."""
    canned_result = ChunkRelevancySorterResult.from_chunks(mock_chunks)

    with patch.object(chunk_relevancy_sorter, "_run_chunk_relevancy_sort") as mock_sort:
        mock_sort.return_value = canned_result

        outcome = await chunk_relevancy_sorter.run("test input", mock_chunks, config)

        assert isinstance(outcome, ChunkRelevancySorterResult)
        assert outcome.content_chunks == mock_chunks
        mock_sort.assert_called_once_with("test input", mock_chunks, config)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@pytest.mark.asyncio
async def test_evaluate_chunks_relevancy(chunk_relevancy_sorter, mock_chunks, config):
    """Every chunk is sent through the per-chunk evaluation exactly once."""
    high_relevancy = EvaluationMetricResult(
        value="high",
        name=EvaluationMetricName.CONTEXT_RELEVANCY,
        reason="Test reason",
    )
    # The stub returns the same pair for every chunk.
    canned_pair = ChunkRelevancy(chunk=mock_chunks[0], relevancy=high_relevancy)

    with patch.object(
        chunk_relevancy_sorter, "_process_relevancy_evaluation"
    ) as mock_process:
        mock_process.return_value = canned_pair

        evaluated = await chunk_relevancy_sorter._evaluate_chunks_relevancy(
            "test input", mock_chunks, config
        )

        expected_count = len(mock_chunks)
        assert len(evaluated) == expected_count
        assert all(isinstance(item, ChunkRelevancy) for item in evaluated)
        assert mock_process.call_count == expected_count
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@pytest.mark.asyncio
async def test_evaluate_chunk_relevancy(chunk_relevancy_sorter, mock_chunks, config):
    """The evaluator's result is passed through for a single chunk."""
    analyze_target = (
        "unique_toolkit._common.chunk_relevancy_sorter.service.ContextRelevancyEvaluator.analyze"
    )

    with patch(analyze_target) as mock_analyze:
        mock_analyze.return_value = EvaluationMetricResult(
            value="high",
            name=EvaluationMetricName.CONTEXT_RELEVANCY,
            reason="Test reason",
        )

        evaluation = await chunk_relevancy_sorter._evaluate_chunk_relevancy(
            input_text="test input",
            chunk=mock_chunks[0],
            langugage_model=config.language_model,
            structured_output_config=config.structured_output_config,
            additional_llm_options=config.additional_llm_options,
        )

        assert isinstance(evaluation, EvaluationMetricResult)
        assert evaluation.value == "high"
        mock_analyze.assert_called_once()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@pytest.mark.asyncio
async def test_process_relevancy_evaluation_success(
    chunk_relevancy_sorter, mock_chunks, config
):
    """A successful evaluation is wrapped together with its chunk."""
    first_chunk = mock_chunks[0]

    with patch.object(
        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
    ) as mock_evaluate:
        mock_evaluate.return_value = EvaluationMetricResult(
            value="high",
            name=EvaluationMetricName.CONTEXT_RELEVANCY,
            reason="Test reason",
        )

        processed = await chunk_relevancy_sorter._process_relevancy_evaluation(
            "test input", first_chunk, config
        )

        assert isinstance(processed, ChunkRelevancy)
        assert processed.chunk == first_chunk
        assert processed.relevancy is not None
        assert processed.relevancy.value == "high"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@pytest.mark.asyncio
async def test_process_relevancy_evaluation_fallback(
    chunk_relevancy_sorter, mock_chunks, config
):
    """A generic error on the first attempt surfaces as a sorter exception."""
    medium_relevancy = EvaluationMetricResult(
        value="medium",
        name=EvaluationMetricName.CONTEXT_RELEVANCY,
        reason="Test reason",
    )

    with patch.object(
        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
    ) as mock_evaluate:
        # The first call raises a plain Exception. The second entry would be
        # returned by a retry, but the assertion below expects the first
        # error to be raised as ChunkRelevancySorterException.
        mock_evaluate.side_effect = [Exception("Test error"), medium_relevancy]

        with pytest.raises(ChunkRelevancySorterException):
            await chunk_relevancy_sorter._process_relevancy_evaluation(
                "test input", mock_chunks[0], config
            )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@pytest.mark.asyncio
async def test_validate_and_sort_relevant_chunks(
    chunk_relevancy_sorter, mock_chunks, config
):
    """Chunks rated low/medium/high come back ordered high, medium, low."""
    # Chunk 0 is rated low, chunk 1 medium, chunk 2 high.
    chunk_relevancies = [
        ChunkRelevancy(
            chunk=chunk,
            relevancy=EvaluationMetricResult(
                value=level,
                name=EvaluationMetricName.CONTEXT_RELEVANCY,
                reason="Test reason",
            ),
        )
        for chunk, level in zip(mock_chunks[:3], ("low", "medium", "high"))
    ]

    ordered = await chunk_relevancy_sorter._validate_and_sort_relevant_chunks(
        config, chunk_relevancies
    )

    assert isinstance(ordered, list)
    assert len(ordered) == len(mock_chunks)
    assert all(isinstance(entry.chunk, ContentChunk) for entry in ordered)
    # The input order is exactly reversed by the sort.
    assert ordered[0].chunk == mock_chunks[2]
    assert ordered[1].chunk == mock_chunks[1]
    assert ordered[2].chunk == mock_chunks[0]
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@pytest.mark.asyncio
async def test_validate_chunk_relevancies_invalid(chunk_relevancy_sorter):
    """An entry without a relevancy is rejected by validation."""
    unevaluated_chunk = ContentChunk(chunk_id="test", text="test", id="test", order=0)
    invalid_relevancies = [ChunkRelevancy(chunk=unevaluated_chunk, relevancy=None)]

    with pytest.raises(ChunkRelevancySorterException):
        await chunk_relevancy_sorter._validate_chunk_relevancies(invalid_relevancies)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_count_distinct_values(chunk_relevancy_sorter, mock_chunks):
    """Two 'high' entries and one 'medium' entry are counted per value."""
    chunk_relevancies = []
    for chunk, level in zip(mock_chunks[:3], ("high", "high", "medium")):
        rating = EvaluationMetricResult(
            value=level,
            name=EvaluationMetricName.CONTEXT_RELEVANCY,
            reason="Test reason",
        )
        chunk_relevancies.append(ChunkRelevancy(chunk=chunk, relevancy=rating))

    tally = chunk_relevancy_sorter._count_distinct_values(chunk_relevancies)

    assert tally["high"] == 2
    assert tally["medium"] == 1
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.tools.config import get_configuration_dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FeatureExtendedSourceSerialization(BaseModel):
    """Mixin for experimental feature in Source serialization"""

    model_config = get_configuration_dict()

    # Feature flag, off by default: selects between the full Source object
    # and the older reduced format in tool responses.
    full_sources_serialize_dump: bool = Field(
        description="Whether to include the full source object in the tool response. If True, includes the full Source object. If False, uses the old format with only source_number and content.",
        default=False,
    )
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
from pydantic import BaseModel,
|
|
1
|
+
from pydantic import BaseModel, Field, create_model
|
|
2
2
|
from pydantic.json_schema import SkipJsonSchema
|
|
3
3
|
|
|
4
|
+
from unique_toolkit._common.utils.structured_output.schema import StructuredOutputModel
|
|
4
5
|
from unique_toolkit.tools.config import get_configuration_dict
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class StructuredOutputModel(BaseModel):
|
|
8
|
-
model_config = ConfigDict(extra="forbid")
|
|
9
|
-
|
|
10
|
-
|
|
11
8
|
class StructuredOutputConfig(BaseModel):
|
|
12
9
|
model_config = get_configuration_dict()
|
|
13
10
|
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import overload
|
|
2
3
|
|
|
3
4
|
from pydantic import BaseModel, ValidationError
|
|
5
|
+
from typing_extensions import deprecated
|
|
4
6
|
|
|
5
|
-
from unique_toolkit.
|
|
6
|
-
from unique_toolkit.
|
|
7
|
+
from unique_toolkit._common.default_language_model import DEFAULT_GPT_35_TURBO
|
|
8
|
+
from unique_toolkit._common.validate_required_values import (
|
|
9
|
+
validate_required_values,
|
|
10
|
+
)
|
|
11
|
+
from unique_toolkit.app.schemas import BaseEvent, ChatEvent
|
|
7
12
|
from unique_toolkit.evals.config import EvaluationMetricConfig
|
|
8
13
|
from unique_toolkit.evals.context_relevancy.schema import (
|
|
9
14
|
EvaluationSchemaStructuredOutput,
|
|
@@ -21,7 +26,6 @@ from unique_toolkit.evals.schemas import (
|
|
|
21
26
|
)
|
|
22
27
|
from unique_toolkit.language_model.infos import (
|
|
23
28
|
LanguageModelInfo,
|
|
24
|
-
LanguageModelName,
|
|
25
29
|
ModelCapabilities,
|
|
26
30
|
)
|
|
27
31
|
from unique_toolkit.language_model.prompt import Prompt
|
|
@@ -45,9 +49,7 @@ USER_MSG_KEY = "userPrompt"
|
|
|
45
49
|
default_config = EvaluationMetricConfig(
|
|
46
50
|
enabled=False,
|
|
47
51
|
name=EvaluationMetricName.CONTEXT_RELEVANCY,
|
|
48
|
-
language_model=LanguageModelInfo.from_name(
|
|
49
|
-
LanguageModelName.AZURE_GPT_4o_2024_1120
|
|
50
|
-
),
|
|
52
|
+
language_model=LanguageModelInfo.from_name(DEFAULT_GPT_35_TURBO),
|
|
51
53
|
custom_prompts={
|
|
52
54
|
SYSTEM_MSG_KEY: CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
|
|
53
55
|
USER_MSG_KEY: CONTEXT_RELEVANCY_METRIC_USER_MSG,
|
|
@@ -61,13 +63,42 @@ relevancy_required_input_fields = [
|
|
|
61
63
|
|
|
62
64
|
|
|
63
65
|
class ContextRelevancyEvaluator:
|
|
66
|
+
# NOTE: `@deprecated` sits *below* `@overload` on purpose. The documentation of
# `warnings.deprecated` (mirrored by typing_extensions) states that the
# decorator must come after `@overload` for `__deprecated__` to exist on the
# overload as returned by `typing.get_overloads()`.
@overload
@deprecated(
    "Use __init__ with company_id and user_id instead or use the classmethod `from_event`"
)
def __init__(self, event: ChatEvent | BaseEvent):
    """
    Initialize the ContextRelevancyEvaluator with an event (deprecated)
    """

@overload
def __init__(self, *, company_id: str, user_id: str):
    """
    Initialize the ContextRelevancyEvaluator with a company_id and user_id
    """

def __init__(
    self,
    event: ChatEvent | BaseEvent | None = None,
    company_id: str | None = None,
    user_id: str | None = None,
):
    """
    Set up the language model service and the logger of the evaluator.

    Args:
        event: Deprecated way of initialising. When it is a ``ChatEvent`` or
            ``BaseEvent`` the language model service is built with
            ``LanguageModelService.from_event`` and the two id arguments
            are ignored.
        company_id: Used together with ``user_id`` when no event is given.
        user_id: Used together with ``company_id`` when no event is given.

    Both ids are passed through ``validate_required_values`` before use;
    presumably that helper rejects missing (``None``) values - confirm
    against ``unique_toolkit._common.validate_required_values``.
    """
    if isinstance(event, (ChatEvent, BaseEvent)):
        self.language_model_service = LanguageModelService.from_event(event)
    else:
        [company_id, user_id] = validate_required_values([company_id, user_id])
        self.language_model_service = LanguageModelService(
            company_id=company_id, user_id=user_id
        )

    # Setup the logger
    module_name = "ContextRelevancyEvaluator"
    self.logger = logging.getLogger(f"{module_name}.{__name__}")
|
|
98
|
+
|
|
99
|
+
@classmethod
def from_event(cls, event: ChatEvent | BaseEvent):
    """
    Alternate constructor: build the evaluator from an event.

    Only ``event.company_id`` and ``event.user_id`` are read; they are
    forwarded to the keyword-based constructor.
    """
    company = event.company_id
    user = event.user_id
    return cls(company_id=company, user_id=user)
|
|
71
102
|
|
|
72
103
|
async def analyze(
|
|
73
104
|
self,
|
|
@@ -95,6 +126,7 @@ class ContextRelevancyEvaluator:
|
|
|
95
126
|
|
|
96
127
|
input.validate_required_fields(relevancy_required_input_fields)
|
|
97
128
|
|
|
129
|
+
# TODO: Was already there in monorepo
|
|
98
130
|
if len(input.context_texts) == 0: # type: ignore
|
|
99
131
|
error_message = "No context texts provided."
|
|
100
132
|
raise EvaluatorException(
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing_extensions import deprecated
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.content.schemas import ContentChunk, ContentReference
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@deprecated("do not use this its only used in old tools")
class AgentChunksHandler:
    """
    Keeps the content chunks and the reference lists collected for legacy tools.

    State held by an instance:

    * ``_chunks``: a flat list of content chunks,
    * ``_tool_chunks``: a dict that starts empty; this class never writes to
      it itself, it is only handed out through ``tool_chunks``,
    * ``_references``: a history of reference lists, one entry per
      ``add_references`` call.
    """

    def __init__(self):
        """Create a handler with no chunks, no tool chunks and no references."""
        self._tool_chunks: dict = {}
        self._chunks: list[ContentChunk] = []
        self._references: list[list[ContentReference]] = []

    @property
    def chunks(self) -> list[ContentChunk]:
        """The internal chunk list itself (not a copy)."""
        return self._chunks

    @property
    def tool_chunks(self) -> dict:
        """The internal tool-chunk dict itself (not a copy)."""
        return self._tool_chunks

    def extend(self, chunks: list[ContentChunk]):
        """Append all given chunks to the stored chunk list."""
        self._chunks.extend(chunks)

    def replace(self, chunks: list[ContentChunk]):
        """
        Discard the stored chunks and store the given ones instead.

        A copy is stored so that a later ``extend`` on the handler does not
        silently grow the list object owned by the caller.
        """
        self._chunks = list(chunks)

    def add_references(
        self,
        references: list[ContentReference],
    ):
        """Record one more list of references as the latest entry."""
        self._references.append(references)

    @property
    def all_references(
        self,
    ) -> list[list[ContentReference]]:
        """Every recorded reference list, oldest first."""
        return self._references

    @property
    def latest_references(
        self,
    ) -> list[ContentReference]:
        """The most recently added reference list, or ``[]`` if none exists."""
        if not self._references:
            return []
        return self._references[-1]

    @property
    def latest_referenced_chunks(self) -> list[ContentChunk]:
        """Stored chunks matched by the latest reference list (``[]`` if none)."""
        if not self._references:
            return []
        return self._get_referenced_chunks_from_references(self._references[-1])

    def _get_referenced_chunks_from_references(
        self,
        references: list[ContentReference],
    ) -> list[ContentChunk]:
        """
        Get _referenced_chunks by matching sourceId from _references with merged id and chunk_id from _chunks.

        A reference matches a chunk when ``ref.source_id`` equals
        ``str(chunk.id) + "_" + str(chunk.chunk_id)``. The result follows the
        order of ``references``; for one reference, matching chunks keep their
        order in ``_chunks``. A chunk is returned once per matching reference.
        """
        # Build the "<id>_<chunk_id>" key once per chunk instead of once per
        # (reference, chunk) pair. Lists preserve chunks sharing the same key.
        chunks_by_source_id: dict[str, list[ContentChunk]] = {}
        for chunk in self._chunks:
            source_id = str(chunk.id) + "_" + str(chunk.chunk_id)
            chunks_by_source_id.setdefault(source_id, []).append(chunk)

        referenced_chunks: list[ContentChunk] = []
        for ref in references:
            referenced_chunks.extend(chunks_by_source_id.get(ref.source_id, ()))
        return referenced_chunks
|
unique_toolkit/tools/tool.py
CHANGED
|
@@ -15,12 +15,15 @@ from unique_toolkit.language_model.schemas import (
|
|
|
15
15
|
LanguageModelMessage,
|
|
16
16
|
)
|
|
17
17
|
from unique_toolkit.language_model.service import LanguageModelService
|
|
18
|
+
from unique_toolkit.tools.agent_chunks_hanlder import AgentChunksHandler
|
|
18
19
|
from unique_toolkit.tools.config import ToolBuildConfig, ToolSelectionPolicy
|
|
19
20
|
from unique_toolkit.tools.schemas import BaseToolConfig, ToolCallResponse, ToolPrompts
|
|
20
21
|
from unique_toolkit.tools.tool_progress_reporter import ToolProgressReporter
|
|
21
22
|
|
|
22
23
|
ConfigType = TypeVar("ConfigType", bound=BaseToolConfig)
|
|
23
24
|
|
|
25
|
+
ToolBuildConfig.model_rebuild()
|
|
26
|
+
|
|
24
27
|
|
|
25
28
|
class Tool(ABC, Generic[ConfigType]):
|
|
26
29
|
name: str
|
|
@@ -80,6 +83,7 @@ class Tool(ABC, Generic[ConfigType]):
|
|
|
80
83
|
def get_tool_call_result_for_loop_history(
    self,
    tool_response: ToolCallResponse,
    agent_chunks_handler: AgentChunksHandler,
) -> LanguageModelMessage:
    """
    Turn a tool response into a message for the loop history.

    This implementation does nothing but raise; presumably concrete tools
    override it - confirm against the subclasses.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
|
|
85
89
|
|
|
@@ -150,8 +154,7 @@ class Tool(ABC, Generic[ConfigType]):
|
|
|
150
154
|
):
|
|
151
155
|
self.settings = ToolBuildConfig(
|
|
152
156
|
name=self.name,
|
|
153
|
-
configuration=config,
|
|
154
|
-
# the ToolBuildConfig has a wrong type in it to be fixed later.
|
|
157
|
+
configuration=config,
|
|
155
158
|
)
|
|
156
159
|
|
|
157
160
|
self.config = config
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unique_toolkit
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.33
|
|
4
4
|
Summary:
|
|
5
5
|
License: Proprietary
|
|
6
6
|
Author: Martin Fadler
|
|
@@ -116,6 +116,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
|
116
116
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
117
117
|
|
|
118
118
|
|
|
119
|
+
## [0.8.33] - 2025-08-31
|
|
120
|
+
|
|
121
|
+
- Fix tool for `web_search`
|
|
122
|
+
|
|
123
|
+
## [0.8.32] - 2025-08-30
|
|
124
|
+
|
|
125
|
+
- Move over general packages for `web_search`
|
|
126
|
+
|
|
119
127
|
## [0.8.31] - 2025-08-29
|
|
120
128
|
- Add various openai models to supported model list
|
|
121
129
|
- o1
|
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
unique_toolkit/__init__.py,sha256=nbOYPIKERt-ITsgifrnJhatn1YNR38Ntumw-dCn_tsA,714
|
|
2
2
|
unique_toolkit/_common/_base_service.py,sha256=S8H0rAebx7GsOldA7xInLp3aQJt9yEPDQdsGSFRJsGg,276
|
|
3
3
|
unique_toolkit/_common/_time_utils.py,sha256=ztmTovTvr-3w71Ns2VwXC65OKUUh-sQlzbHdKTQWm-w,135
|
|
4
|
+
unique_toolkit/_common/chunk_relevancy_sorter/config.py,sha256=v6Ljo-WIZCtYJgfaPfpzZegCV0DEw_nNhTzNtw0Jg7c,1744
|
|
5
|
+
unique_toolkit/_common/chunk_relevancy_sorter/exception.py,sha256=1mY4zjbvnXsd5oIxwiVsma09bS2XRnHrxW8KJBGtgCM,126
|
|
6
|
+
unique_toolkit/_common/chunk_relevancy_sorter/schemas.py,sha256=doAWPPx8d0zIqHMXmnJy47Z5_NlblJBhMqo8KE7fyyc,1329
|
|
7
|
+
unique_toolkit/_common/chunk_relevancy_sorter/service.py,sha256=UxYn4xJMNEXQ1afMiT2sMwXgdmlFNPaglVhx6CRRtiM,13864
|
|
8
|
+
unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py,sha256=JRvLtJXPmz6bm1tFSSqt01HHVeanSD8zk70XVLJHOEM,8878
|
|
4
9
|
unique_toolkit/_common/default_language_model.py,sha256=tmHSqg6e8G7RmKqmdE_tmLxkSN0x-aGoyUdy6Pl2oAE,334
|
|
5
10
|
unique_toolkit/_common/endpoint_builder.py,sha256=oM6uDmxUqTAJut6MuJQj3bIX4yOccyErWD5bJ1d1lcY,4526
|
|
6
11
|
unique_toolkit/_common/exception.py,sha256=caQIE1btsQnpKCHqL2cgWUSbHup06enQu_Pt7uGUTTE,727
|
|
12
|
+
unique_toolkit/_common/feature_flags/schema.py,sha256=3JpTuld8kK-UQ5B0sbYTu0yqhyFPnChXG2Iv4BNqHdg,539
|
|
7
13
|
unique_toolkit/_common/token/image_token_counting.py,sha256=VpFfZyY0GIH27q_Wy4YNjk2algqvbCtJyzuuROoFQPw,2189
|
|
8
14
|
unique_toolkit/_common/token/token_counting.py,sha256=Jo5B11GLlnaZDi4u7xCzIboLl8zn5cY_dmrogHixVdk,6132
|
|
15
|
+
unique_toolkit/_common/utils/structured_output/schema.py,sha256=Tp7kDYcmKtnUhcuRkH86TSYhylRff0ZZJYb2dLkISts,131
|
|
9
16
|
unique_toolkit/_common/validate_required_values.py,sha256=Y_M1ub9gIKP9qZ45F6Zq3ZHtuIqhmOjl8Z2Vd3avg8w,588
|
|
10
17
|
unique_toolkit/_common/validators.py,sha256=aZwbMho7XszN7lT5RtemaiXgC0WJ4u40oeVgsNGhF4U,2803
|
|
11
18
|
unique_toolkit/app/__init__.py,sha256=ETxYDpEizg_PKmi4JPX_P76ySq-us-xypfAIdKQ1QZU,1284
|
|
@@ -39,8 +46,8 @@ unique_toolkit/embedding/service.py,sha256=2KjYlUKxeh-Je8S1mOPiQ735pqAmdbeztycVN
|
|
|
39
46
|
unique_toolkit/embedding/utils.py,sha256=v86lo__bCJbxZBQ3OcLu5SuwT6NbFfWlcq8iyk6BuzQ,279
|
|
40
47
|
unique_toolkit/evals/config.py,sha256=ywHIrJs5SFdKr1WXfrofWuFfzb0iPQw8iZDpq5oEug4,953
|
|
41
48
|
unique_toolkit/evals/context_relevancy/prompts.py,sha256=EdHFUOB581yVxcOL8482KUv_LzaRjuiem71EF8udYMc,1331
|
|
42
|
-
unique_toolkit/evals/context_relevancy/schema.py,sha256=
|
|
43
|
-
unique_toolkit/evals/context_relevancy/service.py,sha256=
|
|
49
|
+
unique_toolkit/evals/context_relevancy/schema.py,sha256=ILA0ClTBFJbtZavkVIrqPz2vj6oql8U50e0G4bh8jEI,2911
|
|
50
|
+
unique_toolkit/evals/context_relevancy/service.py,sha256=xoe0ezA4xy-KaPjSO22xhiqo_WKMuYjlYFKEYJWl25A,9601
|
|
44
51
|
unique_toolkit/evals/evaluation_manager.py,sha256=g-8qa_6_p53C9Okx8iNkuoIXYSJrf-6sQ-xku7bo9kI,7895
|
|
45
52
|
unique_toolkit/evals/exception.py,sha256=7lcVbCyoN4Md1chNJDFxpUYyWbVrcr9dcc3TxWykJTc,115
|
|
46
53
|
unique_toolkit/evals/hallucination/constants.py,sha256=FLcXl5XU07jCvS8YPX9l6UjTaqyQ8YvnSKpx4Z6wZ2Y,1997
|
|
@@ -105,6 +112,7 @@ unique_toolkit/tools/a2a/manager.py,sha256=-Vt0pL886zMaaG4sGQhXmbwOhOL928WbZzm4F
|
|
|
105
112
|
unique_toolkit/tools/a2a/memory.py,sha256=F18kUA3m3NqoKdKAJSwDv8JQneHvZTGOkcZTLNMXAYs,1004
|
|
106
113
|
unique_toolkit/tools/a2a/schema.py,sha256=1R7qIu2l2qnUJDGRuUnZLqPPkHnT3x4d3d4PALoBzcY,296
|
|
107
114
|
unique_toolkit/tools/a2a/service.py,sha256=8yiG2zYqHva4fQJX4dfnE9Bm4vDnbK0k6YRyCSQy9RQ,5147
|
|
115
|
+
unique_toolkit/tools/agent_chunks_hanlder.py,sha256=x32Dp1Z8cVW5i-XzXbaMwX2KHPcNGmqEU-FB4AV9ZGo,1909
|
|
108
116
|
unique_toolkit/tools/config.py,sha256=nYwglwUSb3fxhqYxI83k-qoecuF5Zcol5FMOxHvTNeE,3827
|
|
109
117
|
unique_toolkit/tools/factory.py,sha256=w3uNHuYBIJ330Xi8PTdAkr8G3OMbQH2cBgvk5UT16oE,1253
|
|
110
118
|
unique_toolkit/tools/mcp/__init__.py,sha256=RLF_p-LDRC7GhiB3fdCi4u3bh6V9PY_w26fg61BLyco,122
|
|
@@ -114,14 +122,14 @@ unique_toolkit/tools/mcp/tool_wrapper.py,sha256=w7Fbo4FSMYvtgSq7Sqt1dmAPvqHjoBQS
|
|
|
114
122
|
unique_toolkit/tools/schemas.py,sha256=rArQccbfIv7CWcozClAZ-BVlOwAsjpgL8KUab_WeO3k,4817
|
|
115
123
|
unique_toolkit/tools/test/test_mcp_manager.py,sha256=dySiytBUfRjTOzwY_oGKi_jT6BNVgzZuh1du-EvbcJ4,15627
|
|
116
124
|
unique_toolkit/tools/test/test_tool_progress_reporter.py,sha256=GTtmBqOUo0-4fh_q0lRgxDhwKeankc3FHFD5ULZAm4Y,6299
|
|
117
|
-
unique_toolkit/tools/tool.py,sha256=
|
|
125
|
+
unique_toolkit/tools/tool.py,sha256=6kw8oadYfFlV0LEPDTHWiz0l5q_8gM-hLpLjmUn_VNs,5943
|
|
118
126
|
unique_toolkit/tools/tool_manager.py,sha256=uW1uf8mYvbnBN_TVa4L79p_8Sf9HyY4V_nXZqrh3fPM,10206
|
|
119
127
|
unique_toolkit/tools/tool_progress_reporter.py,sha256=ixud9VoHey1vlU1t86cW0-WTvyTwMxNSWBon8I11SUk,7955
|
|
120
128
|
unique_toolkit/tools/utils/execution/execution.py,sha256=vjG2Y6awsGNtlvyQAGCTthQ5thWHYnn-vzZXaYLb3QE,7922
|
|
121
129
|
unique_toolkit/tools/utils/source_handling/schema.py,sha256=vzAyf6ZWNexjMO0OrnB8y2glGkvAilmGGQXd6zcDaKw,870
|
|
122
130
|
unique_toolkit/tools/utils/source_handling/source_formatting.py,sha256=C7uayNbdkNVJdEARA5CENnHtNY1SU6etlaqbgHNyxaQ,9152
|
|
123
131
|
unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py,sha256=oM5ZxEgzROrnX1229KViCAFjRxl9wCTzWZoinYSHleM,6979
|
|
124
|
-
unique_toolkit-0.8.
|
|
125
|
-
unique_toolkit-0.8.
|
|
126
|
-
unique_toolkit-0.8.
|
|
127
|
-
unique_toolkit-0.8.
|
|
132
|
+
unique_toolkit-0.8.33.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
|
133
|
+
unique_toolkit-0.8.33.dist-info/METADATA,sha256=6ohnYw35vWjXiiVkqnq3oF0QVINLnuF1GYGZzntACf4,29666
|
|
134
|
+
unique_toolkit-0.8.33.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
135
|
+
unique_toolkit-0.8.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|