unique_toolkit 0.5.54__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/_common/validate_required_values.py +21 -0
- unique_toolkit/app/__init__.py +20 -0
- unique_toolkit/app/schemas.py +73 -7
- unique_toolkit/chat/__init__.py +5 -4
- unique_toolkit/chat/constants.py +3 -0
- unique_toolkit/chat/functions.py +661 -0
- unique_toolkit/chat/schemas.py +11 -11
- unique_toolkit/chat/service.py +273 -430
- unique_toolkit/content/__init__.py +1 -0
- unique_toolkit/content/constants.py +2 -0
- unique_toolkit/content/functions.py +475 -0
- unique_toolkit/content/service.py +163 -300
- unique_toolkit/content/utils.py +32 -0
- unique_toolkit/embedding/__init__.py +3 -0
- unique_toolkit/embedding/constants.py +2 -0
- unique_toolkit/embedding/functions.py +79 -0
- unique_toolkit/embedding/service.py +47 -34
- unique_toolkit/evaluators/__init__.py +1 -0
- unique_toolkit/evaluators/constants.py +1 -0
- unique_toolkit/evaluators/context_relevancy/constants.py +3 -3
- unique_toolkit/evaluators/context_relevancy/utils.py +5 -2
- unique_toolkit/evaluators/hallucination/utils.py +2 -1
- unique_toolkit/language_model/__init__.py +1 -0
- unique_toolkit/language_model/constants.py +4 -0
- unique_toolkit/language_model/functions.py +362 -0
- unique_toolkit/language_model/service.py +246 -293
- unique_toolkit/short_term_memory/__init__.py +5 -0
- unique_toolkit/short_term_memory/constants.py +1 -0
- unique_toolkit/short_term_memory/functions.py +175 -0
- unique_toolkit/short_term_memory/service.py +153 -27
- {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/METADATA +36 -7
- unique_toolkit-0.6.0.dist-info/RECORD +64 -0
- unique_toolkit-0.5.54.dist-info/RECORD +0 -50
- {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,475 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import tempfile
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import requests
|
8
|
+
import unique_sdk
|
9
|
+
|
10
|
+
from unique_toolkit.content import DOMAIN_NAME
|
11
|
+
from unique_toolkit.content.constants import DEFAULT_SEARCH_LANGUAGE
|
12
|
+
from unique_toolkit.content.schemas import (
|
13
|
+
Content,
|
14
|
+
ContentChunk,
|
15
|
+
ContentRerankerConfig,
|
16
|
+
ContentSearchType,
|
17
|
+
)
|
18
|
+
from unique_toolkit.content.utils import map_contents, map_to_content_chunks
|
19
|
+
|
20
|
+
logger = logging.getLogger(f"toolkit.{DOMAIN_NAME}.{__name__}")
|
21
|
+
|
22
|
+
|
23
|
+
def search_content_chunks(
    user_id: str,
    company_id: str,
    chat_id: str,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    chat_only: bool | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
) -> list[ContentChunk]:
    """
    Synchronously search the knowledge base for content chunks.

    The arguments are forwarded to ``unique_sdk.Search.create`` and the raw
    result is converted with ``map_to_content_chunks``.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search; its enum member name is sent to the SDK.
        limit (int): The maximum number of results to return.
        search_language (str): The language for the search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None): The scope IDs. A warning is logged when empty. Defaults to None.
        chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
        metadata_filter (dict | None): UniqueQL metadata filter, passed through unchanged. Defaults to None.
        content_ids (list[str] | None): The content IDs to search. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error from the SDK call or the result mapping is logged and re-raised.
    """
    if not scope_ids:
        logger.warning("No scope IDs provided for search.")

    if content_ids:
        logger.info(f"Searching for content chunks with content_ids: {content_ids}")

    try:
        # The SDK expects the reranker settings as an alias-keyed dict (or None).
        if reranker_config:
            reranker_payload = reranker_config.model_dump(by_alias=True)
        else:
            reranker_payload = None

        raw_results = unique_sdk.Search.create(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            searchString=search_string,
            searchType=search_type.name,
            scopeIds=scope_ids,
            limit=limit,
            reranker=reranker_payload,
            language=search_language,
            chatOnly=chat_only,
            metaDataFilter=metadata_filter,
            contentIds=content_ids,
        )
        return map_to_content_chunks(raw_results)
    except Exception as exc:
        logger.error(f"Error while searching content chunks: {exc}")
        raise exc
|
80
|
+
|
81
|
+
|
82
|
+
async def search_content_chunks_async(
    user_id: str,
    company_id: str,
    chat_id: str,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    chat_only: bool | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
) -> list[ContentChunk]:
    """
    Performs an asynchronous search for content chunks in the knowledge base.

    Asynchronous counterpart of ``search_content_chunks``: the arguments are
    forwarded to ``unique_sdk.Search.create_async`` and the raw result is
    converted with ``map_to_content_chunks``.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search; its enum member name is sent to the SDK.
        limit (int): The maximum number of results to return.
        search_language (str): The language for the search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None): The scope IDs. A warning is logged when empty. Defaults to None.
        chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
        metadata_filter (dict | None): UniqueQL metadata filter, passed through unchanged. Defaults to None.
        content_ids (list[str] | None): The content IDs to search. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error from the SDK call or the result mapping is logged and re-raised.
    """
    if not scope_ids:
        logger.warning("No scope IDs provided for search.")

    if content_ids:
        logger.info(
            f"Searching for content chunks asynchronously with content_ids: {content_ids}"
        )

    try:
        searches = await unique_sdk.Search.create_async(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            searchString=search_string,
            searchType=search_type.name,
            scopeIds=scope_ids,
            limit=limit,
            # The SDK expects the reranker settings as an alias-keyed dict (or None).
            reranker=(
                reranker_config.model_dump(by_alias=True) if reranker_config else None
            ),
            language=search_language,
            chatOnly=chat_only,
            metaDataFilter=metadata_filter,
            contentIds=content_ids,
        )
        return map_to_content_chunks(searches)
    except Exception as e:
        logger.error(f"Error while searching content chunks: {e}")
        # Bare raise re-raises the active exception without adding a frame.
        raise
|
128
|
+
|
129
|
+
|
130
|
+
def search_contents(
    user_id: str,
    company_id: str,
    chat_id: str,
    where: dict,
) -> list[Content]:
    """
    Performs a synchronous search for content files in the knowledge base by filter.

    The filter is forwarded to ``unique_sdk.Content.search`` and the raw result
    is converted with ``map_contents``.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        where (dict): The search criteria. If it has a truthy "contentId" entry, that value is logged.

    Returns:
        list[Content]: The search results.

    Raises:
        Exception: Any error from the SDK call or the result mapping is logged and re-raised.
    """
    if where.get("contentId"):
        logger.info(f"Searching for content with content_id: {where['contentId']}")

    try:
        contents = unique_sdk.Content.search(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            # TODO add type parameter in SDK
            where=where,  # type: ignore
        )
        return map_contents(contents)
    except Exception as e:
        logger.error(f"Error while searching contents: {e}")
        # Bare raise re-raises the active exception without adding a frame.
        raise
|
163
|
+
|
164
|
+
|
165
|
+
async def search_contents_async(
    user_id: str,
    company_id: str,
    chat_id: str,
    where: dict,
):
    """
    Asynchronously search the knowledge base for content matching a filter.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        where (dict): The search criteria, forwarded to ``unique_sdk.Content.search_async``.

    Returns:
        The result of ``map_contents`` applied to the SDK response.

    Raises:
        Exception: Any error from the SDK call or the result mapping is logged and re-raised.
    """
    if requested_id := where.get("contentId"):
        logger.info(f"Searching for content with content_id: {requested_id}")

    try:
        raw_contents = await unique_sdk.Content.search_async(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            where=where,  # type: ignore
        )
        return map_contents(raw_contents)
    except Exception as exc:
        logger.error(f"Error while searching contents: {exc}")
        raise exc
|
186
|
+
|
187
|
+
|
188
|
+
def _upsert_content(
    user_id: str,
    company_id: str,
    input_data: dict,
    scope_id: str | None = None,
    chat_id: str | None = None,
    file_url: str | None = None,
):
    """
    Create or update a content record through ``unique_sdk.Content.upsert``.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        input_data (dict): The content description, sent as the SDK's ``input``.
        scope_id (str | None): The scope ID, sent as ``scopeId``. Defaults to None.
        chat_id (str | None): The chat ID, sent as ``chatId``. Defaults to None.
        file_url (str | None): The file URL, sent as ``fileUrl``. Defaults to None.

    Returns:
        The SDK's upsert response, unchanged.
    """
    upsert_response = unique_sdk.Content.upsert(
        user_id=user_id,
        company_id=company_id,
        input=input_data,
        scopeId=scope_id,
        chatId=chat_id,
        fileUrl=file_url,
    )  # type: ignore
    return upsert_response
|
205
|
+
|
206
|
+
|
207
|
+
def upload_content(
    user_id: str,
    company_id: str,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str | None = None,
    chat_id: str | None = None,
    skip_ingestion: bool = False,
):
    """
    Upload a local file to the knowledge base.

    Thin wrapper around ``_trigger_upload_content`` that logs any failure
    before re-raising it.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str | None): The scope ID. Defaults to None.
        chat_id (str | None): The chat ID. Defaults to None.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.

    Returns:
        Content: The uploaded content.

    Raises:
        Exception: Any error raised during the upload is logged and re-raised.
    """
    upload_kwargs = {
        "user_id": user_id,
        "company_id": company_id,
        "path_to_content": path_to_content,
        "content_name": content_name,
        "mime_type": mime_type,
        "scope_id": scope_id,
        "chat_id": chat_id,
        "skip_ingestion": skip_ingestion,
    }

    try:
        return _trigger_upload_content(**upload_kwargs)
    except Exception as exc:
        logger.error(f"Error while uploading content: {exc}")
        raise exc
|
248
|
+
|
249
|
+
|
250
|
+
def _trigger_upload_content(
    user_id: str,
    company_id: str,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str | None = None,
    chat_id: str | None = None,
    skip_ingestion: bool = False,
):
    """
    Uploads content to the knowledge base.

    Steps:
        1. Upsert a content record to obtain its write and read URLs.
        2. PUT the file bytes to the write URL.
        3. Upsert again with the read URL and byte size to finalize the record.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content; used as both key and title.
        mime_type (str): The MIME type of the content.
        scope_id (str | None): The scope ID. Defaults to None.
        chat_id (str | None): The chat ID. Defaults to None.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.

    Returns:
        Content: The uploaded content, built from the first upsert response.

    Raises:
        ValueError: If neither chat_id nor scope_id is provided, or if the
            write or read URL is missing from the upsert response.
        OSError: If the file at path_to_content cannot be read.
        requests.HTTPError: If the storage upload returns an error status.
    """

    if not chat_id and not scope_id:
        raise ValueError("chat_id or scope_id must be provided")

    byte_size = os.path.getsize(path_to_content)
    created_content = _upsert_content(
        user_id=user_id,
        company_id=company_id,
        input_data={
            "key": content_name,
            "title": content_name,
            "mimeType": mime_type,
        },
        scope_id=scope_id,
        chat_id=chat_id,
    )  # type: ignore

    write_url = created_content["writeUrl"]

    if not write_url:
        error_msg = "Write url for uploaded content is missing"
        logger.error(error_msg)
        raise ValueError(error_msg)

    # Validate the read URL before uploading so a bad response does not
    # leave an orphaned blob behind.
    read_url = created_content["readUrl"]

    if not read_url:
        error_msg = "Read url for uploaded content is missing"
        logger.error(error_msg)
        raise ValueError(error_msg)

    # Upload the file to the blob-storage write URL; the content-type header
    # makes the stored blob carry the caller's MIME type.
    with open(path_to_content, "rb") as file:
        upload_response = requests.put(
            url=write_url,
            data=file,
            headers={
                "X-Ms-Blob-Content-Type": mime_type,
                "X-Ms-Blob-Type": "BlockBlob",
            },
        )
    # Fail loudly instead of registering content whose bytes never arrived.
    upload_response.raise_for_status()

    input_dict = {
        "key": content_name,
        "title": content_name,
        "mimeType": mime_type,
        "byteSize": byte_size,
    }

    if skip_ingestion:
        input_dict["ingestionConfig"] = {"uniqueIngestionMode": "SKIP_INGESTION"}

    # The finalizing upsert targets the chat when a chat ID is given, and the
    # scope otherwise; never both.
    target = {"chat_id": chat_id} if chat_id else {"scope_id": scope_id}
    _upsert_content(
        user_id=user_id,
        company_id=company_id,
        input_data=input_dict,
        file_url=read_url,
        **target,
    )  # type: ignore

    return Content(**created_content)
|
346
|
+
|
347
|
+
|
348
|
+
def request_content_by_id(
    user_id: str, company_id: str, content_id: str, chat_id: str | None = None
) -> requests.Response:
    """
    Sends a request to download content from a chat or the knowledge base.

    Issues a GET to ``{unique_sdk.api_base}/content/{content_id}/file`` with
    the SDK's API version, app ID and API key in the headers. The response is
    returned as-is; the status code is not checked here.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The ID of the content to download.
        chat_id (str | None): The ID of the chat from which to download the content. Defaults to None to download from knowledge base.

    Returns:
        requests.Response: The response object containing the downloaded content.

    """
    logger.info(f"Requesting content with content_id: {content_id}")
    url = f"{unique_sdk.api_base}/content/{content_id}/file"

    # Let requests build the query string so the chat ID is URL-encoded.
    query_params = {"chatId": chat_id} if chat_id else None

    headers = {
        "x-api-version": unique_sdk.api_version,
        "x-app-id": unique_sdk.app_id,
        "x-user-id": user_id,
        "x-company-id": company_id,
        "Authorization": f"Bearer {unique_sdk.api_key}",
    }

    return requests.get(url, headers=headers, params=query_params)
|
379
|
+
|
380
|
+
|
381
|
+
def download_content_to_file_by_id(
    user_id: str,
    company_id: str,
    content_id: str,
    chat_id: str | None = None,
    filename: str | None = None,
    tmp_dir_path: str | Path | None = "/tmp",
) -> Path:
    """
    Downloads content from a chat or the knowledge base and saves it to a file.

    The file is written into a freshly created random directory inside
    ``tmp_dir_path``. That directory is not cleaned up automatically.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The ID of the content to download.
        chat_id (str | None): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
        filename (str | None): The name of the file to save the content as. If not provided, the filename from the response's Content-Disposition header is used. Defaults to None.
        tmp_dir_path (str | Path | None): The directory in which the random download directory is created. Defaults to "/tmp".

    Returns:
        Path: The path to the downloaded file.

    Raises:
        Exception: If the download fails or the filename cannot be determined.
    """

    logger.info(f"Downloading content to file with content_id: {content_id}")
    response = request_content_by_id(user_id, company_id, content_id, chat_id)

    # Check for failure before touching the filesystem so no empty temp
    # directory is left behind on error.
    if response.status_code != 200:
        error_msg = f"Error downloading file: Status code {response.status_code}"
        logger.error(error_msg)
        raise Exception(error_msg)

    target_name = filename
    if not target_name:
        pattern = r'filename="([^"]+)"'
        match = re.search(pattern, response.headers.get("Content-Disposition", ""))
        # The header comes from the server: keep only the final path component
        # so the file cannot be written outside the temp directory.
        target_name = Path(match.group(1)).name if match else ""
        if target_name in ("", ".", ".."):
            error_msg = "Error downloading file: Filename could not be determined"
            logger.error(error_msg)
            raise Exception(error_msg)

    random_dir = tempfile.mkdtemp(dir=tmp_dir_path)
    content_path = Path(random_dir) / target_name

    with open(content_path, "wb") as file:
        file.write(response.content)

    return content_path
|
432
|
+
|
433
|
+
|
434
|
+
# TODO: Discuss if we should deprecate this method due to unclear use by content_name
|
435
|
+
def download_content(
    user_id: str,
    company_id: str,
    content_id: str,
    content_name: str,
    chat_id: str | None = None,
    dir_path: str | Path | None = "/tmp",
) -> Path:
    """
    Downloads content to a temporary directory.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The id of the uploaded content.
        content_name (str): The name of the uploaded content; used as the local file name.
        chat_id (str | None): The chat_id, defaults to None.
        dir_path (str | Path | None): The directory in which a random download directory is created, defaults to "/tmp". Be aware that this directory won't be cleaned up automatically.

    Returns:
        content_path: The path to the downloaded content in the temporary directory.

    Raises:
        Exception: If the download fails.
    """

    logger.info(f"Downloading content with content_id: {content_id}")
    response = request_content_by_id(user_id, company_id, content_id, chat_id)

    # Check for failure before touching the filesystem so no empty temp
    # directory is left behind on error.
    if response.status_code != 200:
        error_msg = f"Error downloading file: Status code {response.status_code}"
        logger.error(error_msg)
        raise Exception(error_msg)

    random_dir = tempfile.mkdtemp(dir=dir_path)
    content_path = Path(random_dir) / content_name

    with open(content_path, "wb") as file:
        file.write(response.content)

    return content_path
|