unique_toolkit 0.5.54__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. unique_toolkit/_common/validate_required_values.py +21 -0
  2. unique_toolkit/app/__init__.py +20 -0
  3. unique_toolkit/app/schemas.py +73 -7
  4. unique_toolkit/chat/__init__.py +5 -4
  5. unique_toolkit/chat/constants.py +3 -0
  6. unique_toolkit/chat/functions.py +661 -0
  7. unique_toolkit/chat/schemas.py +11 -11
  8. unique_toolkit/chat/service.py +273 -430
  9. unique_toolkit/content/__init__.py +1 -0
  10. unique_toolkit/content/constants.py +2 -0
  11. unique_toolkit/content/functions.py +475 -0
  12. unique_toolkit/content/service.py +163 -300
  13. unique_toolkit/content/utils.py +32 -0
  14. unique_toolkit/embedding/__init__.py +3 -0
  15. unique_toolkit/embedding/constants.py +2 -0
  16. unique_toolkit/embedding/functions.py +79 -0
  17. unique_toolkit/embedding/service.py +47 -34
  18. unique_toolkit/evaluators/__init__.py +1 -0
  19. unique_toolkit/evaluators/constants.py +1 -0
  20. unique_toolkit/evaluators/context_relevancy/constants.py +3 -3
  21. unique_toolkit/evaluators/context_relevancy/utils.py +5 -2
  22. unique_toolkit/evaluators/hallucination/utils.py +2 -1
  23. unique_toolkit/language_model/__init__.py +1 -0
  24. unique_toolkit/language_model/constants.py +4 -0
  25. unique_toolkit/language_model/functions.py +362 -0
  26. unique_toolkit/language_model/service.py +246 -293
  27. unique_toolkit/short_term_memory/__init__.py +5 -0
  28. unique_toolkit/short_term_memory/constants.py +1 -0
  29. unique_toolkit/short_term_memory/functions.py +175 -0
  30. unique_toolkit/short_term_memory/service.py +153 -27
  31. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/METADATA +36 -7
  32. unique_toolkit-0.6.0.dist-info/RECORD +64 -0
  33. unique_toolkit-0.5.54.dist-info/RECORD +0 -50
  34. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/LICENSE +0 -0
  35. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/WHEEL +0 -0
@@ -1,3 +1,4 @@
1
+ from .constants import DOMAIN_NAME as DOMAIN_NAME
1
2
  from .schemas import (
2
3
  Content as Content,
3
4
  )
@@ -0,0 +1,2 @@
1
# Domain name of the content package; used to namespace the package's loggers.
+ DOMAIN_NAME = "content"
2
# Default language for full-text search when the caller does not pass one.
+ DEFAULT_SEARCH_LANGUAGE = "english"
@@ -0,0 +1,475 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
+ import requests
8
+ import unique_sdk
9
+
10
+ from unique_toolkit.content import DOMAIN_NAME
11
+ from unique_toolkit.content.constants import DEFAULT_SEARCH_LANGUAGE
12
+ from unique_toolkit.content.schemas import (
13
+ Content,
14
+ ContentChunk,
15
+ ContentRerankerConfig,
16
+ ContentSearchType,
17
+ )
18
+ from unique_toolkit.content.utils import map_contents, map_to_content_chunks
19
+
20
# Module-level logger named "toolkit.<DOMAIN_NAME>.<module name>".
+ logger = logging.getLogger(f"toolkit.{DOMAIN_NAME}.{__name__}")
21
+
22
+
23
def search_content_chunks(
    user_id: str,
    company_id: str,
    chat_id: str,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    chat_only: bool | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
) -> list[ContentChunk]:
    """
    Run a blocking chunk-level search against the knowledge base.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform; its ``name`` is sent to the SDK.
        limit (int): The maximum number of results to return.
        search_language (str): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None): The scope IDs. A warning is logged when empty or None. Defaults to None.
        chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
        metadata_filter (dict | None): UniqueQL metadata filter, forwarded unchanged. Defaults to None.
        content_ids (list[str] | None): The content IDs to search. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error raised while searching or mapping is logged and re-raised.
    """
    if not scope_ids:
        logger.warning("No scope IDs provided for search.")

    if content_ids:
        logger.info(f"Searching for content chunks with content_ids: {content_ids}")

    try:
        # Assemble the SDK request first; the camelCase keys are the SDK's own.
        search_request = {
            "user_id": user_id,
            "company_id": company_id,
            "chatId": chat_id,
            "searchString": search_string,
            "searchType": search_type.name,
            "scopeIds": scope_ids,
            "limit": limit,
            "reranker": (
                reranker_config.model_dump(by_alias=True) if reranker_config else None
            ),
            "language": search_language,
            "chatOnly": chat_only,
            "metaDataFilter": metadata_filter,
            "contentIds": content_ids,
        }
        raw_results = unique_sdk.Search.create(**search_request)
        return map_to_content_chunks(raw_results)
    except Exception as e:
        logger.error(f"Error while searching content chunks: {e}")
        raise e
80
+
81
+
82
async def search_content_chunks_async(
    user_id: str,
    company_id: str,
    chat_id: str,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    chat_only: bool | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
) -> list[ContentChunk]:
    """
    Performs an asynchronous search for content chunks in the knowledge base.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None): The scope IDs. A warning is logged when empty or None. Defaults to None.
        chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
        metadata_filter (dict | None): UniqueQL metadata filter, forwarded unchanged. Defaults to None.
        content_ids (list[str] | None): The content IDs to search. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error raised while searching or mapping is logged and re-raised.
    """
    if not scope_ids:
        logger.warning("No scope IDs provided for search.")

    if content_ids:
        logger.info(
            f"Searching for content chunks asynchronously with content_ids: {content_ids}"
        )

    try:
        searches = await unique_sdk.Search.create_async(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            searchString=search_string,
            searchType=search_type.name,
            scopeIds=scope_ids,
            limit=limit,
            reranker=(
                reranker_config.model_dump(by_alias=True) if reranker_config else None
            ),
            language=search_language,
            chatOnly=chat_only,
            metaDataFilter=metadata_filter,
            contentIds=content_ids,
        )
        return map_to_content_chunks(searches)
    except Exception as e:
        logger.error(f"Error while searching content chunks: {e}")
        # Bare raise re-raises the active exception without adding a frame.
        raise
128
+
129
+
130
def search_contents(
    user_id: str,
    company_id: str,
    chat_id: str,
    where: dict,
) -> list[Content]:
    """
    Performs a synchronous search for content files in the knowledge base by filter.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        where (dict): The search criteria. If it contains a truthy "contentId", that value is logged.

    Returns:
        list[Content]: The search results.

    Raises:
        Exception: Any error raised while searching or mapping is logged and re-raised.
    """
    if where.get("contentId"):
        logger.info(f"Searching for content with content_id: {where['contentId']}")

    try:
        contents = unique_sdk.Content.search(
            user_id=user_id,
            company_id=company_id,
            chatId=chat_id,
            # TODO add type parameter in SDK
            where=where,  # type: ignore
        )
        return map_contents(contents)
    except Exception as e:
        logger.error(f"Error while searching contents: {e}")
        # Bare raise re-raises the active exception without adding a frame.
        raise
163
+
164
+
165
async def search_contents_async(
    user_id: str,
    company_id: str,
    chat_id: str,
    where: dict,
):
    """
    Search the knowledge base for content by filter without blocking.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        chat_id (str): The chat ID.
        where (dict): The search criteria. If it contains a truthy "contentId", that value is logged.

    Returns:
        The SDK search result passed through ``map_contents``.

    Raises:
        Exception: Any error raised while searching or mapping is logged and re-raised.
    """
    has_content_id = bool(where.get("contentId"))
    if has_content_id:
        logger.info(f"Searching for content with content_id: {where['contentId']}")

    try:
        search_request = {
            "user_id": user_id,
            "company_id": company_id,
            "chatId": chat_id,
            "where": where,
        }
        found = await unique_sdk.Content.search_async(**search_request)  # type: ignore
        return map_contents(found)
    except Exception as e:
        logger.error(f"Error while searching contents: {e}")
        raise e
186
+
187
+
188
def _upsert_content(
    user_id: str,
    company_id: str,
    input_data: dict,
    scope_id: str | None = None,
    chat_id: str | None = None,
    file_url: str | None = None,
):
    """
    Forward a content upsert to the SDK and return whatever it returns.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        input_data (dict): The content input, sent as the SDK's ``input``.
        scope_id (str | None): The scope ID. Defaults to None.
        chat_id (str | None): The chat ID. Defaults to None.
        file_url (str | None): The file URL. Defaults to None.
    """
    # The camelCase keys are the SDK's own parameter names.
    upsert_request = {
        "user_id": user_id,
        "company_id": company_id,
        "input": input_data,
        "scopeId": scope_id,
        "chatId": chat_id,
        "fileUrl": file_url,
    }
    return unique_sdk.Content.upsert(**upsert_request)  # type: ignore
205
+
206
+
207
def upload_content(
    user_id: str,
    company_id: str,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str | None = None,
    chat_id: str | None = None,
    skip_ingestion: bool = False,
):
    """
    Upload a local file to the knowledge base, logging any failure.

    All arguments are handed unchanged to ``_trigger_upload_content``.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str | None): The scope ID. Defaults to None.
        chat_id (str | None): The chat ID. Defaults to None.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.

    Returns:
        Content: The uploaded content.

    Raises:
        Exception: Any error raised during the upload is logged and re-raised.
    """
    upload_args = {
        "user_id": user_id,
        "company_id": company_id,
        "path_to_content": path_to_content,
        "content_name": content_name,
        "mime_type": mime_type,
        "scope_id": scope_id,
        "chat_id": chat_id,
        "skip_ingestion": skip_ingestion,
    }

    try:
        uploaded = _trigger_upload_content(**upload_args)
    except Exception as e:
        logger.error(f"Error while uploading content: {e}")
        raise e
    return uploaded
248
+
249
+
250
def _trigger_upload_content(
    user_id: str,
    company_id: str,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str | None = None,
    chat_id: str | None = None,
    skip_ingestion: bool = False,
):
    """
    Uploads content to the knowledge base.

    The upload happens in three steps: an upsert that creates the content
    record, an HTTP PUT of the file bytes to the record's write URL, and a
    second upsert that registers the read URL and byte size.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content; used as both key and title.
        mime_type (str): The MIME type of the content.
        scope_id (str | None): The scope ID. Defaults to None.
        chat_id (str | None): The chat ID. Takes precedence over scope_id in the final upsert. Defaults to None.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.

    Returns:
        Content: The uploaded content, built from the first upsert's result.

    Raises:
        ValueError: If neither chat_id nor scope_id is provided, or if the
            write or read URL is missing from the created content.
        requests.HTTPError: If the file upload answers with an error status.
    """

    if not chat_id and not scope_id:
        raise ValueError("chat_id or scope_id must be provided")

    byte_size = os.path.getsize(path_to_content)
    created_content = _upsert_content(
        user_id=user_id,
        company_id=company_id,
        input_data={
            "key": content_name,
            "title": content_name,
            "mimeType": mime_type,
        },
        scope_id=scope_id,
        chat_id=chat_id,
    )  # type: ignore

    write_url = created_content["writeUrl"]

    if not write_url:
        error_msg = "Write url for uploaded content is missing"
        logger.error(error_msg)
        raise ValueError(error_msg)

    # Upload the file bytes to the write URL as a block blob; the content-type
    # header makes the stored blob carry the caller's MIME type.
    with open(path_to_content, "rb") as file:
        upload_response = requests.put(
            url=write_url,
            data=file,
            headers={
                "X-Ms-Blob-Content-Type": mime_type,
                "X-Ms-Blob-Type": "BlockBlob",
            },
        )

    # Do not register content whose bytes never reached storage.
    if not upload_response.ok:
        logger.error(
            f"Error uploading content: Status code {upload_response.status_code}"
        )
        upload_response.raise_for_status()

    read_url = created_content["readUrl"]

    if not read_url:
        error_msg = "Read url for uploaded content is missing"
        logger.error(error_msg)
        raise ValueError(error_msg)

    input_dict: dict = {
        "key": content_name,
        "title": content_name,
        "mimeType": mime_type,
        "byteSize": byte_size,
    }

    if skip_ingestion:
        input_dict["ingestionConfig"] = {"uniqueIngestionMode": "SKIP_INGESTION"}

    # Exactly one target is sent: the chat when given, otherwise the scope.
    target = {"chat_id": chat_id} if chat_id else {"scope_id": scope_id}
    _upsert_content(
        user_id=user_id,
        company_id=company_id,
        input_data=input_dict,
        file_url=read_url,
        **target,
    )  # type: ignore

    return Content(**created_content)
346
+
347
+
348
def request_content_by_id(
    user_id: str, company_id: str, content_id: str, chat_id: str | None
) -> requests.Response:
    """
    Sends a GET request for the file of a content item.

    The response is returned as-is; the status code is not checked here.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The ID of the content to download.
        chat_id (str | None): The ID of the chat from which to download the content. Pass None to download from the knowledge base.

    Returns:
        requests.Response: The response object containing the downloaded content.

    """
    logger.info(f"Requesting content with content_id: {content_id}")
    url = f"{unique_sdk.api_base}/content/{content_id}/file"

    # Let requests build the query string so chat_id is URL-encoded.
    params = {"chatId": chat_id} if chat_id else None

    headers = {
        "x-api-version": unique_sdk.api_version,
        "x-app-id": unique_sdk.app_id,
        "x-user-id": user_id,
        "x-company-id": company_id,
        "Authorization": f"Bearer {unique_sdk.api_key}",
    }

    return requests.get(url, headers=headers, params=params)
379
+
380
+
381
def download_content_to_file_by_id(
    user_id: str,
    company_id: str,
    content_id: str,
    chat_id: str | None = None,
    filename: str | None = None,
    tmp_dir_path: str | Path | None = "/tmp",
) -> Path:
    """
    Downloads content and saves it to a file in a fresh temporary directory.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The ID of the content to download.
        chat_id (str | None): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
        filename (str | None): The name of the file to save the content as. If not provided, the filename from the response's Content-Disposition header is used. Defaults to None.
        tmp_dir_path (str | Path | None): The directory in which the temporary directory is created. Defaults to "/tmp".

    Returns:
        Path: The path to the downloaded file. The temporary directory is not cleaned up automatically.

    Raises:
        Exception: If the download fails or the filename cannot be determined.
    """

    logger.info(f"Downloading content to file with content_id: {content_id}")
    response = request_content_by_id(user_id, company_id, content_id, chat_id)

    # Fail before touching the filesystem so no empty directory is left behind.
    if response.status_code != 200:
        error_msg = f"Error downloading file: Status code {response.status_code}"
        logger.error(error_msg)
        raise Exception(error_msg)

    target_name = filename
    if not target_name:
        pattern = r'filename="([^"]+)"'
        match = re.search(pattern, response.headers.get("Content-Disposition", ""))
        # The header is server-supplied: keep only the final path component so
        # an absolute path or ".." cannot place the file outside the temp dir.
        target_name = Path(match.group(1)).name if match else ""
        if not target_name:
            error_msg = "Error downloading file: Filename could not be determined"
            logger.error(error_msg)
            raise Exception(error_msg)

    random_dir = tempfile.mkdtemp(dir=tmp_dir_path)
    content_path = Path(random_dir) / target_name

    with open(content_path, "wb") as file:
        file.write(response.content)

    return content_path
432
+
433
+
434
+ # TODO: Discuss if we should deprecate this method due to unclear use by content_name
435
def download_content(
    user_id: str,
    company_id: str,
    content_id: str,
    content_name: str,
    chat_id: str | None = None,
    dir_path: str | Path | None = "/tmp",
) -> Path:
    """
    Downloads content to a fresh temporary directory.

    Args:
        user_id (str): The user ID.
        company_id (str): The company ID.
        content_id (str): The id of the uploaded content.
        content_name (str): The file name to save the content under.
        chat_id (str | None): The chat_id, defaults to None.
        dir_path (str | Path | None): The directory in which a random temporary directory is created, defaults to "/tmp". Be aware that this directory won't be cleaned up automatically.

    Returns:
        content_path: The path to the downloaded content in the temporary directory.

    Raises:
        Exception: If the download fails.
    """

    logger.info(f"Downloading content with content_id: {content_id}")
    response = request_content_by_id(user_id, company_id, content_id, chat_id)

    # Check the status first so a failed download leaves no empty directory.
    if response.status_code != 200:
        error_msg = f"Error downloading file: Status code {response.status_code}"
        logger.error(error_msg)
        raise Exception(error_msg)

    random_dir = tempfile.mkdtemp(dir=dir_path)
    content_path = Path(random_dir) / content_name

    with open(content_path, "wb") as file:
        file.write(response.content)

    return content_path