unique_toolkit 1.9.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
1
1
  import logging
2
- import mimetypes
3
- from enum import StrEnum
4
2
  from pathlib import Path
5
3
  from typing import Any, overload
6
4
 
@@ -8,6 +6,7 @@ import unique_sdk
8
6
  from requests import Response
9
7
  from typing_extensions import deprecated
10
8
 
9
+ from unique_toolkit._common.utils.files import is_file_content, is_image_content
11
10
  from unique_toolkit._common.validate_required_values import validate_required_values
12
11
  from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
13
12
  from unique_toolkit.app.unique_settings import UniqueSettings
@@ -35,29 +34,7 @@ from unique_toolkit.content.schemas import (
35
34
  logger = logging.getLogger(f"toolkit.{DOMAIN_NAME}.{__name__}")
36
35
 
37
36
 
38
- class FileMimeType(StrEnum):
39
- PDF = "application/pdf"
40
- DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
41
- DOC = "application/msword"
42
- XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
43
- XLS = "application/vnd.ms-excel"
44
- PPTX = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
45
- CSV = "text/csv"
46
- HTML = "text/html"
47
- MD = "text/markdown"
48
- TXT = "text/plain"
49
-
50
-
51
- class ImageMimeType(StrEnum):
52
- JPEG = "image/jpeg"
53
- PNG = "image/png"
54
- GIF = "image/gif"
55
- BMP = "image/bmp"
56
- WEBP = "image/webp"
57
- TIFF = "image/tiff"
58
- SVG = "image/svg+xml"
59
-
60
-
37
+ @deprecated("Use KnowledgeBaseService instead")
61
38
  class ContentService:
62
39
  """
63
40
  Provides methods for searching, downloading and uploading content in the knowledge base.
@@ -337,6 +314,7 @@ class ContentService:
337
314
  logger.error(f"Error while searching content chunks: {e}")
338
315
  raise e
339
316
 
317
+ @deprecated("Use search_chunks_async instead")
340
318
  async def search_content_chunks_async(
341
319
  self,
342
320
  search_string: str,
@@ -694,17 +672,7 @@ class ContentService:
694
672
  return content
695
673
 
696
674
  def is_file_content(self, filename: str) -> bool:
697
- mimetype, _ = mimetypes.guess_type(filename)
698
-
699
- if not mimetype:
700
- return False
701
-
702
- return mimetype in FileMimeType.__members__.values()
675
+ return is_file_content(filename=filename)
703
676
 
704
677
  def is_image_content(self, filename: str) -> bool:
705
- mimetype, _ = mimetypes.guess_type(filename)
706
-
707
- if not mimetype:
708
- return False
709
-
710
- return mimetype in ImageMimeType.__members__.values()
678
+ return is_image_content(filename=filename)
@@ -0,0 +1,520 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Any, overload
4
+
5
+ import unique_sdk
6
+
7
+ from unique_toolkit._common.validate_required_values import validate_required_values
8
+ from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
9
+ from unique_toolkit.app.unique_settings import UniqueSettings
10
+ from unique_toolkit.content.constants import (
11
+ DEFAULT_SEARCH_LANGUAGE,
12
+ )
13
+ from unique_toolkit.content.functions import (
14
+ download_content_to_bytes,
15
+ download_content_to_file_by_id,
16
+ get_content_info,
17
+ search_content_chunks,
18
+ search_content_chunks_async,
19
+ search_contents,
20
+ search_contents_async,
21
+ update_content,
22
+ upload_content,
23
+ upload_content_from_bytes,
24
+ )
25
+ from unique_toolkit.content.schemas import (
26
+ Content,
27
+ ContentChunk,
28
+ ContentInfo,
29
+ ContentRerankerConfig,
30
+ ContentSearchType,
31
+ PaginatedContentInfo,
32
+ )
33
+
34
+ _LOGGER = logging.getLogger(f"toolkit.knowledge_base.{__name__}")
35
+
36
+ _DEFAULT_SCORE_THRESHOLD: float = 0.5
37
+
38
+
39
+ class KnowledgeBaseService:
40
+ """
41
+ Provides methods for searching, downloading and uploading content in the knowledge base.
42
+ """
43
+
44
+ def __init__(
45
+ self,
46
+ company_id: str,
47
+ user_id: str,
48
+ metadata_filter: dict | None = None,
49
+ ):
50
+ """
51
+ Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.
52
+ """
53
+
54
+ self._metadata_filter = None
55
+ [company_id, user_id] = validate_required_values([company_id, user_id])
56
+ self._company_id = company_id
57
+ self._user_id = user_id
58
+ self._metadata_filter = metadata_filter
59
+
60
+ @classmethod
61
+ def from_event(cls, event: BaseEvent):
62
+ """
63
+ Initialize the KnowledgeBaseService from an event.
64
+ """
65
+ metadata_filter = None
66
+
67
+ if isinstance(event, (ChatEvent | Event)):
68
+ metadata_filter = event.payload.metadata_filter
69
+
70
+ return cls(
71
+ company_id=event.company_id,
72
+ user_id=event.user_id,
73
+ metadata_filter=metadata_filter,
74
+ )
75
+
76
+ @classmethod
77
+ def from_settings(
78
+ cls,
79
+ settings: UniqueSettings | str | None = None,
80
+ metadata_filter: dict | None = None,
81
+ ):
82
+ """
83
+ Initialize the KnowledgeBaseService from a settings object and an optional metadata filter.
84
+ """
85
+
86
+ if settings is None:
87
+ settings = UniqueSettings.from_env_auto_with_sdk_init()
88
+ elif isinstance(settings, str):
89
+ settings = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
90
+
91
+ return cls(
92
+ company_id=settings.auth.company_id.get_secret_value(),
93
+ user_id=settings.auth.user_id.get_secret_value(),
94
+ metadata_filter=metadata_filter,
95
+ )
96
+
97
+ @overload
98
+ def search_content_chunks(
99
+ self,
100
+ *,
101
+ search_string: str,
102
+ search_type: ContentSearchType,
103
+ limit: int,
104
+ scope_ids: list[str],
105
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
106
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
107
+ reranker_config: ContentRerankerConfig | None = None,
108
+ ) -> list[ContentChunk]: ...
109
+
110
+ @overload
111
+ def search_content_chunks(
112
+ self,
113
+ *,
114
+ search_string: str,
115
+ search_type: ContentSearchType,
116
+ limit: int,
117
+ metadata_filter: dict,
118
+ scope_ids: list[str] | None = None,
119
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
120
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
121
+ reranker_config: ContentRerankerConfig | None = None,
122
+ ) -> list[ContentChunk]: ...
123
+
124
+ @overload
125
+ def search_content_chunks(
126
+ self,
127
+ *,
128
+ search_string: str,
129
+ search_type: ContentSearchType,
130
+ limit: int,
131
+ metadata_filter: dict,
132
+ content_ids: list[str],
133
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
134
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
135
+ reranker_config: ContentRerankerConfig | None = None,
136
+ ) -> list[ContentChunk]: ...
137
+
138
+ def search_content_chunks(
139
+ self,
140
+ *,
141
+ search_string: str,
142
+ search_type: ContentSearchType,
143
+ limit: int,
144
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
145
+ reranker_config: ContentRerankerConfig | None = None,
146
+ scope_ids: list[str] | None = None,
147
+ metadata_filter: dict | None = None,
148
+ content_ids: list[str] | None = None,
149
+ score_threshold: float | None = None,
150
+ ) -> list[ContentChunk]:
151
+ """
152
+ Performs a synchronous search for content chunks in the knowledge base.
153
+
154
+ Args:
155
+ search_string (str): The search string.
156
+ search_type (ContentSearchType): The type of search to perform.
157
+ limit (int): The maximum number of results to return.
158
+ search_language (str, optional): The language for the full-text search. Defaults to "english".
159
+ reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
160
+ scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
161
+ metadata_filter (dict | None, optional): UniqueQL metadata filter. If unspecified/None, the metadata filter the service was constructed with is used. Defaults to None.
162
+ content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
163
+ score_threshold (float | None, optional): Sets the minimum similarity score for search results to be considered. Defaults to None, which is forwarded unchanged to the underlying search call.
164
+
165
+ Returns:
166
+ list[ContentChunk]: The search results.
167
+
168
+ Raises:
169
+ Exception: If there's an error during the search operation.
170
+ """
171
+
172
+ if metadata_filter is None:
173
+ metadata_filter = self._metadata_filter
174
+
175
+ try:
176
+ searches = search_content_chunks(
177
+ user_id=self._user_id,
178
+ company_id=self._company_id,
179
+ chat_id="",
180
+ search_string=search_string,
181
+ search_type=search_type,
182
+ limit=limit,
183
+ search_language=search_language,
184
+ reranker_config=reranker_config,
185
+ scope_ids=scope_ids,
186
+ chat_only=False,
187
+ metadata_filter=metadata_filter,
188
+ content_ids=content_ids,
189
+ score_threshold=score_threshold,
190
+ )
191
+ return searches
192
+ except Exception as e:
193
+ _LOGGER.error(f"Error while searching content chunks: {e}")
194
+ raise e
195
+
196
+ @overload
197
+ async def search_content_chunks_async(
198
+ self,
199
+ *,
200
+ search_string: str,
201
+ search_type: ContentSearchType,
202
+ limit: int,
203
+ scope_ids: list[str],
204
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
205
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
206
+ reranker_config: ContentRerankerConfig | None = None,
207
+ ) -> list[ContentChunk]: ...
208
+
209
+ @overload
210
+ async def search_content_chunks_async(
211
+ self,
212
+ *,
213
+ search_string: str,
214
+ search_type: ContentSearchType,
215
+ limit: int,
216
+ metadata_filter: dict,
217
+ scope_ids: list[str] | None = None,
218
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
219
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
220
+ reranker_config: ContentRerankerConfig | None = None,
221
+ ) -> list[ContentChunk]: ...
222
+
223
+ @overload
224
+ async def search_content_chunks_async(
225
+ self,
226
+ *,
227
+ search_string: str,
228
+ search_type: ContentSearchType,
229
+ limit: int,
230
+ metadata_filter: dict,
231
+ content_ids: list[str],
232
+ score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
233
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
234
+ reranker_config: ContentRerankerConfig | None = None,
235
+ ) -> list[ContentChunk]: ...
236
+
237
+ async def search_content_chunks_async(
238
+ self,
239
+ *,
240
+ search_string: str,
241
+ search_type: ContentSearchType,
242
+ limit: int,
243
+ search_language: str = DEFAULT_SEARCH_LANGUAGE,
244
+ reranker_config: ContentRerankerConfig | None = None,
245
+ scope_ids: list[str] | None = None,
246
+ metadata_filter: dict | None = None,
247
+ content_ids: list[str] | None = None,
248
+ score_threshold: float | None = None,
249
+ ):
250
+ """
251
+ Performs an asynchronous search for content chunks in the knowledge base.
252
+
253
+ Args:
254
+ search_string (str): The search string.
255
+ search_type (ContentSearchType): The type of search to perform.
256
+ limit (int): The maximum number of results to return.
257
+ search_language (str, optional): The language for the full-text search. Defaults to "english".
258
+ reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
259
+ scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
260
+ metadata_filter (dict | None, optional): UniqueQL metadata filter. If unspecified/None, the metadata filter the service was constructed with is used. Defaults to None.
261
+ content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
262
+ score_threshold (float | None, optional): Sets the minimum similarity score for search results to be considered. Defaults to None, which is forwarded unchanged to the underlying search call.
263
+
264
+ Returns:
265
+ list[ContentChunk]: The search results.
266
+
267
+ Raises:
268
+ Exception: If there's an error during the search operation.
269
+ """
270
+ if metadata_filter is None:
271
+ metadata_filter = self._metadata_filter
272
+
273
+ try:
274
+ searches = await search_content_chunks_async(
275
+ user_id=self._user_id,
276
+ company_id=self._company_id,
277
+ chat_id="",
278
+ search_string=search_string,
279
+ search_type=search_type,
280
+ limit=limit,
281
+ search_language=search_language,
282
+ reranker_config=reranker_config,
283
+ scope_ids=scope_ids,
284
+ chat_only=False,
285
+ metadata_filter=metadata_filter,
286
+ content_ids=content_ids,
287
+ score_threshold=score_threshold,
288
+ )
289
+ return searches
290
+ except Exception as e:
291
+ _LOGGER.error(f"Error while searching content chunks: {e}")
292
+ raise e
293
+
294
+ def search_contents(
295
+ self,
296
+ *,
297
+ where: dict,
298
+ ) -> list[Content]:
299
+ """
300
+ Performs a search in the knowledge base by filter (not a similarity search).
301
+ This function loads complete content of the files from the knowledge base in contrast to search_content_chunks.
302
+
303
+ Args:
304
+ where (dict): The search criteria.
305
+
306
+ Returns:
307
+ list[Content]: The search results.
308
+ """
309
+
310
+ return search_contents(
311
+ user_id=self._user_id,
312
+ company_id=self._company_id,
313
+ chat_id="",
314
+ where=where,
315
+ )
316
+
317
+ async def search_contents_async(
318
+ self,
319
+ *,
320
+ where: dict,
321
+ ) -> list[Content]:
322
+ """
323
+ Performs an asynchronous search for content files in the knowledge base by filter.
324
+
325
+ Args:
326
+ where (dict): The search criteria.
327
+
328
+ Returns:
329
+ list[Content]: The search results.
330
+ """
331
+
332
+ return await search_contents_async(
333
+ user_id=self._user_id,
334
+ company_id=self._company_id,
335
+ chat_id="",
336
+ where=where,
337
+ )
338
+
339
+ def upload_content_from_bytes(
340
+ self,
341
+ content: bytes,
342
+ *,
343
+ content_name: str,
344
+ mime_type: str,
345
+ scope_id: str,
346
+ skip_ingestion: bool = False,
347
+ ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
348
+ metadata: dict | None = None,
349
+ ) -> Content:
350
+ """
351
+ Uploads content to the knowledge base.
352
+
353
+ Args:
354
+ content (bytes): The content to upload.
355
+ content_name (str): The name of the content.
356
+ mime_type (str): The MIME type of the content.
357
+ scope_id (str): The scope ID.
358
+ skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
359
+ skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
360
+ ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
361
+ metadata (dict | None): The metadata to associate with the content. Defaults to None.
362
+
363
+ Returns:
364
+ Content: The uploaded content.
365
+ """
366
+
367
+ return upload_content_from_bytes(
368
+ user_id=self._user_id,
369
+ company_id=self._company_id,
370
+ content=content,
371
+ content_name=content_name,
372
+ mime_type=mime_type,
373
+ scope_id=scope_id,
374
+ chat_id="",
375
+ skip_ingestion=skip_ingestion,
376
+ ingestion_config=ingestion_config,
377
+ metadata=metadata,
378
+ )
379
+
380
+ def upload_content(
381
+ self,
382
+ path_to_content: str,
383
+ content_name: str,
384
+ mime_type: str,
385
+ scope_id: str,
386
+ skip_ingestion: bool = False,
387
+ skip_excel_ingestion: bool = False,
388
+ ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
389
+ metadata: dict[str, Any] | None = None,
390
+ ):
391
+ """
392
+ Uploads content to the knowledge base.
393
+
394
+ Args:
395
+ path_to_content (str): The path to the content to upload.
396
+ content_name (str): The name of the content.
397
+ mime_type (str): The MIME type of the content.
398
+ scope_id (str): The scope ID.
399
+ skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
400
+ skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
401
+ ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
402
+ metadata (dict[str, Any] | None): The metadata to associate with the content. Defaults to None.
403
+
404
+ Returns:
405
+ Content: The uploaded content.
406
+ """
407
+
408
+ return upload_content(
409
+ user_id=self._user_id,
410
+ company_id=self._company_id,
411
+ path_to_content=path_to_content,
412
+ content_name=content_name,
413
+ mime_type=mime_type,
414
+ scope_id=scope_id,
415
+ chat_id="",
416
+ skip_ingestion=skip_ingestion,
417
+ skip_excel_ingestion=skip_excel_ingestion,
418
+ ingestion_config=ingestion_config,
419
+ metadata=metadata,
420
+ )
421
+
422
+ def download_content_to_file(
423
+ self,
424
+ *,
425
+ content_id: str,
426
+ output_dir_path: Path | None = None,
427
+ output_filename: str | None = None,
428
+ ):
429
+ """
430
+ Downloads content from the knowledge base and saves it to a file.
431
+
432
+ Args:
433
+ content_id (str): The ID of the content to download.
434
+ output_filename (str | None): The name of the file to save the content as. If not provided, the original filename will be used. Defaults to None.
435
+ output_dir_path (Path | None): The directory where the content will be saved. Defaults to None, which is forwarded unchanged to the underlying download call.
436
+
437
+ Returns:
438
+ Path: The path to the downloaded file.
439
+
440
+ Raises:
441
+ Exception: If the download fails or the filename cannot be determined.
442
+ """
443
+
444
+ return download_content_to_file_by_id(
445
+ user_id=self._user_id,
446
+ company_id=self._company_id,
447
+ content_id=content_id,
448
+ chat_id="",
449
+ filename=output_filename,
450
+ tmp_dir_path=output_dir_path,
451
+ )
452
+
453
+ def download_content_to_bytes(
454
+ self,
455
+ *,
456
+ content_id: str,
457
+ ) -> bytes:
458
+ """
459
+ Downloads content to memory
460
+
461
+ Args:
462
+ content_id (str): The id of the uploaded content.
463
+ chat_id (Optional[str]): The chat_id, defaults to None.
464
+
465
+ Returns:
466
+ bytes: The downloaded content.
467
+
468
+ Raises:
469
+ Exception: If the download fails.
470
+ """
471
+
472
+ return download_content_to_bytes(
473
+ user_id=self._user_id,
474
+ company_id=self._company_id,
475
+ content_id=content_id,
476
+ chat_id=None,
477
+ )
478
+
479
+ def get_paginated_content_infos(
480
+ self,
481
+ *,
482
+ metadata_filter: dict[str, Any] | None = None,
483
+ skip: int | None = None,
484
+ take: int | None = None,
485
+ file_path: str | None = None,
486
+ ) -> PaginatedContentInfo:
487
+ return get_content_info(
488
+ user_id=self._user_id,
489
+ company_id=self._company_id,
490
+ metadata_filter=metadata_filter,
491
+ skip=skip,
492
+ take=take,
493
+ file_path=file_path,
494
+ )
495
+
496
+ def replace_content_metadata(
497
+ self,
498
+ *,
499
+ content_id: str,
500
+ metadata: dict[str, Any],
501
+ ) -> ContentInfo:
502
+ return update_content(
503
+ user_id=self._user_id,
504
+ company_id=self._company_id,
505
+ content_id=content_id,
506
+ metadata=metadata,
507
+ )
508
+
509
+
510
+ if __name__ == "__main__":
511
+ kb_service = KnowledgeBaseService.from_settings()
512
+
513
+ kb_service.search_contents(where={"metadata.key": "123"})
514
+ kb_service.search_content_chunks(
515
+ search_string="test",
516
+ search_type=ContentSearchType.VECTOR,
517
+ limit=10,
518
+ scope_ids=["123"],
519
+ metadata_filter={"key": "123"},
520
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 1.9.1
3
+ Version: 1.11.0
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Cedric Klinkert
@@ -24,7 +24,7 @@ Requires-Dist: regex (>=2024.5.15,<2025.0.0)
24
24
  Requires-Dist: sseclient (>=0.0.27,<0.0.28)
25
25
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
26
26
  Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
27
- Requires-Dist: unique-sdk (>=0.10.0,<0.11.0)
27
+ Requires-Dist: unique-sdk (>=0.10.28,<0.11.0)
28
28
  Description-Content-Type: text/markdown
29
29
 
30
30
  # Unique Toolkit
@@ -118,6 +118,14 @@ All notable changes to this project will be documented in this file.
118
118
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
119
119
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
120
120
 
121
+ ## [1.11.0] - 2025-10-07
122
+ - Add sub-agent response referencing.
123
+
124
+ ## [1.10.0] - 2025-10-07
125
+ - Introduce future proof knowledgebase service decoupled from chat
126
+ - Extend chat service to download contents in the chat
127
+ - Update documentation
128
+
121
129
  ## [1.9.1] - 2025-10-06
122
130
  - Switch default model used in evaluation service from `GPT-3.5-turbo (0125)` to `GPT-4o (1120)`
123
131
 
@@ -841,3 +849,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
841
849
 
842
850
  ## [0.0.2] - 2024-07-10
843
851
  - Initial release of `unique_toolkit`.
852
+