unique_toolkit 0.7.9__py3-none-any.whl → 1.33.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. unique_toolkit/__init__.py +36 -3
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +357 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +225 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +368 -0
  16. unique_toolkit/_common/endpoint_requestor.py +480 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/experimental/endpoint_builder.py +368 -0
  19. unique_toolkit/_common/experimental/endpoint_requestor.py +488 -0
  20. unique_toolkit/_common/feature_flags/schema.py +9 -0
  21. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  22. unique_toolkit/_common/pydantic_helpers.py +174 -0
  23. unique_toolkit/_common/referencing.py +53 -0
  24. unique_toolkit/_common/string_utilities.py +140 -0
  25. unique_toolkit/_common/tests/test_referencing.py +521 -0
  26. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  27. unique_toolkit/_common/token/image_token_counting.py +67 -0
  28. unique_toolkit/_common/token/token_counting.py +204 -0
  29. unique_toolkit/_common/utils/__init__.py +1 -0
  30. unique_toolkit/_common/utils/files.py +43 -0
  31. unique_toolkit/_common/utils/image/encode.py +25 -0
  32. unique_toolkit/_common/utils/jinja/helpers.py +10 -0
  33. unique_toolkit/_common/utils/jinja/render.py +18 -0
  34. unique_toolkit/_common/utils/jinja/schema.py +65 -0
  35. unique_toolkit/_common/utils/jinja/utils.py +80 -0
  36. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  37. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  38. unique_toolkit/_common/utils/write_configuration.py +51 -0
  39. unique_toolkit/_common/validators.py +101 -4
  40. unique_toolkit/agentic/__init__.py +1 -0
  41. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  42. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  43. unique_toolkit/agentic/evaluation/config.py +36 -0
  44. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  45. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  46. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  47. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  48. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  49. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +112 -0
  50. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  51. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +20 -16
  52. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +32 -21
  53. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  54. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  55. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  56. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  57. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +298 -0
  58. unique_toolkit/agentic/history_manager/history_manager.py +241 -0
  59. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  60. unique_toolkit/agentic/history_manager/utils.py +96 -0
  61. unique_toolkit/agentic/message_log_manager/__init__.py +5 -0
  62. unique_toolkit/agentic/message_log_manager/service.py +93 -0
  63. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  64. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  65. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  66. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +71 -0
  67. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +297 -0
  68. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  69. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  70. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  71. unique_toolkit/agentic/tools/__init__.py +1 -0
  72. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  73. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  74. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  75. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  76. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  77. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  78. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  79. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  80. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  81. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +240 -0
  82. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +84 -0
  83. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +78 -0
  84. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +264 -0
  85. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  86. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display.py +421 -0
  87. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +2103 -0
  88. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  89. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  90. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  91. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  92. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  93. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  94. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  95. unique_toolkit/agentic/tools/a2a/tool/config.py +158 -0
  96. unique_toolkit/agentic/tools/a2a/tool/service.py +393 -0
  97. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  98. unique_toolkit/agentic/tools/config.py +128 -0
  99. unique_toolkit/agentic/tools/factory.py +44 -0
  100. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  101. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  102. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  103. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  104. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  105. unique_toolkit/agentic/tools/openai_builtin/base.py +46 -0
  106. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  107. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +88 -0
  108. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +250 -0
  109. unique_toolkit/agentic/tools/openai_builtin/manager.py +79 -0
  110. unique_toolkit/agentic/tools/schemas.py +145 -0
  111. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  112. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  113. unique_toolkit/agentic/tools/tool.py +187 -0
  114. unique_toolkit/agentic/tools/tool_manager.py +492 -0
  115. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  116. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  117. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  118. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  119. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  120. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  121. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  122. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  123. unique_toolkit/app/__init__.py +9 -0
  124. unique_toolkit/app/dev_util.py +180 -0
  125. unique_toolkit/app/fast_api_factory.py +131 -0
  126. unique_toolkit/app/init_sdk.py +32 -1
  127. unique_toolkit/app/schemas.py +206 -31
  128. unique_toolkit/app/unique_settings.py +367 -0
  129. unique_toolkit/app/webhook.py +77 -0
  130. unique_toolkit/chat/__init__.py +8 -1
  131. unique_toolkit/chat/deprecated/service.py +232 -0
  132. unique_toolkit/chat/functions.py +648 -78
  133. unique_toolkit/chat/rendering.py +34 -0
  134. unique_toolkit/chat/responses_api.py +461 -0
  135. unique_toolkit/chat/schemas.py +134 -2
  136. unique_toolkit/chat/service.py +115 -767
  137. unique_toolkit/content/functions.py +353 -8
  138. unique_toolkit/content/schemas.py +128 -15
  139. unique_toolkit/content/service.py +321 -45
  140. unique_toolkit/content/smart_rules.py +301 -0
  141. unique_toolkit/content/utils.py +10 -3
  142. unique_toolkit/data_extraction/README.md +96 -0
  143. unique_toolkit/data_extraction/__init__.py +11 -0
  144. unique_toolkit/data_extraction/augmented/__init__.py +5 -0
  145. unique_toolkit/data_extraction/augmented/service.py +93 -0
  146. unique_toolkit/data_extraction/base.py +25 -0
  147. unique_toolkit/data_extraction/basic/__init__.py +11 -0
  148. unique_toolkit/data_extraction/basic/config.py +18 -0
  149. unique_toolkit/data_extraction/basic/prompt.py +13 -0
  150. unique_toolkit/data_extraction/basic/service.py +55 -0
  151. unique_toolkit/embedding/service.py +103 -12
  152. unique_toolkit/framework_utilities/__init__.py +1 -0
  153. unique_toolkit/framework_utilities/langchain/__init__.py +10 -0
  154. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  155. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  156. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  157. unique_toolkit/framework_utilities/openai/client.py +84 -0
  158. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  159. unique_toolkit/framework_utilities/utils.py +23 -0
  160. unique_toolkit/language_model/__init__.py +3 -0
  161. unique_toolkit/language_model/_responses_api_utils.py +93 -0
  162. unique_toolkit/language_model/builder.py +27 -11
  163. unique_toolkit/language_model/default_language_model.py +3 -0
  164. unique_toolkit/language_model/functions.py +345 -43
  165. unique_toolkit/language_model/infos.py +1288 -46
  166. unique_toolkit/language_model/reference.py +242 -0
  167. unique_toolkit/language_model/schemas.py +481 -49
  168. unique_toolkit/language_model/service.py +229 -28
  169. unique_toolkit/protocols/support.py +145 -0
  170. unique_toolkit/services/__init__.py +7 -0
  171. unique_toolkit/services/chat_service.py +1631 -0
  172. unique_toolkit/services/knowledge_base.py +1094 -0
  173. unique_toolkit/short_term_memory/service.py +178 -41
  174. unique_toolkit/smart_rules/__init__.py +0 -0
  175. unique_toolkit/smart_rules/compile.py +56 -0
  176. unique_toolkit/test_utilities/events.py +197 -0
  177. unique_toolkit-1.33.3.dist-info/METADATA +1145 -0
  178. unique_toolkit-1.33.3.dist-info/RECORD +205 -0
  179. unique_toolkit/evaluators/__init__.py +0 -1
  180. unique_toolkit/evaluators/config.py +0 -35
  181. unique_toolkit/evaluators/constants.py +0 -1
  182. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  183. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  184. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  185. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  186. unique_toolkit-0.7.9.dist-info/METADATA +0 -413
  187. unique_toolkit-0.7.9.dist-info/RECORD +0 -64
  188. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  189. {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/LICENSE +0 -0
  190. {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1094 @@
1
+ import asyncio
2
+ import logging
3
+ import mimetypes
4
+ from pathlib import Path, PurePath
5
+ from typing import Any, Callable, overload
6
+
7
+ import humps
8
+ import unique_sdk
9
+
10
+ from unique_toolkit._common.validate_required_values import validate_required_values
11
+ from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
12
+ from unique_toolkit.app.unique_settings import UniqueSettings
13
+ from unique_toolkit.content.constants import (
14
+ DEFAULT_SEARCH_LANGUAGE,
15
+ )
16
+ from unique_toolkit.content.functions import (
17
+ delete_content,
18
+ delete_content_async,
19
+ download_content_to_bytes,
20
+ download_content_to_file_by_id,
21
+ get_content_info,
22
+ get_folder_info,
23
+ search_content_chunks,
24
+ search_content_chunks_async,
25
+ search_contents,
26
+ search_contents_async,
27
+ update_content,
28
+ upload_content,
29
+ upload_content_from_bytes,
30
+ upload_content_from_bytes_async,
31
+ )
32
+ from unique_toolkit.content.schemas import (
33
+ BaseFolderInfo,
34
+ Content,
35
+ ContentChunk,
36
+ ContentInfo,
37
+ ContentRerankerConfig,
38
+ ContentSearchType,
39
+ DeleteContentResponse,
40
+ FolderInfo,
41
+ PaginatedContentInfos,
42
+ )
43
+ from unique_toolkit.content.smart_rules import Operator, Statement
44
+
45
+ _LOGGER = logging.getLogger(f"toolkit.knowledge_base.{__name__}")
46
+
47
+ _DEFAULT_SCORE_THRESHOLD: float = 0.5
48
+
49
+
50
+ class KnowledgeBaseService:
51
+ """
52
+ Provides methods for searching, downloading and uploading content in the knowledge base.
53
+ """
54
+
55
def __init__(
    self,
    company_id: str,
    user_id: str,
    metadata_filter: dict | None = None,
) -> None:
    """
    Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.

    Args:
        company_id (str): The company ID. Passed through validate_required_values before being stored.
        user_id (str): The user ID. Passed through validate_required_values before being stored.
        metadata_filter (dict | None): Filter stored on the instance and used by the chunk search
            methods when the caller does not pass one. Defaults to None.
    """
    # NOTE(review): validate_required_values presumably rejects missing values - confirm against its implementation.
    [company_id, user_id] = validate_required_values([company_id, user_id])
    self._company_id = company_id
    self._user_id = user_id
    self._metadata_filter = metadata_filter
70
+
71
@classmethod
def from_event(cls, event: BaseEvent):
    """
    Build a KnowledgeBaseService from an event.

    The company and user IDs are read from the event. The metadata filter is taken
    from the event payload only when the event is a ChatEvent or an Event; for any
    other event type it is None.
    """
    has_payload_filter = isinstance(event, (ChatEvent, Event))
    event_filter = event.payload.metadata_filter if has_payload_filter else None

    return cls(
        company_id=event.company_id,
        user_id=event.user_id,
        metadata_filter=event_filter,
    )
86
+
87
@classmethod
def from_settings(
    cls,
    settings: UniqueSettings | str | None = None,
    metadata_filter: dict | None = None,
):
    """
    Build a KnowledgeBaseService from a settings object and a metadata filter.

    Args:
        settings (UniqueSettings | str | None): A ready settings object, a filename handed to
            UniqueSettings.from_env_auto_with_sdk_init, or None to call that loader without arguments.
        metadata_filter (dict | None): The metadata filter to store on the service. Defaults to None.
    """
    if isinstance(settings, str):
        # A string is treated as the settings filename.
        settings = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
    elif settings is None:
        settings = UniqueSettings.from_env_auto_with_sdk_init()

    auth = settings.auth
    return cls(
        company_id=auth.company_id.get_secret_value(),
        user_id=auth.user_id.get_secret_value(),
        metadata_filter=metadata_filter,
    )
107
+
108
+ # Content Search
109
+ # ------------------------------------------------------------------------------------------------
110
+
111
@overload
def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    scope_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

@overload
def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    scope_ids: list[str] | None = None,
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

@overload
def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    content_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
    score_threshold: float | None = None,
) -> list[ContentChunk]:
    """
    Synchronously search the knowledge base for content chunks.

    Args:
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str, optional): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
        metadata_filter (dict | None, optional): UniqueQL metadata filter. When None, the filter stored on the service is used instead. Defaults to None.
        content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
        score_threshold (float | None, optional): Minimum similarity score for a result to be considered. Forwarded unchanged to the underlying search function. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error raised by the search is logged and re-raised.
    """
    # Fall back to the instance-level filter only when the caller gave none.
    effective_filter = (
        self._metadata_filter if metadata_filter is None else metadata_filter
    )

    try:
        # The knowledge base search is not tied to a chat: empty chat_id, chat_only disabled.
        return search_content_chunks(
            user_id=self._user_id,
            company_id=self._company_id,
            chat_id="",
            chat_only=False,
            search_string=search_string,
            search_type=search_type,
            limit=limit,
            search_language=search_language,
            reranker_config=reranker_config,
            scope_ids=scope_ids,
            content_ids=content_ids,
            metadata_filter=effective_filter,
            score_threshold=score_threshold,
        )
    except Exception as e:
        _LOGGER.error(f"Error while searching content chunks: {e}")
        raise e
209
+
210
@overload
async def search_content_chunks_async(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    scope_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

@overload
async def search_content_chunks_async(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    scope_ids: list[str] | None = None,
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

@overload
async def search_content_chunks_async(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    content_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]: ...

async def search_content_chunks_async(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
    score_threshold: float | None = None,
) -> list[ContentChunk]:
    """
    Performs an asynchronous search for content chunks in the knowledge base.

    Args:
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str, optional): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
        metadata_filter (dict | None, optional): UniqueQL metadata filter. If unspecified/None, the filter stored on the service is used. Defaults to None.
        content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
        score_threshold (float | None, optional): Minimum similarity score for a result to be considered. Forwarded unchanged to the underlying search function. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: If there's an error during the search operation; the error is logged and re-raised.
    """
    if metadata_filter is None:
        # No per-call filter: use the one given at construction time (may itself be None).
        metadata_filter = self._metadata_filter

    try:
        # The knowledge base search is not tied to a chat: empty chat_id, chat_only disabled.
        return await search_content_chunks_async(
            user_id=self._user_id,
            company_id=self._company_id,
            chat_id="",
            search_string=search_string,
            search_type=search_type,
            limit=limit,
            search_language=search_language,
            reranker_config=reranker_config,
            scope_ids=scope_ids,
            chat_only=False,
            metadata_filter=metadata_filter,
            content_ids=content_ids,
            score_threshold=score_threshold,
        )
    except Exception as e:
        # Lazy %-formatting yields the same message text as before.
        _LOGGER.error("Error while searching content chunks: %s", e)
        raise
307
+
308
def search_contents(
    self,
    *,
    where: dict,
    include_failed_content: bool = False,
) -> list[Content]:
    """
    Search the knowledge base by filter (not a similarity search).

    In contrast to search_content_chunks, this returns Content objects for the
    matching files rather than individual chunks.

    Args:
        where (dict): The search criteria.
        include_failed_content (bool): Forwarded to the underlying search function. Defaults to False.

    Returns:
        list[Content]: The search results.
    """
    # Knowledge base lookups are not tied to a chat, hence the empty chat_id.
    matches = search_contents(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        include_failed_content=include_failed_content,
        where=where,
    )
    return matches
332
+
333
async def search_contents_async(
    self,
    *,
    where: dict,
    include_failed_content: bool = False,
) -> list[Content]:
    """
    Asynchronously search the knowledge base for content files by filter.

    Args:
        where (dict): The search criteria.
        include_failed_content (bool): Forwarded to the underlying search function. Defaults to False.

    Returns:
        list[Content]: The search results.
    """
    # Knowledge base lookups are not tied to a chat, hence the empty chat_id.
    matches = await search_contents_async(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        include_failed_content=include_failed_content,
        where=where,
    )
    return matches
356
+
357
+ # Content Management
358
+ # ------------------------------------------------------------------------------------------------
359
+
360
def upload_content_from_bytes(
    self,
    content: bytes,
    *,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict | None = None,
) -> Content:
    """
    Upload in-memory bytes to the knowledge base.

    Args:
        content (bytes): The content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID to upload into.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """
    # Uploads target a knowledge base scope rather than a chat, so chat_id stays empty.
    uploaded = upload_content_from_bytes(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        scope_id=scope_id,
        content=content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
    )
    return uploaded
399
+
400
async def upload_content_from_bytes_async(
    self,
    content: bytes,
    *,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict | None = None,
) -> Content:
    """
    Asynchronously upload in-memory bytes to the knowledge base.

    Args:
        content (bytes): The content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID to upload into.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """
    # Uploads target a knowledge base scope rather than a chat, so chat_id stays empty.
    uploaded = await upload_content_from_bytes_async(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        scope_id=scope_id,
        content=content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
    )
    return uploaded
440
+
441
def upload_content(
    self,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    skip_excel_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict[str, Any] | None = None,
) -> Content:
    """
    Upload a local file to the knowledge base.

    Args:
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID to upload into.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict[str, Any] | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """
    # Uploads target a knowledge base scope rather than a chat, so chat_id stays empty.
    uploaded = upload_content(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        scope_id=scope_id,
        path_to_content=path_to_content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
        skip_excel_ingestion=skip_excel_ingestion,
    )
    return uploaded
482
+
483
def download_content_to_file(
    self,
    *,
    content_id: str,
    output_dir_path: Path | None = None,
    output_filename: str | None = None,
) -> Path:
    """
    Download a piece of knowledge base content and save it to a file.

    Args:
        content_id (str): The ID of the content to download.
        output_dir_path (Path | None): Directory to save into; passed to the download helper as
            its tmp_dir_path. Defaults to None, in which case the helper chooses the location.
        output_filename (str | None): File name to save as; passed to the download helper as its
            filename. Defaults to None, in which case the helper chooses the name.

    Returns:
        Path: The path to the downloaded file.

    Raises:
        Exception: Whatever the download helper raises is propagated unchanged.
    """
    saved_path = download_content_to_file_by_id(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id="",
        content_id=content_id,
        tmp_dir_path=output_dir_path,
        filename=output_filename,
    )
    return saved_path
513
+
514
def download_content_to_bytes(
    self,
    *,
    content_id: str,
) -> bytes:
    """
    Download a piece of content into memory.

    Args:
        content_id (str): The id of the uploaded content.

    Returns:
        bytes: The downloaded content.

    Raises:
        Exception: Whatever the download helper raises is propagated unchanged.
    """
    # Unlike the other wrappers in this class, chat_id is passed as None here, not "".
    payload = download_content_to_bytes(
        company_id=self._company_id,
        user_id=self._user_id,
        chat_id=None,
        content_id=content_id,
    )
    return payload
538
+
539
+ def batch_file_upload(
540
+ self,
541
+ *,
542
+ local_files: list[Path],
543
+ remote_folders: list[PurePath],
544
+ overwrite: bool = False,
545
+ metadata_generator: Callable[[Path, PurePath], dict[str, Any]] | None = None,
546
+ ) -> None:
547
+ """
548
+ Upload files to the knowledge base into corresponding folders
549
+
550
+ Args:
551
+ local_files (list[Path]): The local files to upload
552
+ remote_folders (list[PurePath]): The remote folders to upload the files to
553
+ overwrite (bool): Whether to overwrite existing files
554
+ metadata_generator (Callable[[Path, PurePath], dict[str, Any]] | None): The metadata generator function
555
+
556
+ Returns:
557
+ None
558
+ """
559
+
560
+ if len(local_files) != len(remote_folders):
561
+ raise ValueError(
562
+ "The number of local files and remote folders must be the same"
563
+ )
564
+
565
+ creation_result = self.create_folders(paths=remote_folders)
566
+
567
+ folders_path_to_scope_id = {
568
+ folder_path: result.id
569
+ for folder_path, result in zip(remote_folders, creation_result)
570
+ }
571
+
572
+ _old_scope_id = None
573
+ _existing_file_names: list[str] = []
574
+
575
+ for remote_folder_path, local_file_path in zip(remote_folders, local_files):
576
+ scope_id = folders_path_to_scope_id[remote_folder_path]
577
+ mime_type = mimetypes.guess_type(local_file_path.name)[0]
578
+
579
+ if mime_type is None:
580
+ _LOGGER.warning(
581
+ f"No mime type found for file {local_file_path.name}, skipping"
582
+ )
583
+ continue
584
+
585
+ if not overwrite:
586
+ if _old_scope_id is None or _old_scope_id != scope_id:
587
+ _LOGGER.debug(f"Switching to new folder {scope_id}")
588
+ _old_scope_id = scope_id
589
+ _existing_file_names = self.get_file_names_in_folder(
590
+ scope_id=scope_id
591
+ )
592
+
593
+ if local_file_path.name in _existing_file_names:
594
+ _LOGGER.warning(
595
+ f"File {local_file_path.name} already exists in folder {scope_id}, skipping"
596
+ )
597
+ continue
598
+
599
+ metadata = None
600
+ if metadata_generator is not None:
601
+ metadata = metadata_generator(local_file_path, remote_folder_path)
602
+
603
+ self.upload_content(
604
+ path_to_content=str(local_file_path),
605
+ content_name=local_file_path.name,
606
+ mime_type=mime_type,
607
+ scope_id=scope_id,
608
+ metadata=metadata,
609
+ )
610
+
611
+ # Content Information
612
+ # ------------------------------------------------------------------------------------------------
613
def get_paginated_content_infos(
    self,
    *,
    metadata_filter: dict[str, Any] | None = None,
    skip: int | None = None,
    take: int | None = None,
    file_path: str | None = None,
) -> PaginatedContentInfos:
    """
    Query content infos for this service's user and company.

    Args:
        metadata_filter (dict[str, Any] | None): Filter forwarded to ``get_content_info``
        skip (int | None): Forwarded as-is (presumably the paging offset)
        take (int | None): Forwarded as-is (presumably the page size)
        file_path (str | None): Forwarded as-is

    Returns:
        PaginatedContentInfos: The result of ``get_content_info``
    """
    query: dict[str, Any] = {
        "metadata_filter": metadata_filter,
        "skip": skip,
        "take": take,
        "file_path": file_path,
    }
    return get_content_info(
        user_id=self._user_id,
        company_id=self._company_id,
        **query,
    )
629
+
630
def get_file_names_in_folder(self, *, scope_id: str) -> list[str]:
    """
    List the ``key`` of every content info found in a knowledge base folder

    Args:
        scope_id (str): The scope id of the folder

    Returns:
        list[str]: The keys (file names) returned for the folder

    Note:
        A single ``get_paginated_content_infos`` call without ``skip``/``take``
        is made, so only what that one call returns is listed.
    """
    folder_rule = Statement(
        operator=Operator.EQUALS, value=scope_id, path=["folderId"]
    )
    filter_payload = folder_rule.model_dump(mode="json")
    page = self.get_paginated_content_infos(metadata_filter=filter_payload)

    names: list[str] = []
    for entry in page.content_infos:
        names.append(entry.key)
    return names
647
+
648
+ # Folder Management
649
+ # ------------------------------------------------------------------------------------------------
650
+
651
def get_folder_info(
    self,
    *,
    scope_id: str,
) -> FolderInfo:
    """
    Look up a folder by scope id for this service's user and company.

    Args:
        scope_id (str): The scope id of the folder

    Returns:
        FolderInfo: The result of the module-level ``get_folder_info``
    """
    # Inside a method body this name resolves to the module-level function,
    # not to this method.
    caller_identity = {"user_id": self._user_id, "company_id": self._company_id}
    return get_folder_info(scope_id=scope_id, **caller_identity)
661
+
662
def _resolve_visible_file_tree(self, content_infos: list[ContentInfo]) -> list[str]:
    """
    Build the set of folder paths referenced by the given content infos.

    Metadata carrying a ``{FullPath}`` entry contributes that value directly.
    Otherwise a ``folderIdPath`` entry is translated segment by segment into
    folder names via ``get_folder_info``.

    Args:
        content_infos (list[ContentInfo]): The content infos to inspect

    Returns:
        list[str]: Distinct folder paths, each starting with "/" (unordered)
    """
    id_path_prefix = "uniquepathid://"

    ready_paths: set[str] = set()
    pending_id_paths: set[str] = set()
    for info in content_infos:
        meta = info.metadata
        if not meta:
            continue

        full_path = meta.get(r"{FullPath}")
        if full_path is not None:
            ready_paths.add(str(full_path))
            continue

        id_path = meta.get("folderIdPath")
        if id_path is not None:
            pending_id_paths.add(str(id_path))

    # Split every id path once and remember its segments.
    segments_by_id_path = {
        id_path: id_path.replace(id_path_prefix, "").split("/")
        for id_path in pending_id_paths
    }

    distinct_scope_ids: set[str] = set()
    for segments in segments_by_id_path.values():
        distinct_scope_ids.update(segments)

    # One folder lookup per distinct scope id.
    name_by_scope_id: dict[str, str] = {}
    for scope_id in distinct_scope_ids:
        name_by_scope_id[scope_id] = self.get_folder_info(scope_id=scope_id).name

    translated_paths: set[str] = set()
    for segments in segments_by_id_path.values():
        if any(segment not in name_by_scope_id for segment in segments):
            continue
        translated_paths.add("/".join(name_by_scope_id[s] for s in segments))

    result: list[str] = []
    for path in translated_paths.union(ready_paths):
        result.append(path if path.startswith("/") else f"/{path}")
    return result
706
+
707
+ def resolve_visible_file_tree(
708
+ self, *, metadata_filter: dict[str, Any] | None = None
709
+ ) -> list[str]:
710
+ """
711
+ Resolves the visible file tree for the knowledge base for the current user.
712
+
713
+ Args:
714
+ metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.
715
+
716
+ Returns:
717
+ list[str]: The visible file tree.
718
+
719
+
720
+
721
+ """
722
+ info = self.get_paginated_content_infos(
723
+ metadata_filter=metadata_filter,
724
+ )
725
+
726
+ return self._resolve_visible_file_tree(content_infos=info.content_infos)
727
+
728
def _pop_forbidden_metadata_keys(self, metadata: dict[str, Any]) -> dict[str, Any]:
    """
    Strip the keys in the forbidden list below from ``metadata``.

    The dictionary is modified in place and the same object is returned.

    Args:
        metadata (dict[str, Any]): The metadata to clean

    Returns:
        dict[str, Any]: ``metadata`` itself, without the forbidden keys
    """
    blocked = (
        "key",
        "url",
        "title",
        "folderId",
        "mimeType",
        "companyId",
        "contentId",
        "folderIdPath",
        "externalFileOwner",
    )
    for name in blocked:
        if name in metadata:
            del metadata[name]
    return metadata
743
+
744
def create_folders(self, *, paths: list[PurePath]) -> list[BaseFolderInfo]:
    """
    Create folders in the knowledge base if the paths do not exist yet.

    Args:
        paths (list[PurePath]): The paths to create the folders at

    Returns:
        list[BaseFolderInfo]: One validated entry per item of the
            ``createdFolders`` field of the SDK response
    """
    posix_paths = [path.as_posix() for path in paths]
    response = unique_sdk.Folder.create_paths(
        user_id=self._user_id,
        company_id=self._company_id,
        paths=posix_paths,
    )

    folders: list[BaseFolderInfo] = []
    for raw_folder in response["createdFolders"]:
        folders.append(
            BaseFolderInfo.model_validate(raw_folder, by_alias=True, by_name=True)
        )
    return folders
763
+
764
+ # Metadata
765
+
766
+ # Metadata Management
767
+ # ------------------------------------------------------------------------------------------------
768
+
769
def replace_content_metadata(
    self,
    *,
    content_id: str,
    metadata: dict[str, Any],
) -> ContentInfo:
    """
    Send ``metadata`` for the given content to ``update_content``.

    Unlike ``update_content_metadata`` the metadata is passed through as
    given: it is not camelized, filtered or merged with existing metadata.

    Args:
        content_id (str): The id of the content to update
        metadata (dict[str, Any]): The metadata to send

    Returns:
        ContentInfo: The result of ``update_content``
    """
    updated_info = update_content(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_id,
        metadata=metadata,
    )
    return updated_info
781
+
782
def update_content_metadata(
    self,
    *,
    content_info: ContentInfo,
    additional_metadata: dict[str, Any],
) -> ContentInfo:
    """
    Merge additional metadata into a content's metadata and push the result.

    The additional keys are camelized and the forbidden keys are dropped
    before merging. ``content_info.metadata`` is updated in place.

    Args:
        content_info (ContentInfo): The content whose metadata is extended
        additional_metadata (dict[str, Any]): The metadata to add or overwrite

    Returns:
        ContentInfo: The result of ``update_content``
    """
    extra = self._pop_forbidden_metadata_keys(humps.camelize(additional_metadata))

    if content_info.metadata is None:
        content_info.metadata = extra
    else:
        content_info.metadata.update(extra)

    return update_content(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_info.id,
        metadata=content_info.metadata,
    )
804
+
805
def remove_content_metadata(
    self,
    *,
    content_info: ContentInfo,
    keys_to_remove: list[str],
) -> ContentInfo:
    """
    Removes the specified keys irreversibly from the content metadata.

    Each key is camelized and set to ``None`` in ``content_info.metadata``
    (in place) before the metadata is sent to ``update_content``. When the
    content has no metadata, a warning is logged and ``content_info`` is
    returned unchanged.

    Note: Keys are camelized before being removed as metadata keys are stored in camelCase.
    """

    if content_info.metadata is None:
        _LOGGER.warning(f"Content metadata is None for content {content_info.id}")
        return content_info

    # presumably a None value is what makes the backend drop the key -- verify
    cleared_keys = dict.fromkeys(humps.camelize(key) for key in keys_to_remove)
    content_info.metadata.update(cleared_keys)

    return update_content(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_info.id,
        metadata=content_info.metadata or {},
    )
830
+
831
@overload
def update_contents_metadata(
    self,
    *,
    additional_metadata: dict[str, Any],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]: ...

@overload
def update_contents_metadata(
    self, *, additional_metadata: dict[str, Any], metadata_filter: dict[str, Any]
) -> list[ContentInfo]: ...

def update_contents_metadata(
    self,
    *,
    additional_metadata: dict[str, Any],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Update the metadata of the given contents, or of those matching the metadata filter.

    When ``content_infos`` is not given, the targets are the content infos
    returned by one ``get_paginated_content_infos`` call with
    ``metadata_filter``. Each target is passed to ``update_content_metadata``.
    The list of targets is returned.

    Note: Keys are camelized before being updated as metadata keys are stored in camelCase.
    """
    # NOTE(review): when both content_infos and metadata_filter are None the
    # lookup runs with no filter -- confirm this is intended.
    prepared_metadata = self._pop_forbidden_metadata_keys(
        humps.camelize(additional_metadata)
    )

    targets = content_infos
    if targets is None:
        page = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        targets = page.content_infos

    for target in targets:
        self.update_content_metadata(
            content_info=target, additional_metadata=prepared_metadata
        )

    return targets
872
+
873
@overload
def remove_contents_metadata(
    self,
    *,
    keys_to_remove: list[str],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]: ...

@overload
def remove_contents_metadata(
    self, *, keys_to_remove: list[str], metadata_filter: dict[str, Any]
) -> list[ContentInfo]: ...

def remove_contents_metadata(
    self,
    *,
    keys_to_remove: list[str],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Remove the specified keys irreversibly from the content metadata.

    When ``content_infos`` is not given, the targets are the content infos
    returned by one ``get_paginated_content_infos`` call with
    ``metadata_filter``. Each target is passed to ``remove_content_metadata``.
    The list of targets is returned.

    Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

    """
    # NOTE(review): when both content_infos and metadata_filter are None the
    # lookup runs with no filter -- confirm this is intended.
    targets = content_infos
    if targets is None:
        page = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        targets = page.content_infos

    for target in targets:
        self.remove_content_metadata(
            content_info=target, keys_to_remove=keys_to_remove
        )

    return targets
910
+
911
+ # Delete
912
+ # ------------------------------------------------------------------------------------------------
913
+
914
@overload
def delete_content(
    self,
    *,
    content_id: str,
) -> DeleteContentResponse:
    """Delete content by id"""
    ...

@overload
def delete_content(
    self,
    *,
    file_path: str,
) -> DeleteContentResponse:
    """Delete all content matching the file path"""
    ...

def delete_content(
    self,
    *,
    content_id: str | None = None,
    file_path: str | None = None,
) -> DeleteContentResponse:
    """Delete content by id or by file path.

    Both arguments are forwarded unchanged to the module-level
    ``delete_content`` together with this service's user and company ids.
    Use ``delete_contents`` to delete by metadata filter.

    Args:
        content_id (str | None): The id of the content to delete
        file_path (str | None): The file path whose content should be deleted

    Returns:
        DeleteContentResponse: The result of the delete call
    """

    return delete_content(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_id,
        file_path=file_path,
    )
946
+
947
def delete_contents(
    self,
    *,
    metadata_filter: dict[str, Any],
) -> list[DeleteContentResponse]:
    """Delete all content matching the metadata filter

    An empty filter deletes nothing and returns an empty list. Otherwise the
    content infos returned by one ``get_paginated_content_infos`` call are
    deleted one after another by id.

    Args:
        metadata_filter (dict[str, Any]): The filter selecting the content

    Returns:
        list[DeleteContentResponse]: One response per deleted content
    """
    if not metadata_filter:
        return []

    page = self.get_paginated_content_infos(metadata_filter=metadata_filter)
    return [
        delete_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=match.id,
        )
        for match in page.content_infos
    ]
970
+
971
@overload
async def delete_content_async(
    self,
    *,
    content_id: str,
) -> DeleteContentResponse: ...

@overload
async def delete_content_async(
    self,
    *,
    file_path: str,
) -> DeleteContentResponse: ...

async def delete_content_async(
    self,
    *,
    content_id: str | None = None,
    file_path: str | None = None,
) -> DeleteContentResponse:
    """Asynchronously delete content by id or by file path.

    Both arguments are forwarded unchanged to the module-level
    ``delete_content_async`` together with this service's user and company ids.

    Args:
        content_id (str | None): The id of the content to delete
        file_path (str | None): The file path whose content should be deleted

    Returns:
        DeleteContentResponse: The awaited result of the delete call
    """
    response = await delete_content_async(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_id,
        file_path=file_path,
    )
    return response
997
+
998
async def delete_contents_async(
    self,
    *,
    metadata_filter: dict[str, Any],
) -> list[DeleteContentResponse]:
    """Delete all content matching the metadata filter

    An empty filter deletes nothing and returns an empty list. Otherwise one
    delete coroutine per returned content info is created and all of them
    are awaited together with ``asyncio.gather``.

    Args:
        metadata_filter (dict[str, Any]): The filter selecting the content

    Returns:
        list[DeleteContentResponse]: One response per deleted content
    """
    if not metadata_filter:
        return []

    # NOTE(review): this lookup is a synchronous call inside a coroutine.
    page = self.get_paginated_content_infos(metadata_filter=metadata_filter)

    # Build the coroutines first; nothing is awaited until gather().
    pending = []
    for match in page.content_infos:
        pending.append(
            delete_content_async(
                user_id=self._user_id,
                company_id=self._company_id,
                content_id=match.id,
            )
        )

    responses = await asyncio.gather(*pending)
    return list(responses)
1025
+
1026
def _get_knowledge_base_location(
    self, *, scope_id: str
) -> tuple[PurePath, list[str]]:
    """
    Get the path and the scope id chain of a folder from a scope id.

    The folder hierarchy is walked upwards through ``parent_id`` until a
    folder without parent is reached, calling ``get_folder_info`` once per
    level.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        PurePath: The path of the folder.
        list[str]: The list of scope ids from root to the folder.
    """

    folder_info = self.get_folder_info(scope_id=scope_id)
    # Both lists are collected leaf-first and reversed once at the end.
    folder_names: list[str] = [folder_info.name]
    scope_ids: list[str] = [folder_info.id]

    while folder_info.parent_id is not None:
        folder_info = self.get_folder_info(scope_id=folder_info.parent_id)
        folder_names.append(folder_info.name)
        # Record every ancestor id as well, so the returned chain really
        # runs from the root down to the requested folder.
        scope_ids.append(folder_info.id)

    folder_names.reverse()
    scope_ids.reverse()
    return PurePath("/" + "/".join(folder_names)), scope_ids
1055
+
1056
+ # Utility Functions
1057
+ # ------------------------------------------------------------------------------------------------
1058
+
1059
def get_folder_path(self, *, scope_id: str) -> PurePath:
    """
    Get the path of a folder from a scope id.

    Returns the path component of ``_get_knowledge_base_location``.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        PurePath: The path of the folder.
    """
    location = self._get_knowledge_base_location(scope_id=scope_id)
    path_part, _scope_ids = location
    return path_part
1070
+
1071
def get_scope_id_path(self, *, scope_id: str) -> list[str]:
    """
    Get the chain of scope ids leading to a folder.

    Returns the scope id component of ``_get_knowledge_base_location``.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        list[str]: The list of scope ids from root to the folder.
    """
    location = self._get_knowledge_base_location(scope_id=scope_id)
    _folder_path, scope_id_chain = location
    return scope_id_chain
1082
+
1083
+
1084
+ if __name__ == "__main__":
1085
+ kb_service = KnowledgeBaseService.from_settings()
1086
+
1087
+ kb_service.search_contents(where={"metadata.key": "123"})
1088
+ kb_service.search_content_chunks(
1089
+ search_string="test",
1090
+ search_type=ContentSearchType.VECTOR,
1091
+ limit=10,
1092
+ scope_ids=["123"],
1093
+ metadata_filter={"key": "123"},
1094
+ )