unique_toolkit 0.5.55__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. unique_toolkit/_common/validate_required_values.py +21 -0
  2. unique_toolkit/app/__init__.py +20 -0
  3. unique_toolkit/app/schemas.py +73 -7
  4. unique_toolkit/chat/__init__.py +5 -4
  5. unique_toolkit/chat/constants.py +3 -0
  6. unique_toolkit/chat/functions.py +661 -0
  7. unique_toolkit/chat/schemas.py +11 -11
  8. unique_toolkit/chat/service.py +273 -430
  9. unique_toolkit/content/__init__.py +1 -0
  10. unique_toolkit/content/constants.py +2 -0
  11. unique_toolkit/content/functions.py +475 -0
  12. unique_toolkit/content/service.py +163 -315
  13. unique_toolkit/content/utils.py +32 -0
  14. unique_toolkit/embedding/__init__.py +3 -0
  15. unique_toolkit/embedding/constants.py +2 -0
  16. unique_toolkit/embedding/functions.py +79 -0
  17. unique_toolkit/embedding/service.py +47 -34
  18. unique_toolkit/evaluators/__init__.py +1 -0
  19. unique_toolkit/evaluators/constants.py +1 -0
  20. unique_toolkit/evaluators/context_relevancy/constants.py +3 -3
  21. unique_toolkit/evaluators/context_relevancy/utils.py +5 -2
  22. unique_toolkit/evaluators/hallucination/utils.py +2 -1
  23. unique_toolkit/language_model/__init__.py +1 -0
  24. unique_toolkit/language_model/constants.py +4 -0
  25. unique_toolkit/language_model/functions.py +362 -0
  26. unique_toolkit/language_model/service.py +246 -293
  27. unique_toolkit/short_term_memory/__init__.py +5 -0
  28. unique_toolkit/short_term_memory/constants.py +1 -0
  29. unique_toolkit/short_term_memory/functions.py +175 -0
  30. unique_toolkit/short_term_memory/service.py +153 -27
  31. {unique_toolkit-0.5.55.dist-info → unique_toolkit-0.6.0.dist-info}/METADATA +33 -7
  32. unique_toolkit-0.6.0.dist-info/RECORD +64 -0
  33. unique_toolkit-0.5.55.dist-info/RECORD +0 -50
  34. {unique_toolkit-0.5.55.dist-info → unique_toolkit-0.6.0.dist-info}/LICENSE +0 -0
  35. {unique_toolkit-0.5.55.dist-info → unique_toolkit-0.6.0.dist-info}/WHEEL +0 -0
@@ -1,15 +1,23 @@
1
1
  import logging
2
- import os
3
- import re
4
- import tempfile
5
2
  from pathlib import Path
6
- from typing import Optional, Union, cast
7
3
 
8
- import requests
9
- import unique_sdk
10
-
11
- from unique_toolkit._common._base_service import BaseService
12
- from unique_toolkit.app.schemas import Event
4
+ from requests import Response
5
+ from typing_extensions import deprecated
6
+
7
+ from unique_toolkit._common.validate_required_values import validate_required_values
8
+ from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
9
+ from unique_toolkit.content import DOMAIN_NAME
10
+ from unique_toolkit.content.constants import DEFAULT_SEARCH_LANGUAGE
11
+ from unique_toolkit.content.functions import (
12
+ download_content,
13
+ download_content_to_file_by_id,
14
+ request_content_by_id,
15
+ search_content_chunks,
16
+ search_content_chunks_async,
17
+ search_contents,
18
+ search_contents_async,
19
+ upload_content,
20
+ )
13
21
  from unique_toolkit.content.schemas import (
14
22
  Content,
15
23
  ContentChunk,
@@ -17,21 +25,51 @@ from unique_toolkit.content.schemas import (
17
25
  ContentSearchType,
18
26
  )
19
27
 
28
+ logger = logging.getLogger(f"toolkit.{DOMAIN_NAME}.{__name__}")
20
29
 
21
- class ContentService(BaseService):
30
+
31
+ class ContentService:
22
32
  """
23
33
  Provides methods for searching, downloading and uploading content in the knowledge base.
24
34
 
25
35
  Attributes:
26
- event (Event): The Event object.
27
- logger (Optional[logging.Logger]): The logger. Defaults to None.
36
+ company_id (str | None): The company ID.
37
+ user_id (str | None): The user ID.
38
+ metadata_filter (dict | None): The metadata filter.
39
+ chat_id (str | None): The chat ID.
28
40
  """
29
41
 
30
- def __init__(self, event: Event, logger: Optional[logging.Logger] = None):
31
- super().__init__(event, logger)
32
- self.metadata_filter = event.payload.metadata_filter
42
+ def __init__(
43
+ self,
44
+ event: Event | BaseEvent | None = None,
45
+ company_id: str | None = None,
46
+ user_id: str | None = None,
47
+ ):
48
+ self._event = event # Changed to protected attribute
49
+ if event:
50
+ self.company_id = event.company_id
51
+ self.user_id = event.user_id
52
+ if isinstance(event, (ChatEvent, Event)):
53
+ self.metadata_filter = event.payload.metadata_filter
54
+ self.chat_id = event.payload.chat_id
55
+ else:
56
+ [company_id, user_id] = validate_required_values([company_id, user_id])
57
+ self.company_id = company_id
58
+ self.user_id = user_id
59
+ self.metadata_filter = None
60
+
61
+ @property
62
+ @deprecated(
63
+ "The event property is deprecated and will be removed in a future version."
64
+ )
65
+ def event(self) -> Event | BaseEvent | None:
66
+ """
67
+ Get the event object (deprecated).
33
68
 
34
- DEFAULT_SEARCH_LANGUAGE = "english"
69
+ Returns:
70
+ Event | BaseEvent | None: The event object.
71
+ """
72
+ return self._event
35
73
 
36
74
  def search_content_chunks(
37
75
  self,
@@ -39,11 +77,11 @@ class ContentService(BaseService):
39
77
  search_type: ContentSearchType,
40
78
  limit: int,
41
79
  search_language: str = DEFAULT_SEARCH_LANGUAGE,
42
- reranker_config: Optional[ContentRerankerConfig] = None,
43
- scope_ids: Optional[list[str]] = None,
44
- chat_only: Optional[bool] = None,
45
- metadata_filter: Optional[dict] = None,
46
- content_ids: Optional[list[str]] = None,
80
+ reranker_config: ContentRerankerConfig | None = None,
81
+ scope_ids: list[str] | None = None,
82
+ chat_only: bool | None = None,
83
+ metadata_filter: dict | None = None,
84
+ content_ids: list[str] | None = None,
47
85
  ) -> list[ContentChunk]:
48
86
  """
49
87
  Performs a synchronous search for content chunks in the knowledge base.
@@ -53,64 +91,49 @@ class ContentService(BaseService):
53
91
  search_type (ContentSearchType): The type of search to perform.
54
92
  limit (int): The maximum number of results to return.
55
93
  search_language (str): The language for the full-text search. Defaults to "english".
56
- reranker_config (Optional[ContentRerankerConfig]): The reranker configuration. Defaults to None.
57
- scope_ids (Optional[list[str]]): The scope IDs. Defaults to None.
58
- chat_only (Optional[bool]): Whether to search only in the current chat. Defaults to None.
59
- metadata_filter (Optional[dict]): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
60
- content_ids (Optional[list[str]]): The content IDs to search. Defaults to None.
94
+ reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
95
+ scope_ids (list[str] | None): The scope IDs. Defaults to None.
96
+ chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
97
+ metadata_filter (dict | None): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
98
+ content_ids (list[str] | None): The content IDs to search. Defaults to None.
61
99
  Returns:
62
100
  list[ContentChunk]: The search results.
63
101
  """
64
- if not scope_ids:
65
- self.logger.warning("No scope IDs provided for search.")
66
-
67
- if content_ids:
68
- self.logger.info("Searching chunks for content IDs: %s", content_ids)
69
102
 
70
103
  if metadata_filter is None:
71
104
  metadata_filter = self.metadata_filter
72
105
 
73
106
  try:
74
- searches = unique_sdk.Search.create(
75
- user_id=self.event.user_id,
76
- company_id=self.event.company_id,
77
- chatId=self.event.payload.chat_id,
78
- searchString=search_string,
79
- searchType=search_type.name,
80
- scopeIds=scope_ids,
107
+ searches = search_content_chunks(
108
+ user_id=self.user_id,
109
+ company_id=self.company_id,
110
+ chat_id=self.chat_id,
111
+ search_string=search_string,
112
+ search_type=search_type,
81
113
  limit=limit,
82
- reranker=(
83
- reranker_config.model_dump(by_alias=True)
84
- if reranker_config
85
- else None
86
- ),
87
- language=search_language,
88
- chatOnly=chat_only,
89
- metaDataFilter=metadata_filter,
90
- contentIds=content_ids,
114
+ search_language=search_language,
115
+ reranker_config=reranker_config,
116
+ scope_ids=scope_ids,
117
+ chat_only=chat_only,
118
+ metadata_filter=metadata_filter,
119
+ content_ids=content_ids,
91
120
  )
121
+ return searches
92
122
  except Exception as e:
93
- self.logger.error(f"Error while searching content chunks: {e}")
123
+ logger.error(f"Error while searching content chunks: {e}")
94
124
  raise e
95
125
 
96
- def map_to_content_chunks(searches: list[unique_sdk.Search]):
97
- return [ContentChunk(**search) for search in searches]
98
-
99
- # TODO change return type in sdk from Search to list[Search]
100
- searches = cast(list[unique_sdk.Search], searches)
101
- return map_to_content_chunks(searches)
102
-
103
126
  async def search_content_chunks_async(
104
127
  self,
105
128
  search_string: str,
106
129
  search_type: ContentSearchType,
107
130
  limit: int,
108
131
  search_language: str = DEFAULT_SEARCH_LANGUAGE,
109
- reranker_config: Optional[ContentRerankerConfig] = None,
110
- scope_ids: Optional[list[str]] = None,
111
- chat_only: Optional[bool] = None,
112
- metadata_filter: Optional[dict] = None,
113
- content_ids: Optional[list[str]] = None,
132
+ reranker_config: ContentRerankerConfig | None = None,
133
+ scope_ids: list[str] | None = None,
134
+ chat_only: bool | None = None,
135
+ metadata_filter: dict | None = None,
136
+ content_ids: list[str] | None = None,
114
137
  ):
115
138
  """
116
139
  Performs an asynchronous search for content chunks in the knowledge base.
@@ -120,53 +143,37 @@ class ContentService(BaseService):
120
143
  search_type (ContentSearchType): The type of search to perform.
121
144
  limit (int): The maximum number of results to return.
122
145
  search_language (str): The language for the full-text search. Defaults to "english".
123
- reranker_config (Optional[ContentRerankerConfig]): The reranker configuration. Defaults to None.
124
- scope_ids (Optional[list[str]]): The scope IDs. Defaults to None.
125
- chat_only (Optional[bool]): Whether to search only in the current chat. Defaults to None.
126
- metadata_filter (Optional[dict]): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
127
- content_ids (Optional[list[str]]): The content IDs to search. Defaults to None.
146
+ reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
147
+ scope_ids (list[str] | None): The scope IDs. Defaults to None.
148
+ chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
149
+ metadata_filter (dict | None): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
150
+ content_ids (list[str] | None): The content IDs to search. Defaults to None.
128
151
  Returns:
129
152
  list[ContentChunk]: The search results.
130
153
  """
131
- if not scope_ids:
132
- self.logger.warning("No scope IDs provided for search.")
133
-
134
- if content_ids:
135
- self.logger.info("Searching chunks for content IDs: %s", content_ids)
136
-
137
154
  if metadata_filter is None:
138
155
  metadata_filter = self.metadata_filter
139
156
 
140
157
  try:
141
- searches = await unique_sdk.Search.create_async(
142
- user_id=self.event.user_id,
143
- company_id=self.event.company_id,
144
- chatId=self.event.payload.chat_id,
145
- searchString=search_string,
146
- searchType=search_type.name,
147
- scopeIds=scope_ids,
158
+ searches = await search_content_chunks_async(
159
+ user_id=self.user_id,
160
+ company_id=self.company_id,
161
+ chat_id=self.chat_id,
162
+ search_string=search_string,
163
+ search_type=search_type,
148
164
  limit=limit,
149
- reranker=(
150
- reranker_config.model_dump(by_alias=True)
151
- if reranker_config
152
- else None
153
- ),
154
- language=search_language,
155
- chatOnly=chat_only,
156
- metaDataFilter=metadata_filter,
157
- contentIds=content_ids,
165
+ search_language=search_language,
166
+ reranker_config=reranker_config,
167
+ scope_ids=scope_ids,
168
+ chat_only=chat_only,
169
+ metadata_filter=metadata_filter,
170
+ content_ids=content_ids,
158
171
  )
172
+ return searches
159
173
  except Exception as e:
160
- self.logger.error(f"Error while searching content chunks: {e}")
174
+ logger.error(f"Error while searching content chunks: {e}")
161
175
  raise e
162
176
 
163
- def map_to_content_chunks(searches: list[unique_sdk.Search]):
164
- return [ContentChunk(**search) for search in searches]
165
-
166
- # TODO change return type in sdk from Search to list[Search]
167
- searches = cast(list[unique_sdk.Search], searches)
168
- return map_to_content_chunks(searches)
169
-
170
177
  def search_contents(
171
178
  self,
172
179
  where: dict,
@@ -181,22 +188,12 @@ class ContentService(BaseService):
181
188
  Returns:
182
189
  list[Content]: The search results.
183
190
  """
184
- if where.get("contentId"):
185
- self.logger.info("Searching content for content ID: %s", where["contentId"])
186
-
187
- try:
188
- contents = unique_sdk.Content.search(
189
- user_id=self.event.user_id,
190
- company_id=self.event.company_id,
191
- chatId=self.event.payload.chat_id,
192
- # TODO add type parameter
193
- where=where, # type: ignore
194
- )
195
- except Exception as e:
196
- self.logger.error(f"Error while searching contents: {e}")
197
- raise e
198
-
199
- return self._map_contents(contents)
191
+ return search_contents(
192
+ user_id=self.user_id,
193
+ company_id=self.company_id,
194
+ chat_id=self.chat_id,
195
+ where=where,
196
+ )
200
197
 
201
198
  async def search_contents_async(
202
199
  self,
@@ -211,62 +208,28 @@ class ContentService(BaseService):
211
208
  Returns:
212
209
  list[Content]: The search results.
213
210
  """
214
- if where.get("contentId"):
215
- self.logger.info("Searching content for content ID: %s", where["contentId"])
216
-
217
- try:
218
- contents = await unique_sdk.Content.search_async(
219
- user_id=self.event.user_id,
220
- company_id=self.event.company_id,
221
- chatId=self.event.payload.chat_id,
222
- # TODO add type parameter
223
- where=where, # type: ignore
224
- )
225
- except Exception as e:
226
- self.logger.error(f"Error while searching contents: {e}")
227
- raise e
228
-
229
- return self._map_contents(contents)
211
+ return await search_contents_async(
212
+ user_id=self.user_id,
213
+ company_id=self.company_id,
214
+ chat_id=self.chat_id,
215
+ where=where,
216
+ )
230
217
 
231
218
  def search_content_on_chat(
232
219
  self,
233
220
  ) -> list[Content]:
234
- where = {"ownerId": {"equals": self.event.payload.chat_id}}
221
+ where = {"ownerId": {"equals": self.chat_id}}
235
222
 
236
223
  return self.search_contents(where)
237
224
 
238
- @staticmethod
239
- def _map_content_chunk(content_chunk: dict):
240
- return ContentChunk(
241
- id=content_chunk["id"],
242
- text=content_chunk["text"],
243
- start_page=content_chunk["startPage"],
244
- end_page=content_chunk["endPage"],
245
- order=content_chunk["order"],
246
- )
247
-
248
- def _map_content(self, content: dict):
249
- return Content(
250
- id=content["id"],
251
- key=content["key"],
252
- title=content["title"],
253
- url=content["url"],
254
- chunks=[self._map_content_chunk(chunk) for chunk in content["chunks"]],
255
- created_at=content["createdAt"],
256
- updated_at=content["updatedAt"],
257
- )
258
-
259
- def _map_contents(self, contents):
260
- return [self._map_content(content) for content in contents]
261
-
262
225
  def upload_content(
263
226
  self,
264
227
  path_to_content: str,
265
228
  content_name: str,
266
229
  mime_type: str,
267
- scope_id: Optional[str] = None,
268
- chat_id: Optional[str] = None,
269
- skip_ingestion: Optional[bool] = False,
230
+ scope_id: str | None = None,
231
+ chat_id: str | None = None,
232
+ skip_ingestion: bool = False,
270
233
  ):
271
234
  """
272
235
  Uploads content to the knowledge base.
@@ -275,108 +238,30 @@ class ContentService(BaseService):
275
238
  path_to_content (str): The path to the content to upload.
276
239
  content_name (str): The name of the content.
277
240
  mime_type (str): The MIME type of the content.
278
- scope_id (Optional[str]): The scope ID. Defaults to None.
279
- chat_id (Optional[str]): The chat ID. Defaults to None.
241
+ scope_id (str | None): The scope ID. Defaults to None.
242
+ chat_id (str | None): The chat ID. Defaults to None.
243
+ skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
280
244
 
281
245
  Returns:
282
246
  Content: The uploaded content.
283
247
  """
284
248
 
285
- try:
286
- return self._trigger_upload_content(
287
- path_to_content=path_to_content,
288
- content_name=content_name,
289
- mime_type=mime_type,
290
- scope_id=scope_id,
291
- chat_id=chat_id,
292
- skip_ingestion=skip_ingestion,
293
- )
294
- except Exception as e:
295
- self.logger.error(f"Error while uploading content: {e}")
296
- raise e
297
-
298
- def _trigger_upload_content(
299
- self,
300
- path_to_content: str,
301
- content_name: str,
302
- mime_type: str,
303
- scope_id: Optional[str] = None,
304
- chat_id: Optional[str] = None,
305
- skip_ingestion: Optional[bool] = False,
306
- ):
307
- if not chat_id and not scope_id:
308
- raise ValueError("chat_id or scope_id must be provided")
309
-
310
- byte_size = os.path.getsize(path_to_content)
311
- created_content = unique_sdk.Content.upsert(
312
- user_id=self.event.user_id,
313
- company_id=self.event.company_id,
314
- input={
315
- "key": content_name,
316
- "title": content_name,
317
- "mimeType": mime_type,
318
- },
319
- scopeId=scope_id,
320
- chatId=chat_id,
321
- ) # type: ignore
322
-
323
- write_url = created_content["writeUrl"]
324
-
325
- if not write_url:
326
- error_msg = "Write url for uploaded content is missing"
327
- self.logger.error(error_msg)
328
- raise ValueError(error_msg)
329
-
330
- # upload to azure blob storage SAS url uploadUrl the pdf file translatedFile make sure it is treated as a application/pdf
331
- with open(path_to_content, "rb") as file:
332
- requests.put(
333
- url=write_url,
334
- data=file,
335
- headers={
336
- "X-Ms-Blob-Content-Type": mime_type,
337
- "X-Ms-Blob-Type": "BlockBlob",
338
- },
339
- )
340
-
341
- read_url = created_content["readUrl"]
342
-
343
- if not read_url:
344
- error_msg = "Read url for uploaded content is missing"
345
- self.logger.error(error_msg)
346
- raise ValueError(error_msg)
347
-
348
- input_dict = {
349
- "key": content_name,
350
- "title": content_name,
351
- "mimeType": mime_type,
352
- "byteSize": byte_size,
353
- }
354
-
355
- if skip_ingestion:
356
- input_dict["ingestionConfig"] = {"uniqueIngestionMode": "SKIP_INGESTION"}
357
-
358
- if chat_id:
359
- unique_sdk.Content.upsert(
360
- user_id=self.event.user_id,
361
- company_id=self.event.company_id,
362
- input=input_dict,
363
- fileUrl=read_url,
364
- chatId=chat_id,
365
- ) # type: ignore
366
- else:
367
- unique_sdk.Content.upsert(
368
- user_id=self.event.user_id,
369
- company_id=self.event.company_id,
370
- input=input_dict,
371
- fileUrl=read_url,
372
- scopeId=scope_id,
373
- ) # type: ignore
374
-
375
- return Content(**created_content)
249
+ return upload_content(
250
+ user_id=self.user_id,
251
+ company_id=self.company_id,
252
+ path_to_content=path_to_content,
253
+ content_name=content_name,
254
+ mime_type=mime_type,
255
+ scope_id=scope_id,
256
+ chat_id=chat_id,
257
+ skip_ingestion=skip_ingestion,
258
+ )
376
259
 
377
260
  def request_content_by_id(
378
- self, content_id: str, chat_id: str | None
379
- ) -> requests.Response:
261
+ self,
262
+ content_id: str,
263
+ chat_id: str | None,
264
+ ) -> Response:
380
265
  """
381
266
  Sends a request to download content from a chat.
382
267
 
@@ -388,37 +273,28 @@ class ContentService(BaseService):
388
273
  requests.Response: The response object containing the downloaded content.
389
274
 
390
275
  """
391
- self.logger.info("Requesting content by ID: %s", content_id)
392
- url = f"{unique_sdk.api_base}/content/{content_id}/file"
393
- if chat_id:
394
- url = f"{url}?chatId={chat_id}"
395
-
396
- # Download the file and save it to the random directory
397
- headers = {
398
- "x-api-version": unique_sdk.api_version,
399
- "x-app-id": unique_sdk.app_id,
400
- "x-user-id": self.event.user_id,
401
- "x-company-id": self.event.company_id,
402
- "Authorization": "Bearer %s" % (unique_sdk.api_key,),
403
- }
404
-
405
- return requests.get(url, headers=headers)
276
+ return request_content_by_id(
277
+ user_id=self.user_id,
278
+ company_id=self.company_id,
279
+ content_id=content_id,
280
+ chat_id=chat_id,
281
+ )
406
282
 
407
283
  def download_content_to_file_by_id(
408
284
  self,
409
285
  content_id: str,
410
- chat_id: Optional[str] = None,
286
+ chat_id: str | None = None,
411
287
  filename: str | None = None,
412
- tmp_dir_path: Optional[Union[str, Path]] = "/tmp",
288
+ tmp_dir_path: str | Path | None = "/tmp",
413
289
  ):
414
290
  """
415
291
  Downloads content from a chat and saves it to a file.
416
292
 
417
293
  Args:
418
294
  content_id (str): The ID of the content to download.
419
- chat_id (Optional[str]): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
295
+ chat_id (str | None): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
420
296
  filename (str | None): The name of the file to save the content as. If not provided, the original filename will be used. Defaults to None.
421
- tmp_dir_path (Optional[Union[str, Path]]): The path to the temporary directory where the content will be saved. Defaults to "/tmp".
297
+ tmp_dir_path (str | Path | None): The path to the temporary directory where the content will be saved. Defaults to "/tmp".
422
298
 
423
299
  Returns:
424
300
  Path: The path to the downloaded file.
@@ -427,43 +303,22 @@ class ContentService(BaseService):
427
303
  Exception: If the download fails or the filename cannot be determined.
428
304
  """
429
305
 
430
- self.logger.info("Requesting content by ID: %s", content_id)
431
- response = self.request_content_by_id(content_id, chat_id)
432
- random_dir = tempfile.mkdtemp(dir=tmp_dir_path)
433
-
434
- if response.status_code == 200:
435
- if filename:
436
- content_path = Path(random_dir) / filename
437
- else:
438
- pattern = r'filename="([^"]+)"'
439
- match = re.search(
440
- pattern, response.headers.get("Content-Disposition", "")
441
- )
442
- if match:
443
- content_path = Path(random_dir) / match.group(1)
444
- else:
445
- error_msg = (
446
- "Error downloading file: Filename could not be determined"
447
- )
448
- self.logger.error(error_msg)
449
- raise Exception(error_msg)
450
-
451
- with open(content_path, "wb") as file:
452
- file.write(response.content)
453
- else:
454
- error_msg = f"Error downloading file: Status code {response.status_code}"
455
- self.logger.error(error_msg)
456
- raise Exception(error_msg)
457
-
458
- return content_path
306
+ return download_content_to_file_by_id(
307
+ user_id=self.user_id,
308
+ company_id=self.company_id,
309
+ content_id=content_id,
310
+ chat_id=chat_id,
311
+ filename=filename,
312
+ tmp_dir_path=tmp_dir_path,
313
+ )
459
314
 
460
315
  # TODO: Discuss if we should deprecate this method due to unclear use by content_name
461
316
  def download_content(
462
317
  self,
463
318
  content_id: str,
464
319
  content_name: str,
465
- chat_id: Optional[str] = None,
466
- dir_path: Optional[Union[str, Path]] = "/tmp",
320
+ chat_id: str | None = None,
321
+ dir_path: str | Path | None = "/tmp",
467
322
  ) -> Path:
468
323
  """
469
324
  Downloads content to temporary directory
@@ -481,18 +336,11 @@ class ContentService(BaseService):
481
336
  Exception: If the download fails.
482
337
  """
483
338
 
484
- self.logger.info("Downloading content by ID: %s", content_id)
485
- response = self.request_content_by_id(content_id, chat_id)
486
-
487
- random_dir = tempfile.mkdtemp(dir=dir_path)
488
- content_path = Path(random_dir) / content_name
489
-
490
- if response.status_code == 200:
491
- with open(content_path, "wb") as file:
492
- file.write(response.content)
493
- else:
494
- error_msg = f"Error downloading file: Status code {response.status_code}"
495
- self.logger.error(error_msg)
496
- raise Exception(error_msg)
497
-
498
- return content_path
339
+ return download_content(
340
+ user_id=self.user_id,
341
+ company_id=self.company_id,
342
+ content_id=content_id,
343
+ content_name=content_name,
344
+ chat_id=chat_id,
345
+ dir_path=dir_path,
346
+ )
@@ -1,8 +1,10 @@
1
1
  import re
2
2
 
3
3
  import tiktoken
4
+ import unique_sdk
4
5
 
5
6
  from unique_toolkit.content.schemas import (
7
+ Content,
6
8
  ContentChunk,
7
9
  )
8
10
 
@@ -186,3 +188,33 @@ def count_tokens(text: str, encoding_model="cl100k_base") -> int:
186
188
  """
187
189
  encoding = tiktoken.get_encoding(encoding_model)
188
190
  return len(encoding.encode(text))
191
+
192
+
193
+ def map_content_chunk(content_chunk: dict):
194
+ return ContentChunk(
195
+ id=content_chunk["id"],
196
+ text=content_chunk["text"],
197
+ start_page=content_chunk["startPage"],
198
+ end_page=content_chunk["endPage"],
199
+ order=content_chunk["order"],
200
+ )
201
+
202
+
203
+ def map_content(content: dict):
204
+ return Content(
205
+ id=content["id"],
206
+ key=content["key"],
207
+ title=content["title"],
208
+ url=content["url"],
209
+ chunks=[map_content_chunk(chunk) for chunk in content["chunks"]],
210
+ created_at=content["createdAt"],
211
+ updated_at=content["updatedAt"],
212
+ )
213
+
214
+
215
+ def map_contents(contents):
216
+ return [map_content(content) for content in contents]
217
+
218
+
219
+ def map_to_content_chunks(searches: list[unique_sdk.Search]):
220
+ return [ContentChunk(**search) for search in searches]
@@ -1,3 +1,6 @@
1
+ from .constants import (
2
+ DOMAIN_NAME as DOMAIN_NAME,
3
+ )
1
4
  from .schemas import Embeddings as Embeddings
2
5
  from .service import EmbeddingService as EmbeddingService
3
6
  from .utils import (