unique_toolkit 0.5.54__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. unique_toolkit/_common/validate_required_values.py +21 -0
  2. unique_toolkit/app/__init__.py +20 -0
  3. unique_toolkit/app/schemas.py +73 -7
  4. unique_toolkit/chat/__init__.py +5 -4
  5. unique_toolkit/chat/constants.py +3 -0
  6. unique_toolkit/chat/functions.py +661 -0
  7. unique_toolkit/chat/schemas.py +11 -11
  8. unique_toolkit/chat/service.py +273 -430
  9. unique_toolkit/content/__init__.py +1 -0
  10. unique_toolkit/content/constants.py +2 -0
  11. unique_toolkit/content/functions.py +475 -0
  12. unique_toolkit/content/service.py +163 -300
  13. unique_toolkit/content/utils.py +32 -0
  14. unique_toolkit/embedding/__init__.py +3 -0
  15. unique_toolkit/embedding/constants.py +2 -0
  16. unique_toolkit/embedding/functions.py +79 -0
  17. unique_toolkit/embedding/service.py +47 -34
  18. unique_toolkit/evaluators/__init__.py +1 -0
  19. unique_toolkit/evaluators/constants.py +1 -0
  20. unique_toolkit/evaluators/context_relevancy/constants.py +3 -3
  21. unique_toolkit/evaluators/context_relevancy/utils.py +5 -2
  22. unique_toolkit/evaluators/hallucination/utils.py +2 -1
  23. unique_toolkit/language_model/__init__.py +1 -0
  24. unique_toolkit/language_model/constants.py +4 -0
  25. unique_toolkit/language_model/functions.py +362 -0
  26. unique_toolkit/language_model/service.py +246 -293
  27. unique_toolkit/short_term_memory/__init__.py +5 -0
  28. unique_toolkit/short_term_memory/constants.py +1 -0
  29. unique_toolkit/short_term_memory/functions.py +175 -0
  30. unique_toolkit/short_term_memory/service.py +153 -27
  31. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/METADATA +36 -7
  32. unique_toolkit-0.6.0.dist-info/RECORD +64 -0
  33. unique_toolkit-0.5.54.dist-info/RECORD +0 -50
  34. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/LICENSE +0 -0
  35. {unique_toolkit-0.5.54.dist-info → unique_toolkit-0.6.0.dist-info}/WHEEL +0 -0
@@ -1,15 +1,23 @@
1
1
  import logging
2
- import os
3
- import re
4
- import tempfile
5
2
  from pathlib import Path
6
- from typing import Optional, Union, cast
7
3
 
8
- import requests
9
- import unique_sdk
10
-
11
- from unique_toolkit._common._base_service import BaseService
12
- from unique_toolkit.app.schemas import Event
4
+ from requests import Response
5
+ from typing_extensions import deprecated
6
+
7
+ from unique_toolkit._common.validate_required_values import validate_required_values
8
+ from unique_toolkit.app.schemas import BaseEvent, ChatEvent, Event
9
+ from unique_toolkit.content import DOMAIN_NAME
10
+ from unique_toolkit.content.constants import DEFAULT_SEARCH_LANGUAGE
11
+ from unique_toolkit.content.functions import (
12
+ download_content,
13
+ download_content_to_file_by_id,
14
+ request_content_by_id,
15
+ search_content_chunks,
16
+ search_content_chunks_async,
17
+ search_contents,
18
+ search_contents_async,
19
+ upload_content,
20
+ )
13
21
  from unique_toolkit.content.schemas import (
14
22
  Content,
15
23
  ContentChunk,
@@ -17,21 +25,51 @@ from unique_toolkit.content.schemas import (
17
25
  ContentSearchType,
18
26
  )
19
27
 
28
+ logger = logging.getLogger(f"toolkit.{DOMAIN_NAME}.{__name__}")
20
29
 
21
- class ContentService(BaseService):
30
+
31
+ class ContentService:
22
32
  """
23
33
  Provides methods for searching, downloading and uploading content in the knowledge base.
24
34
 
25
35
  Attributes:
26
- event (Event): The Event object.
27
- logger (Optional[logging.Logger]): The logger. Defaults to None.
36
+ company_id (str | None): The company ID.
37
+ user_id (str | None): The user ID.
38
+ metadata_filter (dict | None): The metadata filter.
39
+ chat_id (str | None): The chat ID.
28
40
  """
29
41
 
30
- def __init__(self, event: Event, logger: Optional[logging.Logger] = None):
31
- super().__init__(event, logger)
32
- self.metadata_filter = event.payload.metadata_filter
42
+ def __init__(
43
+ self,
44
+ event: Event | BaseEvent | None = None,
45
+ company_id: str | None = None,
46
+ user_id: str | None = None,
47
+ ):
48
+ self._event = event # Changed to protected attribute
49
+ if event:
50
+ self.company_id = event.company_id
51
+ self.user_id = event.user_id
52
+ if isinstance(event, (ChatEvent, Event)):
53
+ self.metadata_filter = event.payload.metadata_filter
54
+ self.chat_id = event.payload.chat_id
55
+ else:
56
+ [company_id, user_id] = validate_required_values([company_id, user_id])
57
+ self.company_id = company_id
58
+ self.user_id = user_id
59
+ self.metadata_filter = None
60
+
61
+ @property
62
+ @deprecated(
63
+ "The event property is deprecated and will be removed in a future version."
64
+ )
65
+ def event(self) -> Event | BaseEvent | None:
66
+ """
67
+ Get the event object (deprecated).
33
68
 
34
- DEFAULT_SEARCH_LANGUAGE = "english"
69
+ Returns:
70
+ Event | BaseEvent | None: The event object.
71
+ """
72
+ return self._event
35
73
 
36
74
  def search_content_chunks(
37
75
  self,
@@ -39,11 +77,11 @@ class ContentService(BaseService):
39
77
  search_type: ContentSearchType,
40
78
  limit: int,
41
79
  search_language: str = DEFAULT_SEARCH_LANGUAGE,
42
- reranker_config: Optional[ContentRerankerConfig] = None,
43
- scope_ids: Optional[list[str]] = None,
44
- chat_only: Optional[bool] = None,
45
- metadata_filter: Optional[dict] = None,
46
- content_ids: Optional[list[str]] = None,
80
+ reranker_config: ContentRerankerConfig | None = None,
81
+ scope_ids: list[str] | None = None,
82
+ chat_only: bool | None = None,
83
+ metadata_filter: dict | None = None,
84
+ content_ids: list[str] | None = None,
47
85
  ) -> list[ContentChunk]:
48
86
  """
49
87
  Performs a synchronous search for content chunks in the knowledge base.
@@ -53,61 +91,49 @@ class ContentService(BaseService):
53
91
  search_type (ContentSearchType): The type of search to perform.
54
92
  limit (int): The maximum number of results to return.
55
93
  search_language (str): The language for the full-text search. Defaults to "english".
56
- reranker_config (Optional[ContentRerankerConfig]): The reranker configuration. Defaults to None.
57
- scope_ids (Optional[list[str]]): The scope IDs. Defaults to None.
58
- chat_only (Optional[bool]): Whether to search only in the current chat. Defaults to None.
59
- metadata_filter (Optional[dict]): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
60
- content_ids (Optional[list[str]]): The content IDs to search. Defaults to None.
94
+ reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
95
+ scope_ids (list[str] | None): The scope IDs. Defaults to None.
96
+ chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
97
+ metadata_filter (dict | None): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
98
+ content_ids (list[str] | None): The content IDs to search. Defaults to None.
61
99
  Returns:
62
100
  list[ContentChunk]: The search results.
63
101
  """
64
- if not scope_ids:
65
- self.logger.warning("No scope IDs provided for search.")
66
102
 
67
103
  if metadata_filter is None:
68
104
  metadata_filter = self.metadata_filter
69
105
 
70
106
  try:
71
- searches = unique_sdk.Search.create(
72
- user_id=self.event.user_id,
73
- company_id=self.event.company_id,
74
- chatId=self.event.payload.chat_id,
75
- searchString=search_string,
76
- searchType=search_type.name,
77
- scopeIds=scope_ids,
107
+ searches = search_content_chunks(
108
+ user_id=self.user_id,
109
+ company_id=self.company_id,
110
+ chat_id=self.chat_id,
111
+ search_string=search_string,
112
+ search_type=search_type,
78
113
  limit=limit,
79
- reranker=(
80
- reranker_config.model_dump(by_alias=True)
81
- if reranker_config
82
- else None
83
- ),
84
- language=search_language,
85
- chatOnly=chat_only,
86
- metaDataFilter=metadata_filter,
87
- contentIds=content_ids,
114
+ search_language=search_language,
115
+ reranker_config=reranker_config,
116
+ scope_ids=scope_ids,
117
+ chat_only=chat_only,
118
+ metadata_filter=metadata_filter,
119
+ content_ids=content_ids,
88
120
  )
121
+ return searches
89
122
  except Exception as e:
90
- self.logger.error(f"Error while searching content chunks: {e}")
123
+ logger.error(f"Error while searching content chunks: {e}")
91
124
  raise e
92
125
 
93
- def map_to_content_chunks(searches: list[unique_sdk.Search]):
94
- return [ContentChunk(**search) for search in searches]
95
-
96
- # TODO change return type in sdk from Search to list[Search]
97
- searches = cast(list[unique_sdk.Search], searches)
98
- return map_to_content_chunks(searches)
99
-
100
126
  async def search_content_chunks_async(
101
127
  self,
102
128
  search_string: str,
103
129
  search_type: ContentSearchType,
104
130
  limit: int,
105
131
  search_language: str = DEFAULT_SEARCH_LANGUAGE,
106
- reranker_config: Optional[ContentRerankerConfig] = None,
107
- scope_ids: Optional[list[str]] = None,
108
- chat_only: Optional[bool] = None,
109
- metadata_filter: Optional[dict] = None,
110
- content_ids: Optional[list[str]] = None,
132
+ reranker_config: ContentRerankerConfig | None = None,
133
+ scope_ids: list[str] | None = None,
134
+ chat_only: bool | None = None,
135
+ metadata_filter: dict | None = None,
136
+ content_ids: list[str] | None = None,
111
137
  ):
112
138
  """
113
139
  Performs an asynchronous search for content chunks in the knowledge base.
@@ -117,50 +143,37 @@ class ContentService(BaseService):
117
143
  search_type (ContentSearchType): The type of search to perform.
118
144
  limit (int): The maximum number of results to return.
119
145
  search_language (str): The language for the full-text search. Defaults to "english".
120
- reranker_config (Optional[ContentRerankerConfig]): The reranker configuration. Defaults to None.
121
- scope_ids (Optional[list[str]]): The scope IDs. Defaults to None.
122
- chat_only (Optional[bool]): Whether to search only in the current chat. Defaults to None.
123
- metadata_filter (Optional[dict]): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
124
- content_ids (Optional[list[str]]): The content IDs to search. Defaults to None.
146
+ reranker_config (ContentRerankerConfig | None): The reranker configuration. Defaults to None.
147
+ scope_ids (list[str] | None): The scope IDs. Defaults to None.
148
+ chat_only (bool | None): Whether to search only in the current chat. Defaults to None.
149
+ metadata_filter (dict | None): UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.
150
+ content_ids (list[str] | None): The content IDs to search. Defaults to None.
125
151
  Returns:
126
152
  list[ContentChunk]: The search results.
127
153
  """
128
- if not scope_ids:
129
- self.logger.warning("No scope IDs provided for search.")
130
-
131
154
  if metadata_filter is None:
132
155
  metadata_filter = self.metadata_filter
133
156
 
134
157
  try:
135
- searches = await unique_sdk.Search.create_async(
136
- user_id=self.event.user_id,
137
- company_id=self.event.company_id,
138
- chatId=self.event.payload.chat_id,
139
- searchString=search_string,
140
- searchType=search_type.name,
141
- scopeIds=scope_ids,
158
+ searches = await search_content_chunks_async(
159
+ user_id=self.user_id,
160
+ company_id=self.company_id,
161
+ chat_id=self.chat_id,
162
+ search_string=search_string,
163
+ search_type=search_type,
142
164
  limit=limit,
143
- reranker=(
144
- reranker_config.model_dump(by_alias=True)
145
- if reranker_config
146
- else None
147
- ),
148
- language=search_language,
149
- chatOnly=chat_only,
150
- metaDataFilter=metadata_filter,
151
- contentIds=content_ids,
165
+ search_language=search_language,
166
+ reranker_config=reranker_config,
167
+ scope_ids=scope_ids,
168
+ chat_only=chat_only,
169
+ metadata_filter=metadata_filter,
170
+ content_ids=content_ids,
152
171
  )
172
+ return searches
153
173
  except Exception as e:
154
- self.logger.error(f"Error while searching content chunks: {e}")
174
+ logger.error(f"Error while searching content chunks: {e}")
155
175
  raise e
156
176
 
157
- def map_to_content_chunks(searches: list[unique_sdk.Search]):
158
- return [ContentChunk(**search) for search in searches]
159
-
160
- # TODO change return type in sdk from Search to list[Search]
161
- searches = cast(list[unique_sdk.Search], searches)
162
- return map_to_content_chunks(searches)
163
-
164
177
  def search_contents(
165
178
  self,
166
179
  where: dict,
@@ -175,19 +188,12 @@ class ContentService(BaseService):
175
188
  Returns:
176
189
  list[Content]: The search results.
177
190
  """
178
- try:
179
- contents = unique_sdk.Content.search(
180
- user_id=self.event.user_id,
181
- company_id=self.event.company_id,
182
- chatId=self.event.payload.chat_id,
183
- # TODO add type parameter
184
- where=where, # type: ignore
185
- )
186
- except Exception as e:
187
- self.logger.error(f"Error while searching contents: {e}")
188
- raise e
189
-
190
- return self._map_contents(contents)
191
+ return search_contents(
192
+ user_id=self.user_id,
193
+ company_id=self.company_id,
194
+ chat_id=self.chat_id,
195
+ where=where,
196
+ )
191
197
 
192
198
  async def search_contents_async(
193
199
  self,
@@ -202,59 +208,28 @@ class ContentService(BaseService):
202
208
  Returns:
203
209
  list[Content]: The search results.
204
210
  """
205
- try:
206
- contents = await unique_sdk.Content.search_async(
207
- user_id=self.event.user_id,
208
- company_id=self.event.company_id,
209
- chatId=self.event.payload.chat_id,
210
- # TODO add type parameter
211
- where=where, # type: ignore
212
- )
213
- except Exception as e:
214
- self.logger.error(f"Error while searching contents: {e}")
215
- raise e
216
-
217
- return self._map_contents(contents)
211
+ return await search_contents_async(
212
+ user_id=self.user_id,
213
+ company_id=self.company_id,
214
+ chat_id=self.chat_id,
215
+ where=where,
216
+ )
218
217
 
219
218
  def search_content_on_chat(
220
219
  self,
221
220
  ) -> list[Content]:
222
- where = {"ownerId": {"equals": self.event.payload.chat_id}}
221
+ where = {"ownerId": {"equals": self.chat_id}}
223
222
 
224
223
  return self.search_contents(where)
225
224
 
226
- @staticmethod
227
- def _map_content_chunk(content_chunk: dict):
228
- return ContentChunk(
229
- id=content_chunk["id"],
230
- text=content_chunk["text"],
231
- start_page=content_chunk["startPage"],
232
- end_page=content_chunk["endPage"],
233
- order=content_chunk["order"],
234
- )
235
-
236
- def _map_content(self, content: dict):
237
- return Content(
238
- id=content["id"],
239
- key=content["key"],
240
- title=content["title"],
241
- url=content["url"],
242
- chunks=[self._map_content_chunk(chunk) for chunk in content["chunks"]],
243
- created_at=content["createdAt"],
244
- updated_at=content["updatedAt"],
245
- )
246
-
247
- def _map_contents(self, contents):
248
- return [self._map_content(content) for content in contents]
249
-
250
225
  def upload_content(
251
226
  self,
252
227
  path_to_content: str,
253
228
  content_name: str,
254
229
  mime_type: str,
255
- scope_id: Optional[str] = None,
256
- chat_id: Optional[str] = None,
257
- skip_ingestion: Optional[bool] = False,
230
+ scope_id: str | None = None,
231
+ chat_id: str | None = None,
232
+ skip_ingestion: bool = False,
258
233
  ):
259
234
  """
260
235
  Uploads content to the knowledge base.
@@ -263,108 +238,30 @@ class ContentService(BaseService):
263
238
  path_to_content (str): The path to the content to upload.
264
239
  content_name (str): The name of the content.
265
240
  mime_type (str): The MIME type of the content.
266
- scope_id (Optional[str]): The scope ID. Defaults to None.
267
- chat_id (Optional[str]): The chat ID. Defaults to None.
241
+ scope_id (str | None): The scope ID. Defaults to None.
242
+ chat_id (str | None): The chat ID. Defaults to None.
243
+ skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
268
244
 
269
245
  Returns:
270
246
  Content: The uploaded content.
271
247
  """
272
248
 
273
- try:
274
- return self._trigger_upload_content(
275
- path_to_content=path_to_content,
276
- content_name=content_name,
277
- mime_type=mime_type,
278
- scope_id=scope_id,
279
- chat_id=chat_id,
280
- skip_ingestion=skip_ingestion,
281
- )
282
- except Exception as e:
283
- self.logger.error(f"Error while uploading content: {e}")
284
- raise e
285
-
286
- def _trigger_upload_content(
287
- self,
288
- path_to_content: str,
289
- content_name: str,
290
- mime_type: str,
291
- scope_id: Optional[str] = None,
292
- chat_id: Optional[str] = None,
293
- skip_ingestion: Optional[bool] = False,
294
- ):
295
- if not chat_id and not scope_id:
296
- raise ValueError("chat_id or scope_id must be provided")
297
-
298
- byte_size = os.path.getsize(path_to_content)
299
- created_content = unique_sdk.Content.upsert(
300
- user_id=self.event.user_id,
301
- company_id=self.event.company_id,
302
- input={
303
- "key": content_name,
304
- "title": content_name,
305
- "mimeType": mime_type,
306
- },
307
- scopeId=scope_id,
308
- chatId=chat_id,
309
- ) # type: ignore
310
-
311
- write_url = created_content["writeUrl"]
312
-
313
- if not write_url:
314
- error_msg = "Write url for uploaded content is missing"
315
- self.logger.error(error_msg)
316
- raise ValueError(error_msg)
317
-
318
- # upload to azure blob storage SAS url uploadUrl the pdf file translatedFile make sure it is treated as a application/pdf
319
- with open(path_to_content, "rb") as file:
320
- requests.put(
321
- url=write_url,
322
- data=file,
323
- headers={
324
- "X-Ms-Blob-Content-Type": mime_type,
325
- "X-Ms-Blob-Type": "BlockBlob",
326
- },
327
- )
328
-
329
- read_url = created_content["readUrl"]
330
-
331
- if not read_url:
332
- error_msg = "Read url for uploaded content is missing"
333
- self.logger.error(error_msg)
334
- raise ValueError(error_msg)
335
-
336
- input_dict = {
337
- "key": content_name,
338
- "title": content_name,
339
- "mimeType": mime_type,
340
- "byteSize": byte_size,
341
- }
342
-
343
- if skip_ingestion:
344
- input_dict["ingestionConfig"] = {"uniqueIngestionMode": "SKIP_INGESTION"}
345
-
346
- if chat_id:
347
- unique_sdk.Content.upsert(
348
- user_id=self.event.user_id,
349
- company_id=self.event.company_id,
350
- input=input_dict,
351
- fileUrl=read_url,
352
- chatId=chat_id,
353
- ) # type: ignore
354
- else:
355
- unique_sdk.Content.upsert(
356
- user_id=self.event.user_id,
357
- company_id=self.event.company_id,
358
- input=input_dict,
359
- fileUrl=read_url,
360
- scopeId=scope_id,
361
- ) # type: ignore
362
-
363
- return Content(**created_content)
249
+ return upload_content(
250
+ user_id=self.user_id,
251
+ company_id=self.company_id,
252
+ path_to_content=path_to_content,
253
+ content_name=content_name,
254
+ mime_type=mime_type,
255
+ scope_id=scope_id,
256
+ chat_id=chat_id,
257
+ skip_ingestion=skip_ingestion,
258
+ )
364
259
 
365
260
  def request_content_by_id(
366
- self, content_id: str, chat_id: str | None
367
- ) -> requests.Response:
261
+ self,
262
+ content_id: str,
263
+ chat_id: str | None,
264
+ ) -> Response:
368
265
  """
369
266
  Sends a request to download content from a chat.
370
267
 
@@ -376,36 +273,28 @@ class ContentService(BaseService):
376
273
  requests.Response: The response object containing the downloaded content.
377
274
 
378
275
  """
379
- url = f"{unique_sdk.api_base}/content/{content_id}/file"
380
- if chat_id:
381
- url = f"{url}?chatId={chat_id}"
382
-
383
- # Download the file and save it to the random directory
384
- headers = {
385
- "x-api-version": unique_sdk.api_version,
386
- "x-app-id": unique_sdk.app_id,
387
- "x-user-id": self.event.user_id,
388
- "x-company-id": self.event.company_id,
389
- "Authorization": "Bearer %s" % (unique_sdk.api_key,),
390
- }
391
-
392
- return requests.get(url, headers=headers)
276
+ return request_content_by_id(
277
+ user_id=self.user_id,
278
+ company_id=self.company_id,
279
+ content_id=content_id,
280
+ chat_id=chat_id,
281
+ )
393
282
 
394
283
  def download_content_to_file_by_id(
395
284
  self,
396
285
  content_id: str,
397
- chat_id: Optional[str] = None,
286
+ chat_id: str | None = None,
398
287
  filename: str | None = None,
399
- tmp_dir_path: Optional[Union[str, Path]] = "/tmp",
288
+ tmp_dir_path: str | Path | None = "/tmp",
400
289
  ):
401
290
  """
402
291
  Downloads content from a chat and saves it to a file.
403
292
 
404
293
  Args:
405
294
  content_id (str): The ID of the content to download.
406
- chat_id (Optional[str]): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
295
+ chat_id (str | None): The ID of the chat to download from. Defaults to None and the file is downloaded from the knowledge base.
407
296
  filename (str | None): The name of the file to save the content as. If not provided, the original filename will be used. Defaults to None.
408
- tmp_dir_path (Optional[Union[str, Path]]): The path to the temporary directory where the content will be saved. Defaults to "/tmp".
297
+ tmp_dir_path (str | Path | None): The path to the temporary directory where the content will be saved. Defaults to "/tmp".
409
298
 
410
299
  Returns:
411
300
  Path: The path to the downloaded file.
@@ -414,42 +303,22 @@ class ContentService(BaseService):
414
303
  Exception: If the download fails or the filename cannot be determined.
415
304
  """
416
305
 
417
- response = self.request_content_by_id(content_id, chat_id)
418
- random_dir = tempfile.mkdtemp(dir=tmp_dir_path)
419
-
420
- if response.status_code == 200:
421
- if filename:
422
- content_path = Path(random_dir) / filename
423
- else:
424
- pattern = r'filename="([^"]+)"'
425
- match = re.search(
426
- pattern, response.headers.get("Content-Disposition", "")
427
- )
428
- if match:
429
- content_path = Path(random_dir) / match.group(1)
430
- else:
431
- error_msg = (
432
- "Error downloading file: Filename could not be determined"
433
- )
434
- self.logger.error(error_msg)
435
- raise Exception(error_msg)
436
-
437
- with open(content_path, "wb") as file:
438
- file.write(response.content)
439
- else:
440
- error_msg = f"Error downloading file: Status code {response.status_code}"
441
- self.logger.error(error_msg)
442
- raise Exception(error_msg)
443
-
444
- return content_path
306
+ return download_content_to_file_by_id(
307
+ user_id=self.user_id,
308
+ company_id=self.company_id,
309
+ content_id=content_id,
310
+ chat_id=chat_id,
311
+ filename=filename,
312
+ tmp_dir_path=tmp_dir_path,
313
+ )
445
314
 
446
315
  # TODO: Discuss if we should deprecate this method due to unclear use by content_name
447
316
  def download_content(
448
317
  self,
449
318
  content_id: str,
450
319
  content_name: str,
451
- chat_id: Optional[str] = None,
452
- dir_path: Optional[Union[str, Path]] = "/tmp",
320
+ chat_id: str | None = None,
321
+ dir_path: str | Path | None = "/tmp",
453
322
  ) -> Path:
454
323
  """
455
324
  Downloads content to temporary directory
@@ -467,17 +336,11 @@ class ContentService(BaseService):
467
336
  Exception: If the download fails.
468
337
  """
469
338
 
470
- response = self.request_content_by_id(content_id, chat_id)
471
-
472
- random_dir = tempfile.mkdtemp(dir=dir_path)
473
- content_path = Path(random_dir) / content_name
474
-
475
- if response.status_code == 200:
476
- with open(content_path, "wb") as file:
477
- file.write(response.content)
478
- else:
479
- error_msg = f"Error downloading file: Status code {response.status_code}"
480
- self.logger.error(error_msg)
481
- raise Exception(error_msg)
482
-
483
- return content_path
339
+ return download_content(
340
+ user_id=self.user_id,
341
+ company_id=self.company_id,
342
+ content_id=content_id,
343
+ content_name=content_name,
344
+ chat_id=chat_id,
345
+ dir_path=dir_path,
346
+ )
@@ -1,8 +1,10 @@
1
1
  import re
2
2
 
3
3
  import tiktoken
4
+ import unique_sdk
4
5
 
5
6
  from unique_toolkit.content.schemas import (
7
+ Content,
6
8
  ContentChunk,
7
9
  )
8
10
 
@@ -186,3 +188,33 @@ def count_tokens(text: str, encoding_model="cl100k_base") -> int:
186
188
  """
187
189
  encoding = tiktoken.get_encoding(encoding_model)
188
190
  return len(encoding.encode(text))
191
+
192
+
193
+ def map_content_chunk(content_chunk: dict):
194
+ return ContentChunk(
195
+ id=content_chunk["id"],
196
+ text=content_chunk["text"],
197
+ start_page=content_chunk["startPage"],
198
+ end_page=content_chunk["endPage"],
199
+ order=content_chunk["order"],
200
+ )
201
+
202
+
203
+ def map_content(content: dict):
204
+ return Content(
205
+ id=content["id"],
206
+ key=content["key"],
207
+ title=content["title"],
208
+ url=content["url"],
209
+ chunks=[map_content_chunk(chunk) for chunk in content["chunks"]],
210
+ created_at=content["createdAt"],
211
+ updated_at=content["updatedAt"],
212
+ )
213
+
214
+
215
+ def map_contents(contents):
216
+ return [map_content(content) for content in contents]
217
+
218
+
219
+ def map_to_content_chunks(searches: list[unique_sdk.Search]):
220
+ return [ContentChunk(**search) for search in searches]
@@ -1,3 +1,6 @@
1
+ from .constants import (
2
+ DOMAIN_NAME as DOMAIN_NAME,
3
+ )
1
4
  from .schemas import Embeddings as Embeddings
2
5
  from .service import EmbeddingService as EmbeddingService
3
6
  from .utils import (
@@ -0,0 +1,2 @@
1
+ DOMAIN_NAME = "embedding"
2
+ DEFAULT_TIMEOUT = 600_000