unstructured-ingest 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.

Files changed (31)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/mixedbreadai.py +28 -45
  3. unstructured_ingest/processes/connectors/jira.py +188 -170
  4. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
  5. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
  6. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
  7. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
  8. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
  9. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
  10. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
  11. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
  12. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
  13. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
  14. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
  15. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
  16. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
  17. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
  18. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
  19. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
  20. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
  21. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
  22. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
  23. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
  24. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
  25. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
  26. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
  27. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/METADATA +2 -2
  28. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/RECORD +31 -31
  29. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/WHEEL +0 -0
  30. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/entry_points.txt +0 -0
  31. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/licenses/LICENSE.md +0 -0

unstructured_ingest/__version__.py

@@ -1 +1 @@
- __version__ = "1.0.21" # pragma: no cover
+ __version__ = "1.0.23" # pragma: no cover

unstructured_ingest/embed/mixedbreadai.py

@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"


  if TYPE_CHECKING:
- from mixedbread_ai.client import AsyncMixedbreadAI, MixedbreadAI
- from mixedbread_ai.core import RequestOptions
+ from mixedbread import AsyncMixedbread, Mixedbread


  class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
  )

  @requires_dependencies(
- ["mixedbread_ai"],
- extras="mixedbreadai",
+ ["mixedbread"],
+ extras="embed-mixedbreadai",
  )
- def get_client(self) -> "MixedbreadAI":
+ def get_client(self) -> "Mixedbread":
  """
  Create the Mixedbread AI client.

  Returns:
- MixedbreadAI: Initialized client.
+ Mixedbread: Initialized client.
  """
- from mixedbread_ai.client import MixedbreadAI
+ from mixedbread import Mixedbread

- return MixedbreadAI(
+ return Mixedbread(
  api_key=self.api_key.get_secret_value(),
+ max_retries=MAX_RETRIES,
  )

  @requires_dependencies(
- ["mixedbread_ai"],
- extras="mixedbreadai",
+ ["mixedbread"],
+ extras="embed-mixedbreadai",
  )
- def get_async_client(self) -> "AsyncMixedbreadAI":
- from mixedbread_ai.client import AsyncMixedbreadAI
+ def get_async_client(self) -> "AsyncMixedbread":
+ from mixedbread import AsyncMixedbread

- return AsyncMixedbreadAI(
+ return AsyncMixedbread(
  api_key=self.api_key.get_secret_value(),
+ max_retries=MAX_RETRIES,
  )


@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
  return self.embed_query(query="Q")

  @requires_dependencies(
- ["mixedbread_ai"],
+ ["mixedbread"],
  extras="embed-mixedbreadai",
  )
- def get_request_options(self) -> "RequestOptions":
- from mixedbread_ai.core import RequestOptions
-
- return RequestOptions(
- max_retries=MAX_RETRIES,
- timeout_in_seconds=TIMEOUT,
- additional_headers={"User-Agent": USER_AGENT},
- )
-
- def get_client(self) -> "MixedbreadAI":
+ def get_client(self) -> "Mixedbread":
  return self.config.get_client()

- def embed_batch(self, client: "MixedbreadAI", batch: list[str]) -> list[list[float]]:
- response = client.embeddings(
+ def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
+ response = client.embed(
  model=self.config.embedder_model_name,
+ input=batch,
  normalized=True,
  encoding_format=ENCODING_FORMAT,
- truncation_strategy=TRUNCATION_STRATEGY,
- request_options=self.get_request_options(),
- input=batch,
+ extra_headers={"User-Agent": USER_AGENT},
+ timeout=TIMEOUT,
  )
  return [datum.embedding for datum in response.data]

@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
  return await self.embed_query(query="Q")

  @requires_dependencies(
- ["mixedbread_ai"],
+ ["mixedbread"],
  extras="embed-mixedbreadai",
  )
- def get_request_options(self) -> "RequestOptions":
- from mixedbread_ai.core import RequestOptions
-
- return RequestOptions(
- max_retries=MAX_RETRIES,
- timeout_in_seconds=TIMEOUT,
- additional_headers={"User-Agent": USER_AGENT},
- )
-
- def get_client(self) -> "AsyncMixedbreadAI":
+ def get_client(self) -> "AsyncMixedbread":
  return self.config.get_async_client()

- async def embed_batch(self, client: "AsyncMixedbreadAI", batch: list[str]) -> list[list[float]]:
- response = await client.embeddings(
+ async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
+ response = await client.embed(
  model=self.config.embedder_model_name,
+ input=batch,
  normalized=True,
  encoding_format=ENCODING_FORMAT,
- truncation_strategy=TRUNCATION_STRATEGY,
- request_options=self.get_request_options(),
- input=batch,
+ extra_headers={"User-Agent": USER_AGENT},
+ timeout=TIMEOUT,
  )
  return [datum.embedding for datum in response.data]
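
Taken together, the mixedbreadai.py changes migrate the embedder from the deprecated mixedbread_ai SDK to the mixedbread package: retries move onto the client constructor, and the per-request RequestOptions object is replaced by extra_headers and timeout arguments on the embed call. The sketch below mirrors the new call pattern from the diff; the API key, model name, header value, and timeout are illustrative placeholders, not values shipped in this package.

    # Sketch of the migrated embedding call (mirrors the diff above).
    from mixedbread import Mixedbread

    client = Mixedbread(
        api_key="YOUR_API_KEY",   # placeholder; the connector reads it from its secret config
        max_retries=3,            # placeholder; the connector uses its MAX_RETRIES constant
    )
    response = client.embed(
        model="mixedbread-ai/mxbai-embed-large-v1",  # placeholder model name
        input=["text to embed", "another text"],
        normalized=True,
        encoding_format="float",                     # the connector passes its ENCODING_FORMAT constant
        extra_headers={"User-Agent": "my-app"},      # placeholder user agent
        timeout=60,                                  # placeholder timeout
    )
    embeddings = [datum.embedding for datum in response.data]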

unstructured_ingest/processes/connectors/jira.py

@@ -1,11 +1,11 @@
- import math
  from collections import abc
  from contextlib import contextmanager
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
+ from time import time
+ from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union, cast

- from pydantic import Field, Secret
+ from pydantic import BaseModel, Field, Secret

  from unstructured_ingest.data_types.file_data import (
  FileData,
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
  DownloadResponse,
  Indexer,
  IndexerConfig,
+ download_responses,
  )
  from unstructured_ingest.logger import logger
  from unstructured_ingest.processes.connector_registry import (
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
  DEFAULT_R_SEP = "\n"


- @dataclass
- class JiraIssueMetadata:
+ class JiraIssueMetadata(BaseModel):
  id: str
  key: str
- board_id: Optional[str] = None

- @property
- def project_id(self) -> str:
+ def get_project_id(self) -> str:
  return self.key.split("-")[0]

- def to_dict(self) -> Dict[str, Union[str, None]]:
- return {
- "id": self.id,
- "key": self.key,
- "board_id": self.board_id,
- "project_id": self.project_id,
- }
-

  class FieldGetter(dict):
  def __getitem__(self, key):
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
  return obj


- def issues_fetcher_wrapper(func, results_key="results", number_of_issues_to_fetch: int = 100):
- """
- A decorator function that wraps around a function to fetch issues from Jira API in a paginated
- manner. This is required because the Jira API has a limit of 100 issues per request.
-
- Args:
- func (callable): The function to be wrapped. This function should accept `limit` and `start`
- as keyword arguments.
- results_key (str, optional): The key in the response dictionary that contains the list of
- results. Defaults to "results".
- number_of_issues_to_fetch (int, optional): The total number of issues to fetch. Defaults to
- 100.
-
- Returns:
- list: A list of all fetched issues.
-
- Raises:
- KeyError: If the response dictionary does not contain the specified `results_key`.
- TypeError: If the response type from the Jira API is neither list nor dict.
- """
-
- def wrapper(*args, **kwargs) -> list:
- kwargs["limit"] = min(100, number_of_issues_to_fetch)
- kwargs["start"] = kwargs.get("start", 0)
-
- all_results = []
- num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
-
- for _ in range(num_iterations):
- response = func(*args, **kwargs)
- if isinstance(response, list):
- all_results += response
- elif isinstance(response, dict):
- if results_key not in response:
- raise KeyError(f'Response object is missing "{results_key}" key.')
- all_results += response[results_key]
- else:
- raise TypeError(
- f"""Unexpected response type from Jira API.
- Response type has to be either list or dict, got: {type(response).__name__}."""
- )
- kwargs["start"] += kwargs["limit"]
-
- return all_results
-
- return wrapper
+ def api_token_based_generator(
+ fn: Callable, key: str = "issues", **kwargs
+ ) -> Generator[dict, None, None]:
+ nextPageToken = kwargs.pop("nextPageToken", None)
+ while True:
+ resp = fn(nextPageToken=nextPageToken, **kwargs)
+ issues = resp.get(key, [])
+ for issue in issues:
+ yield issue
+ nextPageToken = resp.get("nextPageToken")
+ if not nextPageToken:
+ break
+
+
+ def api_page_based_generator(
+ fn: Callable, key: str = "issues", **kwargs
+ ) -> Generator[dict, None, None]:
+ start = kwargs.pop("start", 0)
+ while True:
+ resp = fn(start=start, **kwargs)
+ issues = resp.get(key, [])
+ if not issues:
+ break
+ for issue in issues:
+ yield issue
+ start += len(issues)


  class JiraAccessConfig(AccessConfig):
@@ -201,9 +171,17 @@ class JiraConnectionConfig(ConnectionConfig):


  class JiraIndexerConfig(IndexerConfig):
- projects: Optional[List[str]] = Field(None, description="List of project keys")
- boards: Optional[List[str]] = Field(None, description="List of board IDs")
- issues: Optional[List[str]] = Field(None, description="List of issue keys or IDs")
+ projects: Optional[list[str]] = Field(None, description="List of project keys")
+ boards: Optional[list[str]] = Field(None, description="List of board IDs")
+ issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
+ status_filters: Optional[list[str]] = Field(
+ default=None,
+ description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
+ )
+
+ def model_post_init(self, context: Any, /) -> None:
+ if not self.projects and not self.boards and not self.issues:
+ raise ValueError("At least one of projects, boards, or issues must be provided.")


  @dataclass
@@ -228,122 +206,103 @@ class JiraIndexer(Indexer):
  )
  logger.info("Connection to Jira successful.")

- def _get_issues_within_single_project(self, project_key: str) -> List[JiraIssueMetadata]:
+ def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
  with self.connection_config.get_client() as client:
- number_of_issues_to_fetch = client.get_project_issues_count(project=project_key)
- if isinstance(number_of_issues_to_fetch, dict):
- if "total" not in number_of_issues_to_fetch:
- raise KeyError('Response object is missing "total" key.')
- number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
- if not number_of_issues_to_fetch:
- logger.warning(f"No issues found in project: {project_key}. Skipping!")
- return []
- get_project_issues = issues_fetcher_wrapper(
- client.get_all_project_issues,
- results_key="issues",
- number_of_issues_to_fetch=number_of_issues_to_fetch,
- )
- issues = get_project_issues(project=project_key, fields=["key", "id"])
- logger.debug(f"Found {len(issues)} issues in project: {project_key}")
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
-
- def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
- project_keys = self.index_config.projects
- if not project_keys:
- # for when a component list is provided, without any projects
- if self.index_config.boards or self.index_config.issues:
- return []
- # for when no components are provided. all projects will be ingested
- else:
- with self.connection_config.get_client() as client:
- project_keys = [project["key"] for project in client.projects()]
- return [
- issue
- for project_key in project_keys
- for issue in self._get_issues_within_single_project(project_key)
- ]
+ fields = ["key", "id"]
+ jql = "project in ({})".format(", ".join(self.index_config.projects))
+ jql = self._update_jql(jql)
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
+ yield JiraIssueMetadata.model_validate(issue)

  def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
  with self.connection_config.get_client() as client:
- get_board_issues = issues_fetcher_wrapper(
- client.get_issues_for_board,
- results_key="issues",
- )
- issues = get_board_issues(board_id=board_id, fields=["key", "id"], jql=None)
- logger.debug(f"Found {len(issues)} issues in board: {board_id}")
- return [
- JiraIssueMetadata(id=issue["id"], key=issue["key"], board_id=board_id)
- for issue in issues
- ]
-
- def _get_issues_within_boards(self) -> List[JiraIssueMetadata]:
+ fields = ["key", "id"]
+ if self.index_config.status_filters:
+ jql = "status in ({}) ORDER BY id".format(
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
+ )
+ else:
+ jql = "ORDER BY id"
+ for issue in api_page_based_generator(
+ fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
+ ):
+ yield JiraIssueMetadata.model_validate(issue)
+
+ def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
  if not self.index_config.boards:
- return []
- return [
- issue
- for board_id in self.index_config.boards
- for issue in self._get_issues_within_single_board(board_id)
- ]
-
- def _get_issues(self) -> List[JiraIssueMetadata]:
- with self.connection_config.get_client() as client:
- issues = [
- client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
- for issue_key in self.index_config.issues or []
- ]
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
-
- def get_issues(self) -> List[JiraIssueMetadata]:
- issues = [
- *self._get_issues_within_boards(),
- *self._get_issues_within_projects(),
- *self._get_issues(),
- ]
- # Select unique issues by issue 'id'.
- # Since boards issues are fetched first,
- # if there are duplicates, the board issues will be kept,
- # in order to keep issue 'board_id' information.
- seen = set()
- unique_issues: List[JiraIssueMetadata] = []
- for issue in issues:
- if issue.id not in seen:
- unique_issues.append(issue)
- seen.add(issue.id)
- return unique_issues
+ yield
+ for board_id in self.index_config.boards:
+ for issue in self._get_issues_within_single_board(board_id=board_id):
+ yield issue
+
+ def _update_jql(self, jql: str) -> str:
+ if self.index_config.status_filters:
+ jql += " and status in ({})".format(
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
+ )
+ jql = jql + " ORDER BY id"
+ return jql

- def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
- from time import time
+ def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
+ with self.connection_config.get_client() as client:
+ fields = ["key", "id"]
+ jql = "key in ({})".format(", ".join(self.index_config.issues))
+ jql = self._update_jql(jql)
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
+ yield JiraIssueMetadata.model_validate(issue)
+
+ def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
+ # Build metadata
+ metadata = FileDataSourceMetadata(
+ date_processed=str(time()),
+ record_locator=issue.model_dump(),
+ )

- issues = self.get_issues()
- for issue in issues:
- # Build metadata
- metadata = FileDataSourceMetadata(
- date_processed=str(time()),
- record_locator=issue.to_dict(),
- )
+ # Construct relative path and filename
+ filename = f"{issue.id}.txt"
+ relative_path = str(Path(issue.get_project_id()) / filename)

- # Construct relative path and filename
- filename = f"{issue.id}.txt"
- relative_path = str(Path(issue.project_id) / filename)
+ source_identifiers = SourceIdentifiers(
+ filename=filename,
+ fullpath=relative_path,
+ rel_path=relative_path,
+ )

- source_identifiers = SourceIdentifiers(
- filename=filename,
- fullpath=relative_path,
- rel_path=relative_path,
- )
+ file_data = FileData(
+ identifier=issue.id,
+ connector_type=self.connector_type,
+ metadata=metadata,
+ additional_metadata=issue.model_dump(),
+ source_identifiers=source_identifiers,
+ )
+ return file_data
+
+ def get_generators(self) -> List[Callable]:
+ generators = []
+ if self.index_config.boards:
+ generators.append(self._get_issues_within_boards)
+ if self.index_config.issues:
+ generators.append(self._get_issues_by_keys)
+ if self.index_config.projects:
+ generators.append(self._get_issues_within_projects)
+ return generators

- file_data = FileData(
- identifier=issue.id,
- connector_type=self.connector_type,
- metadata=metadata,
- additional_metadata=issue.to_dict(),
- source_identifiers=source_identifiers,
- )
- yield file_data
+ def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
+ seen_keys = []
+ for gen in self.get_generators():
+ for issue in gen():
+ if not issue:
+ continue
+ if issue.key in seen_keys:
+ continue
+ seen_keys.append(issue.key)
+ yield self._create_file_data_from_issue(issue=issue)


  class JiraDownloaderConfig(DownloaderConfig):
- pass
+ download_attachments: bool = Field(
+ default=False, description="If True, will download any attachments and process as well"
+ )


  @dataclass
@@ -448,7 +407,56 @@ class JiraDownloader(Downloader):
  logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
  raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")

- def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
+ def generate_attachment_file_data(
+ self, attachment_dict: dict, parent_filedata: FileData
+ ) -> FileData:
+ new_filedata = parent_filedata.model_copy(deep=True)
+ if new_filedata.metadata.record_locator is None:
+ new_filedata.metadata.record_locator = {}
+ new_filedata.metadata.record_locator["parent_issue"] = (
+ parent_filedata.metadata.record_locator["id"]
+ )
+ # Append an identifier for attachment to not conflict with issue ids
+ new_filedata.identifier = "{}a".format(attachment_dict["id"])
+ filename = attachment_dict["filename"]
+ new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
+ new_filedata.metadata.date_created = attachment_dict.pop("created", None)
+ new_filedata.metadata.url = attachment_dict.pop("self", None)
+ new_filedata.metadata.record_locator = attachment_dict
+ new_filedata.source_identifiers = SourceIdentifiers(
+ filename=filename,
+ fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
+ )
+ return new_filedata
+
+ def process_attachments(
+ self, file_data: FileData, attachments: list[dict]
+ ) -> list[DownloadResponse]:
+ with self.connection_config.get_client() as client:
+ download_path = self.get_download_path(file_data)
+ attachment_download_dir = download_path.parent / "attachments"
+ attachment_download_dir.mkdir(parents=True, exist_ok=True)
+ download_responses = []
+ for attachment in attachments:
+ attachment_filename = Path(attachment["filename"])
+ attachment_id = attachment["id"]
+ attachment_download_path = attachment_download_dir / Path(
+ attachment_id
+ ).with_suffix(attachment_filename.suffix)
+ resp = client.get_attachment_content(attachment_id=attachment_id)
+ with open(attachment_download_path, "wb") as f:
+ f.write(resp)
+ attachment_filedata = self.generate_attachment_file_data(
+ attachment_dict=attachment, parent_filedata=file_data
+ )
+ download_responses.append(
+ self.generate_download_response(
+ file_data=attachment_filedata, download_path=attachment_download_path
+ )
+ )
+ return download_responses
+
+ def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
  issue_key = file_data.additional_metadata.get("key")
  if not issue_key:
  raise ValueError("Issue key not found in metadata.")
@@ -463,7 +471,17 @@ class JiraDownloader(Downloader):
  with open(download_path, "w") as f:
  f.write(issue_str)
  self.update_file_data(file_data, issue)
- return self.generate_download_response(file_data=file_data, download_path=download_path)
+ download_response = self.generate_download_response(
+ file_data=file_data, download_path=download_path
+ )
+ if self.download_config.download_attachments and (
+ attachments := issue.get("fields", {}).get("attachment")
+ ):
+ attachment_responses = self.process_attachments(
+ file_data=file_data, attachments=attachments
+ )
+ download_response = [download_response] + attachment_responses
+ return download_response


  jira_source_entry = SourceRegistryEntry(
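
The pagination model is the core of this rewrite: instead of pre-counting issues and batching requests through issues_fetcher_wrapper, the indexer now streams issues through two small generators, one driven by Jira's nextPageToken (used with enhanced_jql) and one driven by a start offset (used for board issues). The sketch below exercises the token-based generator exactly as defined in the diff; fake_enhanced_jql is a hypothetical stand-in for the real Atlassian client call, included only to make the example self-contained.

    from typing import Callable, Generator

    def api_token_based_generator(
        fn: Callable, key: str = "issues", **kwargs
    ) -> Generator[dict, None, None]:
        # Same logic as the generator added in the diff: keep calling `fn` with the
        # token returned by the previous page until no nextPageToken comes back.
        nextPageToken = kwargs.pop("nextPageToken", None)
        while True:
            resp = fn(nextPageToken=nextPageToken, **kwargs)
            for issue in resp.get(key, []):
                yield issue
            nextPageToken = resp.get("nextPageToken")
            if not nextPageToken:
                break

    # Hypothetical two-page response, standing in for client.enhanced_jql.
    _pages = {
        None: {"issues": [{"id": "1", "key": "PROJ-1"}], "nextPageToken": "t1"},
        "t1": {"issues": [{"id": "2", "key": "PROJ-2"}]},
    }

    def fake_enhanced_jql(nextPageToken=None, **kwargs) -> dict:
        return _pages[nextPageToken]

    keys = [i["key"] for i in api_token_based_generator(fake_enhanced_jql, jql="project in (PROJ)")]
    assert keys == ["PROJ-1", "PROJ-2"]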

unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py

@@ -1,5 +1,6 @@
  from typing import Dict

+ from unstructured_ingest.logger import logger
  from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase

  from .checkbox import Checkbox, CheckboxCell
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
  from .url import URL, URLCell
  from .verification import Verification, VerificationCell

+ # It's possible to add 'button' property to Notion database.
+ # However, current Notion API documentation doesn't mention it.
+ # Buttons are only functional inside Notion UI. We can simply
+ # ignore them so that the we don't throw an error when trying
+ # to map 'button' properties.
+ unsupported_db_prop_types = ["button"]
+
  db_prop_type_mapping = {
  "checkbox": Checkbox,
  "created_by": CreatedBy,
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
  mapped_dict = {}
  for k, v in props.items():
  try:
- mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore
+ property_type = v["type"]
+ if property_type in unsupported_db_prop_types:
+ logger.warning(
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
+ )
+ continue
+ mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
  except KeyError as ke:
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke

@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
  mapped_dict = {}
  for k, v in props.items():
  try:
- t = v["type"]
- mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore
+ property_type = v["type"]
+ if property_type in unsupported_db_prop_types:
+ logger.warning(
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
+ )
+ continue
+ mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
  except KeyError as ke:
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke

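
In effect, database properties whose type appears in unsupported_db_prop_types (currently only 'button') are now skipped with a warning instead of bubbling up a KeyError from the type mapping. A minimal standalone illustration of that skip pattern, using a toy mapping rather than the real Notion property classes:

    import logging

    logger = logging.getLogger(__name__)

    # Toy stand-ins for the real db_prop_type_mapping and property payloads.
    db_prop_type_mapping = {"checkbox": dict}
    unsupported_db_prop_types = ["button"]

    def map_properties(props: dict) -> dict:
        mapped = {}
        for k, v in props.items():
            property_type = v["type"]
            if property_type in unsupported_db_prop_types:
                logger.warning(f"Unsupported property type '{property_type}' for property '{k}'. Skipping.")
                continue
            mapped[k] = db_prop_type_mapping[property_type](v)
        return mapped

    result = map_properties({"Done": {"type": "checkbox"}, "Run me": {"type": "button"}})
    assert list(result) == ["Done"]  # the 'button' property is skipped, not an error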

unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py

@@ -13,6 +13,7 @@ class Checkbox(DBPropertyBase):
  id: str
  name: str
  type: str = "checkbox"
+ description: Optional[str] = None
  checkbox: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py

@@ -13,6 +13,7 @@ class CreatedBy(DBPropertyBase):
  id: str
  name: str
  type: str = "created_by"
+ description: Optional[str] = None
  created_by: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py

@@ -12,6 +12,7 @@ class CreatedTime(DBPropertyBase):
  id: str
  name: str
  type: str = "created_time"
+ description: Optional[str] = None
  created_time: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/date.py

@@ -13,6 +13,7 @@ class Date(DBPropertyBase):
  id: str
  name: str
  type: str = "date"
+ description: Optional[str] = None
  date: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/email.py

@@ -12,6 +12,7 @@ class Email(DBPropertyBase):
  id: str
  name: str
  type: str = "email"
+ description: Optional[str] = None
  email: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/files.py

@@ -13,6 +13,7 @@ class Files(DBPropertyBase):
  id: str
  name: str
  type: str = "files"
+ description: Optional[str] = None
  files: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py

@@ -26,6 +26,7 @@ class Formula(DBPropertyBase):
  name: str
  formula: FormulaProp
  type: str = "formula"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py

@@ -23,7 +23,7 @@ class LastEditedByCell(DBCellBase):
  id: str
  last_edited_by: People
  type: str = "last_edited_by"
-
+ description: Optional[str] = None
  name: Optional[str] = None

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py

@@ -12,6 +12,7 @@ class LastEditedTime(DBPropertyBase):
  id: str
  name: str
  type: str = "last_edited_time"
+ description: Optional[str] = None
  last_edited_time: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py

@@ -38,6 +38,7 @@ class MultiSelect(DBPropertyBase):
  name: str
  multi_select: MultiSelectProp
  type: str = "multi_select"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/number.py

@@ -26,6 +26,7 @@ class Number(DBPropertyBase):
  name: str
  number: NumberProp
  type: str = "number"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/people.py

@@ -14,6 +14,7 @@ class People(DBPropertyBase):
  name: str
  description: Optional[str] = None
  type: str = "people"
+ description: Optional[str] = None
  people: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py

@@ -12,6 +12,7 @@ class PhoneNumber(DBPropertyBase):
  id: str
  name: str
  type: str = "phone_number"
+ description: Optional[str] = None
  phone_number: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py

@@ -45,6 +45,7 @@ class Relation(DBPropertyBase):
  name: str
  relation: RelationProp
  type: str = "relation"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py

@@ -15,6 +15,7 @@ class RichText(DBPropertyBase):
  id: str
  name: str
  type: str = "rich_text"
+ description: Optional[str] = None
  rich_text: dict = field(default_factory=dict)

  @classmethod

unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py

@@ -30,6 +30,7 @@ class Rollup(DBPropertyBase):
  name: str
  rollup: RollupProp
  type: str = "rollup"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/select.py

@@ -38,8 +38,8 @@ class Select(DBPropertyBase):
  id: str
  name: str
  select: SelectProp
- description: Optional[str] = None
  type: str = "select"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/status.py

@@ -55,6 +55,7 @@ class Status(DBPropertyBase):
  name: str
  status: StatusProp
  type: str = "status"
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/title.py

@@ -14,6 +14,7 @@ class Title(DBPropertyBase):
  name: str
  type: str = "title"
  title: dict = field(default_factory=dict)
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py

@@ -17,6 +17,7 @@ class UniqueID(DBPropertyBase):
  name: str
  type: str = "unique_id"
  unique_id: dict = field(default_factory=dict)
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/url.py

@@ -14,6 +14,7 @@ class URL(DBPropertyBase):
  name: str
  type: str = "url"
  url: dict = field(default_factory=dict)
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py

@@ -20,6 +20,7 @@ class Verification(DBPropertyBase):
  name: str
  type: str = "verification"
  verification: dict = field(default_factory=dict)
+ description: Optional[str] = None

  @classmethod
  def from_dict(cls, data: dict):

{unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: unstructured_ingest
- Version: 1.0.21
+ Version: 1.0.23
  Summary: Local ETL data pipeline to get data RAG ready
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
  License-Expression: Apache-2.0
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
  Provides-Extra: milvus
  Requires-Dist: pymilvus; extra == 'milvus'
  Provides-Extra: mixedbreadai
- Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
+ Requires-Dist: mixedbread; extra == 'mixedbreadai'
  Provides-Extra: mongodb
  Requires-Dist: pymongo; extra == 'mongodb'
  Provides-Extra: msg

{unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.23.dist-info}/RECORD

@@ -1,5 +1,5 @@
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
- unstructured_ingest/__version__.py,sha256=_fAo4tbdJV7k_s1lgXUPPmLFVpxbTy7HhoN9KbPxQ4Y,43
+ unstructured_ingest/__version__.py,sha256=xbdPxvOGZJUW_s_LZYTaPijNvLNKSjZuHlwNDGHpDjE,43
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -26,7 +26,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9
  unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
  unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
  unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
- unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
+ unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
  unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
  unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
  unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
  unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
  unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
- unstructured_ingest/processes/connectors/jira.py,sha256=alnwUYyID-mUIlGq1xh5QGEw2iZ2RwbOIyptev3dI6Q,18011
+ unstructured_ingest/processes/connectors/jira.py,sha256=Hw07c2HT2vA2l2wpoYWXPNtLbnWreXCIRimAxm0Gfpw,19055
  unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
@@ -174,29 +174,29 @@ unstructured_ingest/processes/connectors/notion/types/blocks/todo.py,sha256=Kiga
  unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py,sha256=6ae_eR3SOfUgTw-XO_F3JRBaczSp8UZfLBFMRMO5NHo,1188
  unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py,sha256=q_p9XH8sQB8xwFqi9yEl6Fvur3fTLdeVdQCh0gSju58,442
  unstructured_ingest/processes/connectors/notion/types/blocks/video.py,sha256=XK-O7XPs5ejTUWrg2FTLvbOZajs-yDtVhR79HSEcxvo,779
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=iUdtLGlHe52daXBWVlGghXcGSxCOCDiFASsuKb4_UAM,3225
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=1GiebXsYYoQmM1GZJBrzv9dnM9P9dtQJ-dwbQDo6PdI,1010
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=NvOuM_1SVBdn-6acYxKJ1ThMuWJ935aYtaKV1TOBTFQ,949
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=HPf6Dp7wWWRQ-j7AGFWMm-wkakuJ4R8_rfz9RPNZuec,834
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=5nYyh8S9HzFyZb2bZZttSZj8CE0Q2dc73cA27m-H1k4,1067
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=0powD6-ZftD0WSHxjTHMQ388RK-WfWLaYuEJIT19mdM,831
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=1-Jnx1YyWptNW3737oOzr7WGItS7aNj68BYaWyBRljo,1020
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=LV9d6NRLGu7eIr6wpfnkLxgQug107LGAFC1Z9ooc3xw,1069
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=1TMFWA5GIb6pJbYAF_Q2ZPOWNa9y0p8ELk6UzLMZ2FY,926
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=MvZgXTBx1uS-vn9vITFCIRPFnV26_rcao1YhBiZu5bU,864
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=WnEJyDmEaI39ETopeoXwFp6Iog-rF2TYWG3o-DRv2ic,1917
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=LbrTryg5z8Q1dLekxwXi_qCvOwH6n_LD4iVYsVBNGB4,1056
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=LXNbhBo53Lf7jM_bLYgarwLao7ymEJjZhJ-U5xMBbLc,1184
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=NoIVkd31mvvZYa0DOkE4JibJdhJjIEL8C78jNxcxtVo,909
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=e4zQmjtP2w1RNfKDaWu2-AOhwjM_QP_TrjyxAQUnpN4,1528
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=EeWvzLx9zYT3crH_eshAOf1L60k5gMqJUqQGiI-is7c,1171
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=xyaA2yIvDfe-tu_TXi6ZjCz2jCawFS2DVKvfSr-nxjQ,1277
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=Qy-t9kmJq5cZKohxNX2DbvIftX95CvDX6JMDVWzVYl4,1794
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=GneqbmGaBqOecoApH6I4YWf5flvuVk2m5wMJrRw7Nvo,2044
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=4Tnfxxx_Nr-Wkwd7urotp1xTGO9itWe8qI3OnfooLrE,1023
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=i95msSj2-l3yG8tw02dIChADDEy695MWmNWXJq0jGD4,1173
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=azWD7VwkYlI4vFYgsuMvr2618skr4MCtagSTuJ509bg,873
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=bge_vjWUYh9-CJ_gmVg7ESVLAxHeq_wicp-6vlqNRwQ,2367
+ unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=6kUXmCI58R1e50b1U-_xqrrPw3g2Mqtbt02aC7DVAxw,4118
+ unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=uS2B4nQ-ISt8QGxw7nNwst8MX5xRTecSvqokZ23DKyA,1048
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=RcpjFijEwyuGrPhSjrXT1nxaLoX2mnCvjveZ0f5Ke3c,987
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=eJjkK1nKb0-Ohi4lpCplbrUTkCgf4D2gWbFxEhDI_G8,872
+ unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=DECAkkhR6qQ-WKsOzQf2VPdYGcyrnAJNk4y4JHDVDuc,1105
+ unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=yOHIV_fpF9xzqcrkRIC4cF_aC8C7RsJJvRtEgSn30a8,869
+ unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=5_6FpFWoKNriaBRLtNDRUxu1ZO1UTvAFeu4H55VNY68,1058
+ unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=zcTeVuXpmuMNh4FHJHW5zgKWAqo0Wx7s9UsSEvA_wR8,1107
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=WpWlXz9AwS1rugpvoDoVOo055dVEAt3XmvudD17HJu8,963
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=kvmMmEXj3WndR9BG9MHwuM40luA4XhGfnF6rKDpYiF0,902
+ unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=LSDfUXpgUox7Z77_TKIlKHqYPUgO8Y06lVgvju6NXx8,1955
+ unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=y4ocq8_yX_yKkAtM3qcqIueM9y96-47gshM2mra_tgw,1094
+ unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=HBTbqw1L1h8XHKuuS0e0aoz0dAZXSLDy7zRwM1_rRps,1222
+ unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=mBEcwNLCI-FLU6t2FqR_tNTvrJFIQ7hqYeTB51HavBc,947
+ unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=fYYyzBhi1fmXrhdxu6W6uMr2e6HaDCfrvY7yZIFvgmM,1566
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=D2y-98U7MP-YmsrAPeT4vqG3m7HB4zoOzMMhhYN8VHY,1209
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=r8UXCW7Y-eE5W6RpXiyyMszCMRDtiwBmYOmYHZ_9-VY,1315
+ unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=-UAIuddoyKol45epuOYNlS8dchuwL0wMGwash4BwuH4,1794
+ unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=kUFZsWGQZAApEbs5qI37t8LPN0vUM5vcu4pPbEvIGkE,2082
+ unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=yd1vPbCBgGIbtUsC3zOu3-Cdpcst0dEkuFVdtS97hxA,1061
+ unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=e8WslwVD6ccf4x_3NihX4BWtH7y4zMAFH7Ur4jS3dH8,1211
+ unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=Bxu8x7mmH28l2nQSaAmygal8dZdUdHEFbUYIk75B0iQ,911
+ unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=B9J4zXHVJzzIcn_QD04Z9eibEij1ornw5RhZ2qmUdDU,2405
  unstructured_ingest/processes/connectors/qdrant/__init__.py,sha256=7WN_3M3qQ0O7pUJSXIKtPqAvKX2tQ_WxClCHbFeqPfc,757
  unstructured_ingest/processes/connectors/qdrant/cloud.py,sha256=H5Plp2xqFheESLertj56o78CL4exyCQhBDE1TGAzcWU,1618
  unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRiIHMPcctKyVBdsaLi8KXloAwq76o,1582
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
- unstructured_ingest-1.0.21.dist-info/METADATA,sha256=lYMmxWJ0ySauI_NWrAQo4YZQ7pXAK4bZ0dX0XIsgacE,8694
- unstructured_ingest-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- unstructured_ingest-1.0.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
- unstructured_ingest-1.0.21.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
- unstructured_ingest-1.0.21.dist-info/RECORD,,
+ unstructured_ingest-1.0.23.dist-info/METADATA,sha256=b0LZ3XzhlhUgDZd4mEUPxxhOT-lqKAOnDfiQeJhCgoA,8691
+ unstructured_ingest-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ unstructured_ingest-1.0.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+ unstructured_ingest-1.0.23.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+ unstructured_ingest-1.0.23.dist-info/RECORD,,