unstructured-ingest 1.0.21__py3-none-any.whl → 1.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/mixedbreadai.py +28 -45
  3. unstructured_ingest/processes/connectors/jira.py +197 -191
  4. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
  5. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
  6. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
  7. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
  8. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
  9. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
  10. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
  11. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
  12. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
  13. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
  14. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
  15. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
  16. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
  17. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
  18. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
  19. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
  20. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
  21. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
  22. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
  23. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
  24. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
  25. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
  26. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
  27. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/METADATA +2 -2
  28. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/RECORD +31 -31
  29. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/WHEEL +0 -0
  30. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/entry_points.txt +0 -0
  31. {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/licenses/LICENSE.md +0 -0
@@ -1 +1 @@
1
- __version__ = "1.0.21" # pragma: no cover
1
+ __version__ = "1.0.24" # pragma: no cover
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
19
19
 
20
20
 
21
21
  if TYPE_CHECKING:
22
- from mixedbread_ai.client import AsyncMixedbreadAI, MixedbreadAI
23
- from mixedbread_ai.core import RequestOptions
22
+ from mixedbread import AsyncMixedbread, Mixedbread
24
23
 
25
24
 
26
25
  class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
44
43
  )
45
44
 
46
45
  @requires_dependencies(
47
- ["mixedbread_ai"],
48
- extras="mixedbreadai",
46
+ ["mixedbread"],
47
+ extras="embed-mixedbreadai",
49
48
  )
50
- def get_client(self) -> "MixedbreadAI":
49
+ def get_client(self) -> "Mixedbread":
51
50
  """
52
51
  Create the Mixedbread AI client.
53
52
 
54
53
  Returns:
55
- MixedbreadAI: Initialized client.
54
+ Mixedbread: Initialized client.
56
55
  """
57
- from mixedbread_ai.client import MixedbreadAI
56
+ from mixedbread import Mixedbread
58
57
 
59
- return MixedbreadAI(
58
+ return Mixedbread(
60
59
  api_key=self.api_key.get_secret_value(),
60
+ max_retries=MAX_RETRIES,
61
61
  )
62
62
 
63
63
  @requires_dependencies(
64
- ["mixedbread_ai"],
65
- extras="mixedbreadai",
64
+ ["mixedbread"],
65
+ extras="embed-mixedbreadai",
66
66
  )
67
- def get_async_client(self) -> "AsyncMixedbreadAI":
68
- from mixedbread_ai.client import AsyncMixedbreadAI
67
+ def get_async_client(self) -> "AsyncMixedbread":
68
+ from mixedbread import AsyncMixedbread
69
69
 
70
- return AsyncMixedbreadAI(
70
+ return AsyncMixedbread(
71
71
  api_key=self.api_key.get_secret_value(),
72
+ max_retries=MAX_RETRIES,
72
73
  )
73
74
 
74
75
 
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
88
89
  return self.embed_query(query="Q")
89
90
 
90
91
  @requires_dependencies(
91
- ["mixedbread_ai"],
92
+ ["mixedbread"],
92
93
  extras="embed-mixedbreadai",
93
94
  )
94
- def get_request_options(self) -> "RequestOptions":
95
- from mixedbread_ai.core import RequestOptions
96
-
97
- return RequestOptions(
98
- max_retries=MAX_RETRIES,
99
- timeout_in_seconds=TIMEOUT,
100
- additional_headers={"User-Agent": USER_AGENT},
101
- )
102
-
103
- def get_client(self) -> "MixedbreadAI":
95
+ def get_client(self) -> "Mixedbread":
104
96
  return self.config.get_client()
105
97
 
106
- def embed_batch(self, client: "MixedbreadAI", batch: list[str]) -> list[list[float]]:
107
- response = client.embeddings(
98
+ def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
99
+ response = client.embed(
108
100
  model=self.config.embedder_model_name,
101
+ input=batch,
109
102
  normalized=True,
110
103
  encoding_format=ENCODING_FORMAT,
111
- truncation_strategy=TRUNCATION_STRATEGY,
112
- request_options=self.get_request_options(),
113
- input=batch,
104
+ extra_headers={"User-Agent": USER_AGENT},
105
+ timeout=TIMEOUT,
114
106
  )
115
107
  return [datum.embedding for datum in response.data]
116
108
 
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
124
116
  return await self.embed_query(query="Q")
125
117
 
126
118
  @requires_dependencies(
127
- ["mixedbread_ai"],
119
+ ["mixedbread"],
128
120
  extras="embed-mixedbreadai",
129
121
  )
130
- def get_request_options(self) -> "RequestOptions":
131
- from mixedbread_ai.core import RequestOptions
132
-
133
- return RequestOptions(
134
- max_retries=MAX_RETRIES,
135
- timeout_in_seconds=TIMEOUT,
136
- additional_headers={"User-Agent": USER_AGENT},
137
- )
138
-
139
- def get_client(self) -> "AsyncMixedbreadAI":
122
+ def get_client(self) -> "AsyncMixedbread":
140
123
  return self.config.get_async_client()
141
124
 
142
- async def embed_batch(self, client: "AsyncMixedbreadAI", batch: list[str]) -> list[list[float]]:
143
- response = await client.embeddings(
125
+ async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
126
+ response = await client.embed(
144
127
  model=self.config.embedder_model_name,
128
+ input=batch,
145
129
  normalized=True,
146
130
  encoding_format=ENCODING_FORMAT,
147
- truncation_strategy=TRUNCATION_STRATEGY,
148
- request_options=self.get_request_options(),
149
- input=batch,
131
+ extra_headers={"User-Agent": USER_AGENT},
132
+ timeout=TIMEOUT,
150
133
  )
151
134
  return [datum.embedding for datum in response.data]
@@ -1,11 +1,11 @@
1
- import math
2
1
  from collections import abc
3
2
  from contextlib import contextmanager
4
3
  from dataclasses import dataclass, field
5
4
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
5
+ from time import time
6
+ from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union
7
7
 
8
- from pydantic import Field, Secret
8
+ from pydantic import BaseModel, Field, Secret
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import (
11
11
  FileData,
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
21
21
  DownloadResponse,
22
22
  Indexer,
23
23
  IndexerConfig,
24
+ download_responses,
24
25
  )
25
26
  from unstructured_ingest.logger import logger
26
27
  from unstructured_ingest.processes.connector_registry import (
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
37
38
  DEFAULT_R_SEP = "\n"
38
39
 
39
40
 
40
- @dataclass
41
- class JiraIssueMetadata:
41
+ class JiraIssueMetadata(BaseModel):
42
42
  id: str
43
43
  key: str
44
- board_id: Optional[str] = None
45
44
 
46
- @property
47
- def project_id(self) -> str:
45
+ def get_project_id(self) -> str:
48
46
  return self.key.split("-")[0]
49
47
 
50
- def to_dict(self) -> Dict[str, Union[str, None]]:
51
- return {
52
- "id": self.id,
53
- "key": self.key,
54
- "board_id": self.board_id,
55
- "project_id": self.project_id,
56
- }
57
-
58
48
 
59
49
  class FieldGetter(dict):
60
50
  def __getitem__(self, key):
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
77
67
  return obj
78
68
 
79
69
 
80
- def issues_fetcher_wrapper(func, results_key="results", number_of_issues_to_fetch: int = 100):
81
- """
82
- A decorator function that wraps around a function to fetch issues from Jira API in a paginated
83
- manner. This is required because the Jira API has a limit of 100 issues per request.
84
-
85
- Args:
86
- func (callable): The function to be wrapped. This function should accept `limit` and `start`
87
- as keyword arguments.
88
- results_key (str, optional): The key in the response dictionary that contains the list of
89
- results. Defaults to "results".
90
- number_of_issues_to_fetch (int, optional): The total number of issues to fetch. Defaults to
91
- 100.
92
-
93
- Returns:
94
- list: A list of all fetched issues.
95
-
96
- Raises:
97
- KeyError: If the response dictionary does not contain the specified `results_key`.
98
- TypeError: If the response type from the Jira API is neither list nor dict.
99
- """
100
-
101
- def wrapper(*args, **kwargs) -> list:
102
- kwargs["limit"] = min(100, number_of_issues_to_fetch)
103
- kwargs["start"] = kwargs.get("start", 0)
104
-
105
- all_results = []
106
- num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
107
-
108
- for _ in range(num_iterations):
109
- response = func(*args, **kwargs)
110
- if isinstance(response, list):
111
- all_results += response
112
- elif isinstance(response, dict):
113
- if results_key not in response:
114
- raise KeyError(f'Response object is missing "{results_key}" key.')
115
- all_results += response[results_key]
116
- else:
117
- raise TypeError(
118
- f"""Unexpected response type from Jira API.
119
- Response type has to be either list or dict, got: {type(response).__name__}."""
120
- )
121
- kwargs["start"] += kwargs["limit"]
122
-
123
- return all_results
124
-
125
- return wrapper
70
+ def api_token_based_generator(
71
+ fn: Callable, key: str = "issues", **kwargs
72
+ ) -> Generator[dict, None, None]:
73
+ nextPageToken = kwargs.pop("nextPageToken", None)
74
+ while True:
75
+ resp = fn(nextPageToken=nextPageToken, **kwargs)
76
+ issues = resp.get(key, [])
77
+ for issue in issues:
78
+ yield issue
79
+ nextPageToken = resp.get("nextPageToken")
80
+ if not nextPageToken:
81
+ break
82
+
83
+
84
+ def api_page_based_generator(
85
+ fn: Callable, key: str = "issues", **kwargs
86
+ ) -> Generator[dict, None, None]:
87
+ start = kwargs.pop("start", 0)
88
+ while True:
89
+ resp = fn(start=start, **kwargs)
90
+ issues = resp.get(key, [])
91
+ if not issues:
92
+ break
93
+ for issue in issues:
94
+ yield issue
95
+ start += len(issues)
126
96
 
127
97
 
128
98
  class JiraAccessConfig(AccessConfig):
@@ -169,28 +139,8 @@ class JiraConnectionConfig(ConnectionConfig):
169
139
  def get_client(self) -> Generator["Jira", None, None]:
170
140
  from atlassian import Jira
171
141
 
172
- class CustomJira(Jira):
173
- """
174
- Custom Jira class to fix the issue with the get_project_issues_count method.
175
- This class inherits from the original Jira class and overrides the method to
176
- handle the response correctly.
177
- Once the issue is fixed in the original library, this class can be removed.
178
- """
179
-
180
- def __init__(self, *args, **kwargs):
181
- super().__init__(*args, **kwargs)
182
-
183
- def get_project_issues_count(self, project: str) -> int:
184
- jql = f'project = "{project}" '
185
- response = self.jql(jql, fields="*none")
186
- response = cast("dict", response)
187
- if "total" in response:
188
- return response["total"]
189
- else:
190
- return len(response["issues"])
191
-
192
142
  access_configs = self.access_config.get_secret_value()
193
- with CustomJira(
143
+ with Jira(
194
144
  url=self.url,
195
145
  username=self.username,
196
146
  password=access_configs.password,
@@ -201,9 +151,17 @@ class JiraConnectionConfig(ConnectionConfig):
201
151
 
202
152
 
203
153
  class JiraIndexerConfig(IndexerConfig):
204
- projects: Optional[List[str]] = Field(None, description="List of project keys")
205
- boards: Optional[List[str]] = Field(None, description="List of board IDs")
206
- issues: Optional[List[str]] = Field(None, description="List of issue keys or IDs")
154
+ projects: Optional[list[str]] = Field(None, description="List of project keys")
155
+ boards: Optional[list[str]] = Field(None, description="List of board IDs")
156
+ issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
157
+ status_filters: Optional[list[str]] = Field(
158
+ default=None,
159
+ description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
160
+ )
161
+
162
+ def model_post_init(self, context: Any, /) -> None:
163
+ if not self.projects and not self.boards and not self.issues:
164
+ raise ValueError("At least one of projects, boards, or issues must be provided.")
207
165
 
208
166
 
209
167
  @dataclass
@@ -228,122 +186,111 @@ class JiraIndexer(Indexer):
228
186
  )
229
187
  logger.info("Connection to Jira successful.")
230
188
 
231
- def _get_issues_within_single_project(self, project_key: str) -> List[JiraIssueMetadata]:
189
+ def run_jql(self, jql: str, **kwargs) -> Generator[JiraIssueMetadata, None, None]:
232
190
  with self.connection_config.get_client() as client:
233
- number_of_issues_to_fetch = client.get_project_issues_count(project=project_key)
234
- if isinstance(number_of_issues_to_fetch, dict):
235
- if "total" not in number_of_issues_to_fetch:
236
- raise KeyError('Response object is missing "total" key.')
237
- number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
238
- if not number_of_issues_to_fetch:
239
- logger.warning(f"No issues found in project: {project_key}. Skipping!")
240
- return []
241
- get_project_issues = issues_fetcher_wrapper(
242
- client.get_all_project_issues,
243
- results_key="issues",
244
- number_of_issues_to_fetch=number_of_issues_to_fetch,
245
- )
246
- issues = get_project_issues(project=project_key, fields=["key", "id"])
247
- logger.debug(f"Found {len(issues)} issues in project: {project_key}")
248
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
249
-
250
- def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
251
- project_keys = self.index_config.projects
252
- if not project_keys:
253
- # for when a component list is provided, without any projects
254
- if self.index_config.boards or self.index_config.issues:
255
- return []
256
- # for when no components are provided. all projects will be ingested
191
+ if client.cloud:
192
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, **kwargs):
193
+ yield JiraIssueMetadata.model_validate(issue)
257
194
  else:
258
- with self.connection_config.get_client() as client:
259
- project_keys = [project["key"] for project in client.projects()]
260
- return [
261
- issue
262
- for project_key in project_keys
263
- for issue in self._get_issues_within_single_project(project_key)
264
- ]
195
+ for issue in api_page_based_generator(client.jql, jql=jql, **kwargs):
196
+ yield JiraIssueMetadata.model_validate(issue)
197
+
198
+ def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
199
+ fields = ["key", "id", "status"]
200
+ jql = "project in ({})".format(", ".join(self.index_config.projects))
201
+ jql = self._update_jql(jql)
202
+ logger.debug(f"running jql: {jql}")
203
+ return self.run_jql(jql=jql, fields=fields)
265
204
 
266
205
  def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
267
206
  with self.connection_config.get_client() as client:
268
- get_board_issues = issues_fetcher_wrapper(
269
- client.get_issues_for_board,
270
- results_key="issues",
271
- )
272
- issues = get_board_issues(board_id=board_id, fields=["key", "id"], jql=None)
273
- logger.debug(f"Found {len(issues)} issues in board: {board_id}")
274
- return [
275
- JiraIssueMetadata(id=issue["id"], key=issue["key"], board_id=board_id)
276
- for issue in issues
277
- ]
278
-
279
- def _get_issues_within_boards(self) -> List[JiraIssueMetadata]:
207
+ fields = ["key", "id"]
208
+ if self.index_config.status_filters:
209
+ jql = "status in ({}) ORDER BY id".format(
210
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
211
+ )
212
+ else:
213
+ jql = "ORDER BY id"
214
+ logger.debug(f"running jql for board {board_id}: {jql}")
215
+ for issue in api_page_based_generator(
216
+ fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
217
+ ):
218
+ yield JiraIssueMetadata.model_validate(issue)
219
+
220
+ def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
280
221
  if not self.index_config.boards:
281
- return []
282
- return [
283
- issue
284
- for board_id in self.index_config.boards
285
- for issue in self._get_issues_within_single_board(board_id)
286
- ]
287
-
288
- def _get_issues(self) -> List[JiraIssueMetadata]:
289
- with self.connection_config.get_client() as client:
290
- issues = [
291
- client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
292
- for issue_key in self.index_config.issues or []
293
- ]
294
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
295
-
296
- def get_issues(self) -> List[JiraIssueMetadata]:
297
- issues = [
298
- *self._get_issues_within_boards(),
299
- *self._get_issues_within_projects(),
300
- *self._get_issues(),
301
- ]
302
- # Select unique issues by issue 'id'.
303
- # Since boards issues are fetched first,
304
- # if there are duplicates, the board issues will be kept,
305
- # in order to keep issue 'board_id' information.
306
- seen = set()
307
- unique_issues: List[JiraIssueMetadata] = []
308
- for issue in issues:
309
- if issue.id not in seen:
310
- unique_issues.append(issue)
311
- seen.add(issue.id)
312
- return unique_issues
313
-
314
- def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
315
- from time import time
316
-
317
- issues = self.get_issues()
318
- for issue in issues:
319
- # Build metadata
320
- metadata = FileDataSourceMetadata(
321
- date_processed=str(time()),
322
- record_locator=issue.to_dict(),
222
+ yield
223
+ for board_id in self.index_config.boards:
224
+ for issue in self._get_issues_within_single_board(board_id=board_id):
225
+ yield issue
226
+
227
+ def _update_jql(self, jql: str) -> str:
228
+ if self.index_config.status_filters:
229
+ jql += " and status in ({})".format(
230
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
323
231
  )
232
+ jql = jql + " ORDER BY id"
233
+ return jql
234
+
235
+ def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
236
+ fields = ["key", "id"]
237
+ jql = "key in ({})".format(", ".join(self.index_config.issues))
238
+ jql = self._update_jql(jql)
239
+ logger.debug(f"running jql: {jql}")
240
+ return self.run_jql(jql=jql, fields=fields)
241
+
242
+ def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
243
+ # Build metadata
244
+ metadata = FileDataSourceMetadata(
245
+ date_processed=str(time()),
246
+ record_locator=issue.model_dump(),
247
+ )
324
248
 
325
- # Construct relative path and filename
326
- filename = f"{issue.id}.txt"
327
- relative_path = str(Path(issue.project_id) / filename)
249
+ # Construct relative path and filename
250
+ filename = f"{issue.id}.txt"
251
+ relative_path = str(Path(issue.get_project_id()) / filename)
328
252
 
329
- source_identifiers = SourceIdentifiers(
330
- filename=filename,
331
- fullpath=relative_path,
332
- rel_path=relative_path,
333
- )
253
+ source_identifiers = SourceIdentifiers(
254
+ filename=filename,
255
+ fullpath=relative_path,
256
+ rel_path=relative_path,
257
+ )
334
258
 
335
- file_data = FileData(
336
- identifier=issue.id,
337
- connector_type=self.connector_type,
338
- metadata=metadata,
339
- additional_metadata=issue.to_dict(),
340
- source_identifiers=source_identifiers,
341
- )
342
- yield file_data
259
+ file_data = FileData(
260
+ identifier=issue.id,
261
+ connector_type=self.connector_type,
262
+ metadata=metadata,
263
+ additional_metadata=issue.model_dump(),
264
+ source_identifiers=source_identifiers,
265
+ )
266
+ return file_data
267
+
268
+ def get_generators(self) -> List[Callable]:
269
+ generators = []
270
+ if self.index_config.boards:
271
+ generators.append(self._get_issues_within_boards)
272
+ if self.index_config.issues:
273
+ generators.append(self._get_issues_by_keys)
274
+ if self.index_config.projects:
275
+ generators.append(self._get_issues_within_projects)
276
+ return generators
277
+
278
+ def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
279
+ seen_keys = []
280
+ for gen in self.get_generators():
281
+ for issue in gen():
282
+ if not issue:
283
+ continue
284
+ if issue.key in seen_keys:
285
+ continue
286
+ seen_keys.append(issue.key)
287
+ yield self._create_file_data_from_issue(issue=issue)
343
288
 
344
289
 
345
290
  class JiraDownloaderConfig(DownloaderConfig):
346
- pass
291
+ download_attachments: bool = Field(
292
+ default=False, description="If True, will download any attachments and process as well"
293
+ )
347
294
 
348
295
 
349
296
  @dataclass
@@ -448,7 +395,56 @@ class JiraDownloader(Downloader):
448
395
  logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
449
396
  raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
450
397
 
451
- def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
398
+ def generate_attachment_file_data(
399
+ self, attachment_dict: dict, parent_filedata: FileData
400
+ ) -> FileData:
401
+ new_filedata = parent_filedata.model_copy(deep=True)
402
+ if new_filedata.metadata.record_locator is None:
403
+ new_filedata.metadata.record_locator = {}
404
+ new_filedata.metadata.record_locator["parent_issue"] = (
405
+ parent_filedata.metadata.record_locator["id"]
406
+ )
407
+ # Append an identifier for attachment to not conflict with issue ids
408
+ new_filedata.identifier = "{}a".format(attachment_dict["id"])
409
+ filename = attachment_dict["filename"]
410
+ new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
411
+ new_filedata.metadata.date_created = attachment_dict.pop("created", None)
412
+ new_filedata.metadata.url = attachment_dict.pop("self", None)
413
+ new_filedata.metadata.record_locator = attachment_dict
414
+ new_filedata.source_identifiers = SourceIdentifiers(
415
+ filename=filename,
416
+ fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
417
+ )
418
+ return new_filedata
419
+
420
+ def process_attachments(
421
+ self, file_data: FileData, attachments: list[dict]
422
+ ) -> list[DownloadResponse]:
423
+ with self.connection_config.get_client() as client:
424
+ download_path = self.get_download_path(file_data)
425
+ attachment_download_dir = download_path.parent / "attachments"
426
+ attachment_download_dir.mkdir(parents=True, exist_ok=True)
427
+ download_responses = []
428
+ for attachment in attachments:
429
+ attachment_filename = Path(attachment["filename"])
430
+ attachment_id = attachment["id"]
431
+ attachment_download_path = attachment_download_dir / Path(
432
+ attachment_id
433
+ ).with_suffix(attachment_filename.suffix)
434
+ resp = client.get_attachment_content(attachment_id=attachment_id)
435
+ with open(attachment_download_path, "wb") as f:
436
+ f.write(resp)
437
+ attachment_filedata = self.generate_attachment_file_data(
438
+ attachment_dict=attachment, parent_filedata=file_data
439
+ )
440
+ download_responses.append(
441
+ self.generate_download_response(
442
+ file_data=attachment_filedata, download_path=attachment_download_path
443
+ )
444
+ )
445
+ return download_responses
446
+
447
+ def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
452
448
  issue_key = file_data.additional_metadata.get("key")
453
449
  if not issue_key:
454
450
  raise ValueError("Issue key not found in metadata.")
@@ -463,7 +459,17 @@ class JiraDownloader(Downloader):
463
459
  with open(download_path, "w") as f:
464
460
  f.write(issue_str)
465
461
  self.update_file_data(file_data, issue)
466
- return self.generate_download_response(file_data=file_data, download_path=download_path)
462
+ download_response = self.generate_download_response(
463
+ file_data=file_data, download_path=download_path
464
+ )
465
+ if self.download_config.download_attachments and (
466
+ attachments := issue.get("fields", {}).get("attachment")
467
+ ):
468
+ attachment_responses = self.process_attachments(
469
+ file_data=file_data, attachments=attachments
470
+ )
471
+ download_response = [download_response] + attachment_responses
472
+ return download_response
467
473
 
468
474
 
469
475
  jira_source_entry = SourceRegistryEntry(
@@ -1,5 +1,6 @@
1
1
  from typing import Dict
2
2
 
3
+ from unstructured_ingest.logger import logger
3
4
  from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
4
5
 
5
6
  from .checkbox import Checkbox, CheckboxCell
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
25
26
  from .url import URL, URLCell
26
27
  from .verification import Verification, VerificationCell
27
28
 
29
+ # It's possible to add 'button' property to Notion database.
30
+ # However, current Notion API documentation doesn't mention it.
31
+ # Buttons are only functional inside Notion UI. We can simply
32
+ # ignore them so that the we don't throw an error when trying
33
+ # to map 'button' properties.
34
+ unsupported_db_prop_types = ["button"]
35
+
28
36
  db_prop_type_mapping = {
29
37
  "checkbox": Checkbox,
30
38
  "created_by": CreatedBy,
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
55
63
  mapped_dict = {}
56
64
  for k, v in props.items():
57
65
  try:
58
- mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore
66
+ property_type = v["type"]
67
+ if property_type in unsupported_db_prop_types:
68
+ logger.warning(
69
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
70
+ )
71
+ continue
72
+ mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
59
73
  except KeyError as ke:
60
74
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
61
75
 
@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
92
106
  mapped_dict = {}
93
107
  for k, v in props.items():
94
108
  try:
95
- t = v["type"]
96
- mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore
109
+ property_type = v["type"]
110
+ if property_type in unsupported_db_prop_types:
111
+ logger.warning(
112
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
113
+ )
114
+ continue
115
+ mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
97
116
  except KeyError as ke:
98
117
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
99
118
 
@@ -13,6 +13,7 @@ class Checkbox(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "checkbox"
16
+ description: Optional[str] = None
16
17
  checkbox: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -13,6 +13,7 @@ class CreatedBy(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "created_by"
16
+ description: Optional[str] = None
16
17
  created_by: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -12,6 +12,7 @@ class CreatedTime(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "created_time"
15
+ description: Optional[str] = None
15
16
  created_time: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -13,6 +13,7 @@ class Date(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "date"
16
+ description: Optional[str] = None
16
17
  date: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -12,6 +12,7 @@ class Email(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "email"
15
+ description: Optional[str] = None
15
16
  email: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -13,6 +13,7 @@ class Files(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "files"
16
+ description: Optional[str] = None
16
17
  files: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -26,6 +26,7 @@ class Formula(DBPropertyBase):
26
26
  name: str
27
27
  formula: FormulaProp
28
28
  type: str = "formula"
29
+ description: Optional[str] = None
29
30
 
30
31
  @classmethod
31
32
  def from_dict(cls, data: dict):
@@ -23,7 +23,7 @@ class LastEditedByCell(DBCellBase):
23
23
  id: str
24
24
  last_edited_by: People
25
25
  type: str = "last_edited_by"
26
-
26
+ description: Optional[str] = None
27
27
  name: Optional[str] = None
28
28
 
29
29
  @classmethod
@@ -12,6 +12,7 @@ class LastEditedTime(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "last_edited_time"
15
+ description: Optional[str] = None
15
16
  last_edited_time: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -38,6 +38,7 @@ class MultiSelect(DBPropertyBase):
38
38
  name: str
39
39
  multi_select: MultiSelectProp
40
40
  type: str = "multi_select"
41
+ description: Optional[str] = None
41
42
 
42
43
  @classmethod
43
44
  def from_dict(cls, data: dict):
@@ -26,6 +26,7 @@ class Number(DBPropertyBase):
26
26
  name: str
27
27
  number: NumberProp
28
28
  type: str = "number"
29
+ description: Optional[str] = None
29
30
 
30
31
  @classmethod
31
32
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class People(DBPropertyBase):
14
14
  name: str
15
15
  description: Optional[str] = None
16
16
  type: str = "people"
17
+ description: Optional[str] = None
17
18
  people: dict = field(default_factory=dict)
18
19
 
19
20
  @classmethod
@@ -12,6 +12,7 @@ class PhoneNumber(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "phone_number"
15
+ description: Optional[str] = None
15
16
  phone_number: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -45,6 +45,7 @@ class Relation(DBPropertyBase):
45
45
  name: str
46
46
  relation: RelationProp
47
47
  type: str = "relation"
48
+ description: Optional[str] = None
48
49
 
49
50
  @classmethod
50
51
  def from_dict(cls, data: dict):
@@ -15,6 +15,7 @@ class RichText(DBPropertyBase):
15
15
  id: str
16
16
  name: str
17
17
  type: str = "rich_text"
18
+ description: Optional[str] = None
18
19
  rich_text: dict = field(default_factory=dict)
19
20
 
20
21
  @classmethod
@@ -30,6 +30,7 @@ class Rollup(DBPropertyBase):
30
30
  name: str
31
31
  rollup: RollupProp
32
32
  type: str = "rollup"
33
+ description: Optional[str] = None
33
34
 
34
35
  @classmethod
35
36
  def from_dict(cls, data: dict):
@@ -38,8 +38,8 @@ class Select(DBPropertyBase):
38
38
  id: str
39
39
  name: str
40
40
  select: SelectProp
41
- description: Optional[str] = None
42
41
  type: str = "select"
42
+ description: Optional[str] = None
43
43
 
44
44
  @classmethod
45
45
  def from_dict(cls, data: dict):
@@ -55,6 +55,7 @@ class Status(DBPropertyBase):
55
55
  name: str
56
56
  status: StatusProp
57
57
  type: str = "status"
58
+ description: Optional[str] = None
58
59
 
59
60
  @classmethod
60
61
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class Title(DBPropertyBase):
14
14
  name: str
15
15
  type: str = "title"
16
16
  title: dict = field(default_factory=dict)
17
+ description: Optional[str] = None
17
18
 
18
19
  @classmethod
19
20
  def from_dict(cls, data: dict):
@@ -17,6 +17,7 @@ class UniqueID(DBPropertyBase):
17
17
  name: str
18
18
  type: str = "unique_id"
19
19
  unique_id: dict = field(default_factory=dict)
20
+ description: Optional[str] = None
20
21
 
21
22
  @classmethod
22
23
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class URL(DBPropertyBase):
14
14
  name: str
15
15
  type: str = "url"
16
16
  url: dict = field(default_factory=dict)
17
+ description: Optional[str] = None
17
18
 
18
19
  @classmethod
19
20
  def from_dict(cls, data: dict):
@@ -20,6 +20,7 @@ class Verification(DBPropertyBase):
20
20
  name: str
21
21
  type: str = "verification"
22
22
  verification: dict = field(default_factory=dict)
23
+ description: Optional[str] = None
23
24
 
24
25
  @classmethod
25
26
  def from_dict(cls, data: dict):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.21
3
+ Version: 1.0.24
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
114
114
  Provides-Extra: milvus
115
115
  Requires-Dist: pymilvus; extra == 'milvus'
116
116
  Provides-Extra: mixedbreadai
117
- Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
117
+ Requires-Dist: mixedbread; extra == 'mixedbreadai'
118
118
  Provides-Extra: mongodb
119
119
  Requires-Dist: pymongo; extra == 'mongodb'
120
120
  Provides-Extra: msg
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=_fAo4tbdJV7k_s1lgXUPPmLFVpxbTy7HhoN9KbPxQ4Y,43
2
+ unstructured_ingest/__version__.py,sha256=p1Nz9H4WBA_aI3GL1htUsWwzMmx5t9ktPqeOxmax3ms,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -26,7 +26,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9
26
26
  unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
27
27
  unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
28
28
  unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
29
- unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
29
+ unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
30
30
  unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
31
31
  unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
32
32
  unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
73
73
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
74
74
  unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
75
75
  unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
76
- unstructured_ingest/processes/connectors/jira.py,sha256=alnwUYyID-mUIlGq1xh5QGEw2iZ2RwbOIyptev3dI6Q,18011
76
+ unstructured_ingest/processes/connectors/jira.py,sha256=a7OuVi4RFfr22Tqgk60lwmtWTRBw2fI1m8KPqfA8Ffo,18504
77
77
  unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
78
78
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
79
79
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
@@ -174,29 +174,29 @@ unstructured_ingest/processes/connectors/notion/types/blocks/todo.py,sha256=Kiga
174
174
  unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py,sha256=6ae_eR3SOfUgTw-XO_F3JRBaczSp8UZfLBFMRMO5NHo,1188
175
175
  unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py,sha256=q_p9XH8sQB8xwFqi9yEl6Fvur3fTLdeVdQCh0gSju58,442
176
176
  unstructured_ingest/processes/connectors/notion/types/blocks/video.py,sha256=XK-O7XPs5ejTUWrg2FTLvbOZajs-yDtVhR79HSEcxvo,779
177
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=iUdtLGlHe52daXBWVlGghXcGSxCOCDiFASsuKb4_UAM,3225
178
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=1GiebXsYYoQmM1GZJBrzv9dnM9P9dtQJ-dwbQDo6PdI,1010
179
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=NvOuM_1SVBdn-6acYxKJ1ThMuWJ935aYtaKV1TOBTFQ,949
180
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=HPf6Dp7wWWRQ-j7AGFWMm-wkakuJ4R8_rfz9RPNZuec,834
181
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=5nYyh8S9HzFyZb2bZZttSZj8CE0Q2dc73cA27m-H1k4,1067
182
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=0powD6-ZftD0WSHxjTHMQ388RK-WfWLaYuEJIT19mdM,831
183
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=1-Jnx1YyWptNW3737oOzr7WGItS7aNj68BYaWyBRljo,1020
184
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=LV9d6NRLGu7eIr6wpfnkLxgQug107LGAFC1Z9ooc3xw,1069
185
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=1TMFWA5GIb6pJbYAF_Q2ZPOWNa9y0p8ELk6UzLMZ2FY,926
186
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=MvZgXTBx1uS-vn9vITFCIRPFnV26_rcao1YhBiZu5bU,864
187
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=WnEJyDmEaI39ETopeoXwFp6Iog-rF2TYWG3o-DRv2ic,1917
188
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=LbrTryg5z8Q1dLekxwXi_qCvOwH6n_LD4iVYsVBNGB4,1056
189
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=LXNbhBo53Lf7jM_bLYgarwLao7ymEJjZhJ-U5xMBbLc,1184
190
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=NoIVkd31mvvZYa0DOkE4JibJdhJjIEL8C78jNxcxtVo,909
191
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=e4zQmjtP2w1RNfKDaWu2-AOhwjM_QP_TrjyxAQUnpN4,1528
192
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=EeWvzLx9zYT3crH_eshAOf1L60k5gMqJUqQGiI-is7c,1171
193
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=xyaA2yIvDfe-tu_TXi6ZjCz2jCawFS2DVKvfSr-nxjQ,1277
194
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=Qy-t9kmJq5cZKohxNX2DbvIftX95CvDX6JMDVWzVYl4,1794
195
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=GneqbmGaBqOecoApH6I4YWf5flvuVk2m5wMJrRw7Nvo,2044
196
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=4Tnfxxx_Nr-Wkwd7urotp1xTGO9itWe8qI3OnfooLrE,1023
197
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=i95msSj2-l3yG8tw02dIChADDEy695MWmNWXJq0jGD4,1173
198
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=azWD7VwkYlI4vFYgsuMvr2618skr4MCtagSTuJ509bg,873
199
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=bge_vjWUYh9-CJ_gmVg7ESVLAxHeq_wicp-6vlqNRwQ,2367
177
+ unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=6kUXmCI58R1e50b1U-_xqrrPw3g2Mqtbt02aC7DVAxw,4118
178
+ unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=uS2B4nQ-ISt8QGxw7nNwst8MX5xRTecSvqokZ23DKyA,1048
179
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=RcpjFijEwyuGrPhSjrXT1nxaLoX2mnCvjveZ0f5Ke3c,987
180
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=eJjkK1nKb0-Ohi4lpCplbrUTkCgf4D2gWbFxEhDI_G8,872
181
+ unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=DECAkkhR6qQ-WKsOzQf2VPdYGcyrnAJNk4y4JHDVDuc,1105
182
+ unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=yOHIV_fpF9xzqcrkRIC4cF_aC8C7RsJJvRtEgSn30a8,869
183
+ unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=5_6FpFWoKNriaBRLtNDRUxu1ZO1UTvAFeu4H55VNY68,1058
184
+ unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=zcTeVuXpmuMNh4FHJHW5zgKWAqo0Wx7s9UsSEvA_wR8,1107
185
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=WpWlXz9AwS1rugpvoDoVOo055dVEAt3XmvudD17HJu8,963
186
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=kvmMmEXj3WndR9BG9MHwuM40luA4XhGfnF6rKDpYiF0,902
187
+ unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=LSDfUXpgUox7Z77_TKIlKHqYPUgO8Y06lVgvju6NXx8,1955
188
+ unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=y4ocq8_yX_yKkAtM3qcqIueM9y96-47gshM2mra_tgw,1094
189
+ unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=HBTbqw1L1h8XHKuuS0e0aoz0dAZXSLDy7zRwM1_rRps,1222
190
+ unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=mBEcwNLCI-FLU6t2FqR_tNTvrJFIQ7hqYeTB51HavBc,947
191
+ unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=fYYyzBhi1fmXrhdxu6W6uMr2e6HaDCfrvY7yZIFvgmM,1566
192
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=D2y-98U7MP-YmsrAPeT4vqG3m7HB4zoOzMMhhYN8VHY,1209
193
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=r8UXCW7Y-eE5W6RpXiyyMszCMRDtiwBmYOmYHZ_9-VY,1315
194
+ unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=-UAIuddoyKol45epuOYNlS8dchuwL0wMGwash4BwuH4,1794
195
+ unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=kUFZsWGQZAApEbs5qI37t8LPN0vUM5vcu4pPbEvIGkE,2082
196
+ unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=yd1vPbCBgGIbtUsC3zOu3-Cdpcst0dEkuFVdtS97hxA,1061
197
+ unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=e8WslwVD6ccf4x_3NihX4BWtH7y4zMAFH7Ur4jS3dH8,1211
198
+ unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=Bxu8x7mmH28l2nQSaAmygal8dZdUdHEFbUYIk75B0iQ,911
199
+ unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=B9J4zXHVJzzIcn_QD04Z9eibEij1ornw5RhZ2qmUdDU,2405
200
200
  unstructured_ingest/processes/connectors/qdrant/__init__.py,sha256=7WN_3M3qQ0O7pUJSXIKtPqAvKX2tQ_WxClCHbFeqPfc,757
201
201
  unstructured_ingest/processes/connectors/qdrant/cloud.py,sha256=H5Plp2xqFheESLertj56o78CL4exyCQhBDE1TGAzcWU,1618
202
202
  unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRiIHMPcctKyVBdsaLi8KXloAwq76o,1582
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.21.dist-info/METADATA,sha256=lYMmxWJ0ySauI_NWrAQo4YZQ7pXAK4bZ0dX0XIsgacE,8694
235
- unstructured_ingest-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.21.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.21.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.24.dist-info/METADATA,sha256=Ssmaf7onq6HIFmhR7f2mMPoS2gqGy6dmvxo605W_dWU,8691
235
+ unstructured_ingest-1.0.24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.24.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.24.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.24.dist-info/RECORD,,