unstructured-ingest 1.0.19__py3-none-any.whl → 1.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (32)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/mixedbreadai.py +28 -45
  3. unstructured_ingest/processes/connectors/jira.py +209 -171
  4. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
  5. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
  6. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
  7. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
  8. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
  9. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
  10. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
  11. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
  12. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
  13. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
  14. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
  15. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
  16. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
  17. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
  18. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
  19. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
  20. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
  21. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
  22. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
  23. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
  24. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
  25. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
  26. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
  27. unstructured_ingest/processes/connectors/weaviate/weaviate.py +30 -13
  28. {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/METADATA +2 -2
  29. {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/RECORD +32 -32
  30. {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/WHEEL +0 -0
  31. {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/entry_points.txt +0 -0
  32. {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/licenses/LICENSE.md +0 -0
@@ -1 +1 @@
1
- __version__ = "1.0.19" # pragma: no cover
1
+ __version__ = "1.0.23" # pragma: no cover
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
19
19
 
20
20
 
21
21
  if TYPE_CHECKING:
22
- from mixedbread_ai.client import AsyncMixedbreadAI, MixedbreadAI
23
- from mixedbread_ai.core import RequestOptions
22
+ from mixedbread import AsyncMixedbread, Mixedbread
24
23
 
25
24
 
26
25
  class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
44
43
  )
45
44
 
46
45
  @requires_dependencies(
47
- ["mixedbread_ai"],
48
- extras="mixedbreadai",
46
+ ["mixedbread"],
47
+ extras="embed-mixedbreadai",
49
48
  )
50
- def get_client(self) -> "MixedbreadAI":
49
+ def get_client(self) -> "Mixedbread":
51
50
  """
52
51
  Create the Mixedbread AI client.
53
52
 
54
53
  Returns:
55
- MixedbreadAI: Initialized client.
54
+ Mixedbread: Initialized client.
56
55
  """
57
- from mixedbread_ai.client import MixedbreadAI
56
+ from mixedbread import Mixedbread
58
57
 
59
- return MixedbreadAI(
58
+ return Mixedbread(
60
59
  api_key=self.api_key.get_secret_value(),
60
+ max_retries=MAX_RETRIES,
61
61
  )
62
62
 
63
63
  @requires_dependencies(
64
- ["mixedbread_ai"],
65
- extras="mixedbreadai",
64
+ ["mixedbread"],
65
+ extras="embed-mixedbreadai",
66
66
  )
67
- def get_async_client(self) -> "AsyncMixedbreadAI":
68
- from mixedbread_ai.client import AsyncMixedbreadAI
67
+ def get_async_client(self) -> "AsyncMixedbread":
68
+ from mixedbread import AsyncMixedbread
69
69
 
70
- return AsyncMixedbreadAI(
70
+ return AsyncMixedbread(
71
71
  api_key=self.api_key.get_secret_value(),
72
+ max_retries=MAX_RETRIES,
72
73
  )
73
74
 
74
75
 
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
88
89
  return self.embed_query(query="Q")
89
90
 
90
91
  @requires_dependencies(
91
- ["mixedbread_ai"],
92
+ ["mixedbread"],
92
93
  extras="embed-mixedbreadai",
93
94
  )
94
- def get_request_options(self) -> "RequestOptions":
95
- from mixedbread_ai.core import RequestOptions
96
-
97
- return RequestOptions(
98
- max_retries=MAX_RETRIES,
99
- timeout_in_seconds=TIMEOUT,
100
- additional_headers={"User-Agent": USER_AGENT},
101
- )
102
-
103
- def get_client(self) -> "MixedbreadAI":
95
+ def get_client(self) -> "Mixedbread":
104
96
  return self.config.get_client()
105
97
 
106
- def embed_batch(self, client: "MixedbreadAI", batch: list[str]) -> list[list[float]]:
107
- response = client.embeddings(
98
+ def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
99
+ response = client.embed(
108
100
  model=self.config.embedder_model_name,
101
+ input=batch,
109
102
  normalized=True,
110
103
  encoding_format=ENCODING_FORMAT,
111
- truncation_strategy=TRUNCATION_STRATEGY,
112
- request_options=self.get_request_options(),
113
- input=batch,
104
+ extra_headers={"User-Agent": USER_AGENT},
105
+ timeout=TIMEOUT,
114
106
  )
115
107
  return [datum.embedding for datum in response.data]
116
108
 
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
124
116
  return await self.embed_query(query="Q")
125
117
 
126
118
  @requires_dependencies(
127
- ["mixedbread_ai"],
119
+ ["mixedbread"],
128
120
  extras="embed-mixedbreadai",
129
121
  )
130
- def get_request_options(self) -> "RequestOptions":
131
- from mixedbread_ai.core import RequestOptions
132
-
133
- return RequestOptions(
134
- max_retries=MAX_RETRIES,
135
- timeout_in_seconds=TIMEOUT,
136
- additional_headers={"User-Agent": USER_AGENT},
137
- )
138
-
139
- def get_client(self) -> "AsyncMixedbreadAI":
122
+ def get_client(self) -> "AsyncMixedbread":
140
123
  return self.config.get_async_client()
141
124
 
142
- async def embed_batch(self, client: "AsyncMixedbreadAI", batch: list[str]) -> list[list[float]]:
143
- response = await client.embeddings(
125
+ async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
126
+ response = await client.embed(
144
127
  model=self.config.embedder_model_name,
128
+ input=batch,
145
129
  normalized=True,
146
130
  encoding_format=ENCODING_FORMAT,
147
- truncation_strategy=TRUNCATION_STRATEGY,
148
- request_options=self.get_request_options(),
149
- input=batch,
131
+ extra_headers={"User-Agent": USER_AGENT},
132
+ timeout=TIMEOUT,
150
133
  )
151
134
  return [datum.embedding for datum in response.data]
@@ -1,11 +1,11 @@
1
- import math
2
1
  from collections import abc
3
2
  from contextlib import contextmanager
4
3
  from dataclasses import dataclass, field
5
4
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union
5
+ from time import time
6
+ from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union, cast
7
7
 
8
- from pydantic import Field, Secret
8
+ from pydantic import BaseModel, Field, Secret
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import (
11
11
  FileData,
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
21
21
  DownloadResponse,
22
22
  Indexer,
23
23
  IndexerConfig,
24
+ download_responses,
24
25
  )
25
26
  from unstructured_ingest.logger import logger
26
27
  from unstructured_ingest.processes.connector_registry import (
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
37
38
  DEFAULT_R_SEP = "\n"
38
39
 
39
40
 
40
- @dataclass
41
- class JiraIssueMetadata:
41
+ class JiraIssueMetadata(BaseModel):
42
42
  id: str
43
43
  key: str
44
- board_id: Optional[str] = None
45
44
 
46
- @property
47
- def project_id(self) -> str:
45
+ def get_project_id(self) -> str:
48
46
  return self.key.split("-")[0]
49
47
 
50
- def to_dict(self) -> Dict[str, Union[str, None]]:
51
- return {
52
- "id": self.id,
53
- "key": self.key,
54
- "board_id": self.board_id,
55
- "project_id": self.project_id,
56
- }
57
-
58
48
 
59
49
  class FieldGetter(dict):
60
50
  def __getitem__(self, key):
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
77
67
  return obj
78
68
 
79
69
 
80
- def issues_fetcher_wrapper(func, results_key="results", number_of_issues_to_fetch: int = 100):
81
- """
82
- A decorator function that wraps around a function to fetch issues from Jira API in a paginated
83
- manner. This is required because the Jira API has a limit of 100 issues per request.
84
-
85
- Args:
86
- func (callable): The function to be wrapped. This function should accept `limit` and `start`
87
- as keyword arguments.
88
- results_key (str, optional): The key in the response dictionary that contains the list of
89
- results. Defaults to "results".
90
- number_of_issues_to_fetch (int, optional): The total number of issues to fetch. Defaults to
91
- 100.
92
-
93
- Returns:
94
- list: A list of all fetched issues.
95
-
96
- Raises:
97
- KeyError: If the response dictionary does not contain the specified `results_key`.
98
- TypeError: If the response type from the Jira API is neither list nor dict.
99
- """
100
-
101
- def wrapper(*args, **kwargs) -> list:
102
- kwargs["limit"] = min(100, number_of_issues_to_fetch)
103
- kwargs["start"] = kwargs.get("start", 0)
104
-
105
- all_results = []
106
- num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
107
-
108
- for _ in range(num_iterations):
109
- response = func(*args, **kwargs)
110
- if isinstance(response, list):
111
- all_results += response
112
- elif isinstance(response, dict):
113
- if results_key not in response:
114
- raise KeyError(f'Response object is missing "{results_key}" key.')
115
- all_results += response[results_key]
116
- else:
117
- raise TypeError(
118
- f"""Unexpected response type from Jira API.
119
- Response type has to be either list or dict, got: {type(response).__name__}."""
120
- )
121
- kwargs["start"] += kwargs["limit"]
122
-
123
- return all_results
124
-
125
- return wrapper
70
+ def api_token_based_generator(
71
+ fn: Callable, key: str = "issues", **kwargs
72
+ ) -> Generator[dict, None, None]:
73
+ nextPageToken = kwargs.pop("nextPageToken", None)
74
+ while True:
75
+ resp = fn(nextPageToken=nextPageToken, **kwargs)
76
+ issues = resp.get(key, [])
77
+ for issue in issues:
78
+ yield issue
79
+ nextPageToken = resp.get("nextPageToken")
80
+ if not nextPageToken:
81
+ break
82
+
83
+
84
+ def api_page_based_generator(
85
+ fn: Callable, key: str = "issues", **kwargs
86
+ ) -> Generator[dict, None, None]:
87
+ start = kwargs.pop("start", 0)
88
+ while True:
89
+ resp = fn(start=start, **kwargs)
90
+ issues = resp.get(key, [])
91
+ if not issues:
92
+ break
93
+ for issue in issues:
94
+ yield issue
95
+ start += len(issues)
126
96
 
127
97
 
128
98
  class JiraAccessConfig(AccessConfig):
@@ -169,8 +139,28 @@ class JiraConnectionConfig(ConnectionConfig):
169
139
  def get_client(self) -> Generator["Jira", None, None]:
170
140
  from atlassian import Jira
171
141
 
142
+ class CustomJira(Jira):
143
+ """
144
+ Custom Jira class to fix the issue with the get_project_issues_count method.
145
+ This class inherits from the original Jira class and overrides the method to
146
+ handle the response correctly.
147
+ Once the issue is fixed in the original library, this class can be removed.
148
+ """
149
+
150
+ def __init__(self, *args, **kwargs):
151
+ super().__init__(*args, **kwargs)
152
+
153
+ def get_project_issues_count(self, project: str) -> int:
154
+ jql = f'project = "{project}" '
155
+ response = self.jql(jql, fields="*none")
156
+ response = cast("dict", response)
157
+ if "total" in response:
158
+ return response["total"]
159
+ else:
160
+ return len(response["issues"])
161
+
172
162
  access_configs = self.access_config.get_secret_value()
173
- with Jira(
163
+ with CustomJira(
174
164
  url=self.url,
175
165
  username=self.username,
176
166
  password=access_configs.password,
@@ -181,9 +171,17 @@ class JiraConnectionConfig(ConnectionConfig):
181
171
 
182
172
 
183
173
  class JiraIndexerConfig(IndexerConfig):
184
- projects: Optional[List[str]] = Field(None, description="List of project keys")
185
- boards: Optional[List[str]] = Field(None, description="List of board IDs")
186
- issues: Optional[List[str]] = Field(None, description="List of issue keys or IDs")
174
+ projects: Optional[list[str]] = Field(None, description="List of project keys")
175
+ boards: Optional[list[str]] = Field(None, description="List of board IDs")
176
+ issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
177
+ status_filters: Optional[list[str]] = Field(
178
+ default=None,
179
+ description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
180
+ )
181
+
182
+ def model_post_init(self, context: Any, /) -> None:
183
+ if not self.projects and not self.boards and not self.issues:
184
+ raise ValueError("At least one of projects, boards, or issues must be provided.")
187
185
 
188
186
 
189
187
  @dataclass
@@ -208,122 +206,103 @@ class JiraIndexer(Indexer):
208
206
  )
209
207
  logger.info("Connection to Jira successful.")
210
208
 
211
- def _get_issues_within_single_project(self, project_key: str) -> List[JiraIssueMetadata]:
209
+ def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
212
210
  with self.connection_config.get_client() as client:
213
- number_of_issues_to_fetch = client.get_project_issues_count(project=project_key)
214
- if isinstance(number_of_issues_to_fetch, dict):
215
- if "total" not in number_of_issues_to_fetch:
216
- raise KeyError('Response object is missing "total" key.')
217
- number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
218
- if not number_of_issues_to_fetch:
219
- logger.warning(f"No issues found in project: {project_key}. Skipping!")
220
- return []
221
- get_project_issues = issues_fetcher_wrapper(
222
- client.get_all_project_issues,
223
- results_key="issues",
224
- number_of_issues_to_fetch=number_of_issues_to_fetch,
225
- )
226
- issues = get_project_issues(project=project_key, fields=["key", "id"])
227
- logger.debug(f"Found {len(issues)} issues in project: {project_key}")
228
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
229
-
230
- def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
231
- project_keys = self.index_config.projects
232
- if not project_keys:
233
- # for when a component list is provided, without any projects
234
- if self.index_config.boards or self.index_config.issues:
235
- return []
236
- # for when no components are provided. all projects will be ingested
237
- else:
238
- with self.connection_config.get_client() as client:
239
- project_keys = [project["key"] for project in client.projects()]
240
- return [
241
- issue
242
- for project_key in project_keys
243
- for issue in self._get_issues_within_single_project(project_key)
244
- ]
211
+ fields = ["key", "id"]
212
+ jql = "project in ({})".format(", ".join(self.index_config.projects))
213
+ jql = self._update_jql(jql)
214
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
215
+ yield JiraIssueMetadata.model_validate(issue)
245
216
 
246
217
  def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
247
218
  with self.connection_config.get_client() as client:
248
- get_board_issues = issues_fetcher_wrapper(
249
- client.get_issues_for_board,
250
- results_key="issues",
251
- )
252
- issues = get_board_issues(board_id=board_id, fields=["key", "id"], jql=None)
253
- logger.debug(f"Found {len(issues)} issues in board: {board_id}")
254
- return [
255
- JiraIssueMetadata(id=issue["id"], key=issue["key"], board_id=board_id)
256
- for issue in issues
257
- ]
258
-
259
- def _get_issues_within_boards(self) -> List[JiraIssueMetadata]:
219
+ fields = ["key", "id"]
220
+ if self.index_config.status_filters:
221
+ jql = "status in ({}) ORDER BY id".format(
222
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
223
+ )
224
+ else:
225
+ jql = "ORDER BY id"
226
+ for issue in api_page_based_generator(
227
+ fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
228
+ ):
229
+ yield JiraIssueMetadata.model_validate(issue)
230
+
231
+ def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
260
232
  if not self.index_config.boards:
261
- return []
262
- return [
263
- issue
264
- for board_id in self.index_config.boards
265
- for issue in self._get_issues_within_single_board(board_id)
266
- ]
267
-
268
- def _get_issues(self) -> List[JiraIssueMetadata]:
269
- with self.connection_config.get_client() as client:
270
- issues = [
271
- client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
272
- for issue_key in self.index_config.issues or []
273
- ]
274
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
275
-
276
- def get_issues(self) -> List[JiraIssueMetadata]:
277
- issues = [
278
- *self._get_issues_within_boards(),
279
- *self._get_issues_within_projects(),
280
- *self._get_issues(),
281
- ]
282
- # Select unique issues by issue 'id'.
283
- # Since boards issues are fetched first,
284
- # if there are duplicates, the board issues will be kept,
285
- # in order to keep issue 'board_id' information.
286
- seen = set()
287
- unique_issues: List[JiraIssueMetadata] = []
288
- for issue in issues:
289
- if issue.id not in seen:
290
- unique_issues.append(issue)
291
- seen.add(issue.id)
292
- return unique_issues
233
+ yield
234
+ for board_id in self.index_config.boards:
235
+ for issue in self._get_issues_within_single_board(board_id=board_id):
236
+ yield issue
237
+
238
+ def _update_jql(self, jql: str) -> str:
239
+ if self.index_config.status_filters:
240
+ jql += " and status in ({})".format(
241
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
242
+ )
243
+ jql = jql + " ORDER BY id"
244
+ return jql
293
245
 
294
- def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
295
- from time import time
246
+ def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
247
+ with self.connection_config.get_client() as client:
248
+ fields = ["key", "id"]
249
+ jql = "key in ({})".format(", ".join(self.index_config.issues))
250
+ jql = self._update_jql(jql)
251
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
252
+ yield JiraIssueMetadata.model_validate(issue)
253
+
254
+ def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
255
+ # Build metadata
256
+ metadata = FileDataSourceMetadata(
257
+ date_processed=str(time()),
258
+ record_locator=issue.model_dump(),
259
+ )
296
260
 
297
- issues = self.get_issues()
298
- for issue in issues:
299
- # Build metadata
300
- metadata = FileDataSourceMetadata(
301
- date_processed=str(time()),
302
- record_locator=issue.to_dict(),
303
- )
261
+ # Construct relative path and filename
262
+ filename = f"{issue.id}.txt"
263
+ relative_path = str(Path(issue.get_project_id()) / filename)
304
264
 
305
- # Construct relative path and filename
306
- filename = f"{issue.id}.txt"
307
- relative_path = str(Path(issue.project_id) / filename)
265
+ source_identifiers = SourceIdentifiers(
266
+ filename=filename,
267
+ fullpath=relative_path,
268
+ rel_path=relative_path,
269
+ )
308
270
 
309
- source_identifiers = SourceIdentifiers(
310
- filename=filename,
311
- fullpath=relative_path,
312
- rel_path=relative_path,
313
- )
271
+ file_data = FileData(
272
+ identifier=issue.id,
273
+ connector_type=self.connector_type,
274
+ metadata=metadata,
275
+ additional_metadata=issue.model_dump(),
276
+ source_identifiers=source_identifiers,
277
+ )
278
+ return file_data
279
+
280
+ def get_generators(self) -> List[Callable]:
281
+ generators = []
282
+ if self.index_config.boards:
283
+ generators.append(self._get_issues_within_boards)
284
+ if self.index_config.issues:
285
+ generators.append(self._get_issues_by_keys)
286
+ if self.index_config.projects:
287
+ generators.append(self._get_issues_within_projects)
288
+ return generators
314
289
 
315
- file_data = FileData(
316
- identifier=issue.id,
317
- connector_type=self.connector_type,
318
- metadata=metadata,
319
- additional_metadata=issue.to_dict(),
320
- source_identifiers=source_identifiers,
321
- )
322
- yield file_data
290
+ def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
291
+ seen_keys = []
292
+ for gen in self.get_generators():
293
+ for issue in gen():
294
+ if not issue:
295
+ continue
296
+ if issue.key in seen_keys:
297
+ continue
298
+ seen_keys.append(issue.key)
299
+ yield self._create_file_data_from_issue(issue=issue)
323
300
 
324
301
 
325
302
  class JiraDownloaderConfig(DownloaderConfig):
326
- pass
303
+ download_attachments: bool = Field(
304
+ default=False, description="If True, will download any attachments and process as well"
305
+ )
327
306
 
328
307
 
329
308
  @dataclass
@@ -428,7 +407,56 @@ class JiraDownloader(Downloader):
428
407
  logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
429
408
  raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
430
409
 
431
- def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
410
+ def generate_attachment_file_data(
411
+ self, attachment_dict: dict, parent_filedata: FileData
412
+ ) -> FileData:
413
+ new_filedata = parent_filedata.model_copy(deep=True)
414
+ if new_filedata.metadata.record_locator is None:
415
+ new_filedata.metadata.record_locator = {}
416
+ new_filedata.metadata.record_locator["parent_issue"] = (
417
+ parent_filedata.metadata.record_locator["id"]
418
+ )
419
+ # Append an identifier for attachment to not conflict with issue ids
420
+ new_filedata.identifier = "{}a".format(attachment_dict["id"])
421
+ filename = attachment_dict["filename"]
422
+ new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
423
+ new_filedata.metadata.date_created = attachment_dict.pop("created", None)
424
+ new_filedata.metadata.url = attachment_dict.pop("self", None)
425
+ new_filedata.metadata.record_locator = attachment_dict
426
+ new_filedata.source_identifiers = SourceIdentifiers(
427
+ filename=filename,
428
+ fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
429
+ )
430
+ return new_filedata
431
+
432
+ def process_attachments(
433
+ self, file_data: FileData, attachments: list[dict]
434
+ ) -> list[DownloadResponse]:
435
+ with self.connection_config.get_client() as client:
436
+ download_path = self.get_download_path(file_data)
437
+ attachment_download_dir = download_path.parent / "attachments"
438
+ attachment_download_dir.mkdir(parents=True, exist_ok=True)
439
+ download_responses = []
440
+ for attachment in attachments:
441
+ attachment_filename = Path(attachment["filename"])
442
+ attachment_id = attachment["id"]
443
+ attachment_download_path = attachment_download_dir / Path(
444
+ attachment_id
445
+ ).with_suffix(attachment_filename.suffix)
446
+ resp = client.get_attachment_content(attachment_id=attachment_id)
447
+ with open(attachment_download_path, "wb") as f:
448
+ f.write(resp)
449
+ attachment_filedata = self.generate_attachment_file_data(
450
+ attachment_dict=attachment, parent_filedata=file_data
451
+ )
452
+ download_responses.append(
453
+ self.generate_download_response(
454
+ file_data=attachment_filedata, download_path=attachment_download_path
455
+ )
456
+ )
457
+ return download_responses
458
+
459
+ def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
432
460
  issue_key = file_data.additional_metadata.get("key")
433
461
  if not issue_key:
434
462
  raise ValueError("Issue key not found in metadata.")
@@ -443,7 +471,17 @@ class JiraDownloader(Downloader):
443
471
  with open(download_path, "w") as f:
444
472
  f.write(issue_str)
445
473
  self.update_file_data(file_data, issue)
446
- return self.generate_download_response(file_data=file_data, download_path=download_path)
474
+ download_response = self.generate_download_response(
475
+ file_data=file_data, download_path=download_path
476
+ )
477
+ if self.download_config.download_attachments and (
478
+ attachments := issue.get("fields", {}).get("attachment")
479
+ ):
480
+ attachment_responses = self.process_attachments(
481
+ file_data=file_data, attachments=attachments
482
+ )
483
+ download_response = [download_response] + attachment_responses
484
+ return download_response
447
485
 
448
486
 
449
487
  jira_source_entry = SourceRegistryEntry(
@@ -1,5 +1,6 @@
1
1
  from typing import Dict
2
2
 
3
+ from unstructured_ingest.logger import logger
3
4
  from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
4
5
 
5
6
  from .checkbox import Checkbox, CheckboxCell
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
25
26
  from .url import URL, URLCell
26
27
  from .verification import Verification, VerificationCell
27
28
 
29
+ # It's possible to add 'button' property to Notion database.
30
+ # However, current Notion API documentation doesn't mention it.
31
+ # Buttons are only functional inside Notion UI. We can simply
32
+ # ignore them so that we don't throw an error when trying
33
+ # to map 'button' properties.
34
+ unsupported_db_prop_types = ["button"]
35
+
28
36
  db_prop_type_mapping = {
29
37
  "checkbox": Checkbox,
30
38
  "created_by": CreatedBy,
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
55
63
  mapped_dict = {}
56
64
  for k, v in props.items():
57
65
  try:
58
- mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore
66
+ property_type = v["type"]
67
+ if property_type in unsupported_db_prop_types:
68
+ logger.warning(
69
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
70
+ )
71
+ continue
72
+ mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
59
73
  except KeyError as ke:
60
74
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
61
75
 
@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
92
106
  mapped_dict = {}
93
107
  for k, v in props.items():
94
108
  try:
95
- t = v["type"]
96
- mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore
109
+ property_type = v["type"]
110
+ if property_type in unsupported_db_prop_types:
111
+ logger.warning(
112
+ f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
113
+ )
114
+ continue
115
+ mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
97
116
  except KeyError as ke:
98
117
  raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
99
118
 
@@ -13,6 +13,7 @@ class Checkbox(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "checkbox"
16
+ description: Optional[str] = None
16
17
  checkbox: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -13,6 +13,7 @@ class CreatedBy(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "created_by"
16
+ description: Optional[str] = None
16
17
  created_by: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -12,6 +12,7 @@ class CreatedTime(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "created_time"
15
+ description: Optional[str] = None
15
16
  created_time: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -13,6 +13,7 @@ class Date(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "date"
16
+ description: Optional[str] = None
16
17
  date: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -12,6 +12,7 @@ class Email(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "email"
15
+ description: Optional[str] = None
15
16
  email: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -13,6 +13,7 @@ class Files(DBPropertyBase):
13
13
  id: str
14
14
  name: str
15
15
  type: str = "files"
16
+ description: Optional[str] = None
16
17
  files: dict = field(default_factory=dict)
17
18
 
18
19
  @classmethod
@@ -26,6 +26,7 @@ class Formula(DBPropertyBase):
26
26
  name: str
27
27
  formula: FormulaProp
28
28
  type: str = "formula"
29
+ description: Optional[str] = None
29
30
 
30
31
  @classmethod
31
32
  def from_dict(cls, data: dict):
@@ -23,7 +23,7 @@ class LastEditedByCell(DBCellBase):
23
23
  id: str
24
24
  last_edited_by: People
25
25
  type: str = "last_edited_by"
26
-
26
+ description: Optional[str] = None
27
27
  name: Optional[str] = None
28
28
 
29
29
  @classmethod
@@ -12,6 +12,7 @@ class LastEditedTime(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "last_edited_time"
15
+ description: Optional[str] = None
15
16
  last_edited_time: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -38,6 +38,7 @@ class MultiSelect(DBPropertyBase):
38
38
  name: str
39
39
  multi_select: MultiSelectProp
40
40
  type: str = "multi_select"
41
+ description: Optional[str] = None
41
42
 
42
43
  @classmethod
43
44
  def from_dict(cls, data: dict):
@@ -26,6 +26,7 @@ class Number(DBPropertyBase):
26
26
  name: str
27
27
  number: NumberProp
28
28
  type: str = "number"
29
+ description: Optional[str] = None
29
30
 
30
31
  @classmethod
31
32
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class People(DBPropertyBase):
14
14
  name: str
15
15
  description: Optional[str] = None
16
16
  type: str = "people"
17
+ description: Optional[str] = None
17
18
  people: dict = field(default_factory=dict)
18
19
 
19
20
  @classmethod
@@ -12,6 +12,7 @@ class PhoneNumber(DBPropertyBase):
12
12
  id: str
13
13
  name: str
14
14
  type: str = "phone_number"
15
+ description: Optional[str] = None
15
16
  phone_number: dict = field(default_factory=dict)
16
17
 
17
18
  @classmethod
@@ -45,6 +45,7 @@ class Relation(DBPropertyBase):
45
45
  name: str
46
46
  relation: RelationProp
47
47
  type: str = "relation"
48
+ description: Optional[str] = None
48
49
 
49
50
  @classmethod
50
51
  def from_dict(cls, data: dict):
@@ -15,6 +15,7 @@ class RichText(DBPropertyBase):
15
15
  id: str
16
16
  name: str
17
17
  type: str = "rich_text"
18
+ description: Optional[str] = None
18
19
  rich_text: dict = field(default_factory=dict)
19
20
 
20
21
  @classmethod
@@ -30,6 +30,7 @@ class Rollup(DBPropertyBase):
30
30
  name: str
31
31
  rollup: RollupProp
32
32
  type: str = "rollup"
33
+ description: Optional[str] = None
33
34
 
34
35
  @classmethod
35
36
  def from_dict(cls, data: dict):
@@ -38,8 +38,8 @@ class Select(DBPropertyBase):
38
38
  id: str
39
39
  name: str
40
40
  select: SelectProp
41
- description: Optional[str] = None
42
41
  type: str = "select"
42
+ description: Optional[str] = None
43
43
 
44
44
  @classmethod
45
45
  def from_dict(cls, data: dict):
@@ -55,6 +55,7 @@ class Status(DBPropertyBase):
55
55
  name: str
56
56
  status: StatusProp
57
57
  type: str = "status"
58
+ description: Optional[str] = None
58
59
 
59
60
  @classmethod
60
61
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class Title(DBPropertyBase):
14
14
  name: str
15
15
  type: str = "title"
16
16
  title: dict = field(default_factory=dict)
17
+ description: Optional[str] = None
17
18
 
18
19
  @classmethod
19
20
  def from_dict(cls, data: dict):
@@ -17,6 +17,7 @@ class UniqueID(DBPropertyBase):
17
17
  name: str
18
18
  type: str = "unique_id"
19
19
  unique_id: dict = field(default_factory=dict)
20
+ description: Optional[str] = None
20
21
 
21
22
  @classmethod
22
23
  def from_dict(cls, data: dict):
@@ -14,6 +14,7 @@ class URL(DBPropertyBase):
14
14
  name: str
15
15
  type: str = "url"
16
16
  url: dict = field(default_factory=dict)
17
+ description: Optional[str] = None
17
18
 
18
19
  @classmethod
19
20
  def from_dict(cls, data: dict):
@@ -20,6 +20,7 @@ class Verification(DBPropertyBase):
20
20
  name: str
21
21
  type: str = "verification"
22
22
  verification: dict = field(default_factory=dict)
23
+ description: Optional[str] = None
23
24
 
24
25
  @classmethod
25
26
  def from_dict(cls, data: dict):
@@ -234,15 +234,32 @@ class WeaviateUploader(VectorDBUploader, ABC):
234
234
  self.create_destination(**kwargs)
235
235
 
236
236
  def format_destination_name(self, destination_name: str) -> str:
237
- # Weaviate naming requirements:
238
- # must be alphanumeric and underscores only
237
+ """
238
+ Weaviate Collection naming conventions:
239
+ 1. must begin with an uppercase letter
240
+ 2. must be alphanumeric and underscores only
241
+ """
242
+
243
+ # Check if the first character is an uppercase letter
244
+ if not re.match(r"^[a-zA-Z]", destination_name):
245
+ raise ValueError("Collection name must start with an uppercase letter")
246
+ # Replace all non-alphanumeric characters with underscores
239
247
  formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
240
- # must begin with capital letter
241
- return formatted.capitalize()
248
+ # Make the first character uppercase and leave the rest as is
249
+ if len(formatted) == 1:
250
+ formatted = formatted.capitalize()
251
+ else:
252
+ formatted = formatted[0].capitalize() + formatted[1:]
253
+ if formatted != destination_name:
254
+ logger.warning(
255
+ f"Given Collection name '{destination_name}' doesn't follow naming conventions. "
256
+ f"Renaming to '{formatted}'"
257
+ )
258
+ return formatted
242
259
 
243
260
  def create_destination(
244
261
  self,
245
- destination_name: str = "unstructuredautocreated",
262
+ destination_name: str = "Unstructuredautocreated",
246
263
  vector_length: Optional[int] = None,
247
264
  **kwargs: Any,
248
265
  ) -> bool:
@@ -250,18 +267,18 @@ class WeaviateUploader(VectorDBUploader, ABC):
250
267
  collection_name = self.format_destination_name(collection_name)
251
268
  self.upload_config.collection = collection_name
252
269
 
253
- connectors_dir = Path(__file__).parents[1]
254
- collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
255
- with collection_config_file.open() as f:
256
- collection_config = json.load(f)
257
- collection_config["class"] = collection_name
258
-
259
270
  if not self._collection_exists():
260
- logger.info(f"creating weaviate collection '{collection_name}' with default configs")
271
+ connectors_dir = Path(__file__).parents[1]
272
+ collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
273
+ with collection_config_file.open() as f:
274
+ collection_config = json.load(f)
275
+ collection_config["class"] = collection_name
276
+
277
+ logger.info(f"Creating weaviate collection '{collection_name}' with default configs")
261
278
  with self.connection_config.get_client() as weaviate_client:
262
279
  weaviate_client.collections.create_from_dict(config=collection_config)
263
280
  return True
264
- logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
281
+ logger.debug(f"Collection with name '{collection_name}' already exists, skipping creation")
265
282
  return False
266
283
 
267
284
  def check_for_errors(self, client: "WeaviateClient") -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.19
3
+ Version: 1.0.23
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
114
114
  Provides-Extra: milvus
115
115
  Requires-Dist: pymilvus; extra == 'milvus'
116
116
  Provides-Extra: mixedbreadai
117
- Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
117
+ Requires-Dist: mixedbread; extra == 'mixedbreadai'
118
118
  Provides-Extra: mongodb
119
119
  Requires-Dist: pymongo; extra == 'mongodb'
120
120
  Provides-Extra: msg
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=QEY4I6tpDtP0kidFO1nzGaJrkm23PnuMCi1-QfQdUBQ,43
2
+ unstructured_ingest/__version__.py,sha256=xbdPxvOGZJUW_s_LZYTaPijNvLNKSjZuHlwNDGHpDjE,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -26,7 +26,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9
26
26
  unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
27
27
  unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
28
28
  unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
29
- unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
29
+ unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
30
30
  unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
31
31
  unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
32
32
  unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
73
73
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
74
74
  unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
75
75
  unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
76
- unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
76
+ unstructured_ingest/processes/connectors/jira.py,sha256=Hw07c2HT2vA2l2wpoYWXPNtLbnWreXCIRimAxm0Gfpw,19055
77
77
  unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
78
78
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
79
79
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
@@ -174,29 +174,29 @@ unstructured_ingest/processes/connectors/notion/types/blocks/todo.py,sha256=Kiga
174
174
  unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py,sha256=6ae_eR3SOfUgTw-XO_F3JRBaczSp8UZfLBFMRMO5NHo,1188
175
175
  unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py,sha256=q_p9XH8sQB8xwFqi9yEl6Fvur3fTLdeVdQCh0gSju58,442
176
176
  unstructured_ingest/processes/connectors/notion/types/blocks/video.py,sha256=XK-O7XPs5ejTUWrg2FTLvbOZajs-yDtVhR79HSEcxvo,779
177
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=iUdtLGlHe52daXBWVlGghXcGSxCOCDiFASsuKb4_UAM,3225
178
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=1GiebXsYYoQmM1GZJBrzv9dnM9P9dtQJ-dwbQDo6PdI,1010
179
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=NvOuM_1SVBdn-6acYxKJ1ThMuWJ935aYtaKV1TOBTFQ,949
180
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=HPf6Dp7wWWRQ-j7AGFWMm-wkakuJ4R8_rfz9RPNZuec,834
181
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=5nYyh8S9HzFyZb2bZZttSZj8CE0Q2dc73cA27m-H1k4,1067
182
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=0powD6-ZftD0WSHxjTHMQ388RK-WfWLaYuEJIT19mdM,831
183
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=1-Jnx1YyWptNW3737oOzr7WGItS7aNj68BYaWyBRljo,1020
184
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=LV9d6NRLGu7eIr6wpfnkLxgQug107LGAFC1Z9ooc3xw,1069
185
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=1TMFWA5GIb6pJbYAF_Q2ZPOWNa9y0p8ELk6UzLMZ2FY,926
186
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=MvZgXTBx1uS-vn9vITFCIRPFnV26_rcao1YhBiZu5bU,864
187
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=WnEJyDmEaI39ETopeoXwFp6Iog-rF2TYWG3o-DRv2ic,1917
188
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=LbrTryg5z8Q1dLekxwXi_qCvOwH6n_LD4iVYsVBNGB4,1056
189
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=LXNbhBo53Lf7jM_bLYgarwLao7ymEJjZhJ-U5xMBbLc,1184
190
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=NoIVkd31mvvZYa0DOkE4JibJdhJjIEL8C78jNxcxtVo,909
191
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=e4zQmjtP2w1RNfKDaWu2-AOhwjM_QP_TrjyxAQUnpN4,1528
192
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=EeWvzLx9zYT3crH_eshAOf1L60k5gMqJUqQGiI-is7c,1171
193
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=xyaA2yIvDfe-tu_TXi6ZjCz2jCawFS2DVKvfSr-nxjQ,1277
194
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=Qy-t9kmJq5cZKohxNX2DbvIftX95CvDX6JMDVWzVYl4,1794
195
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=GneqbmGaBqOecoApH6I4YWf5flvuVk2m5wMJrRw7Nvo,2044
196
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=4Tnfxxx_Nr-Wkwd7urotp1xTGO9itWe8qI3OnfooLrE,1023
197
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=i95msSj2-l3yG8tw02dIChADDEy695MWmNWXJq0jGD4,1173
198
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=azWD7VwkYlI4vFYgsuMvr2618skr4MCtagSTuJ509bg,873
199
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=bge_vjWUYh9-CJ_gmVg7ESVLAxHeq_wicp-6vlqNRwQ,2367
177
+ unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=6kUXmCI58R1e50b1U-_xqrrPw3g2Mqtbt02aC7DVAxw,4118
178
+ unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=uS2B4nQ-ISt8QGxw7nNwst8MX5xRTecSvqokZ23DKyA,1048
179
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=RcpjFijEwyuGrPhSjrXT1nxaLoX2mnCvjveZ0f5Ke3c,987
180
+ unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=eJjkK1nKb0-Ohi4lpCplbrUTkCgf4D2gWbFxEhDI_G8,872
181
+ unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=DECAkkhR6qQ-WKsOzQf2VPdYGcyrnAJNk4y4JHDVDuc,1105
182
+ unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=yOHIV_fpF9xzqcrkRIC4cF_aC8C7RsJJvRtEgSn30a8,869
183
+ unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=5_6FpFWoKNriaBRLtNDRUxu1ZO1UTvAFeu4H55VNY68,1058
184
+ unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=zcTeVuXpmuMNh4FHJHW5zgKWAqo0Wx7s9UsSEvA_wR8,1107
185
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=WpWlXz9AwS1rugpvoDoVOo055dVEAt3XmvudD17HJu8,963
186
+ unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=kvmMmEXj3WndR9BG9MHwuM40luA4XhGfnF6rKDpYiF0,902
187
+ unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=LSDfUXpgUox7Z77_TKIlKHqYPUgO8Y06lVgvju6NXx8,1955
188
+ unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=y4ocq8_yX_yKkAtM3qcqIueM9y96-47gshM2mra_tgw,1094
189
+ unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=HBTbqw1L1h8XHKuuS0e0aoz0dAZXSLDy7zRwM1_rRps,1222
190
+ unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=mBEcwNLCI-FLU6t2FqR_tNTvrJFIQ7hqYeTB51HavBc,947
191
+ unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=fYYyzBhi1fmXrhdxu6W6uMr2e6HaDCfrvY7yZIFvgmM,1566
192
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=D2y-98U7MP-YmsrAPeT4vqG3m7HB4zoOzMMhhYN8VHY,1209
193
+ unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=r8UXCW7Y-eE5W6RpXiyyMszCMRDtiwBmYOmYHZ_9-VY,1315
194
+ unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=-UAIuddoyKol45epuOYNlS8dchuwL0wMGwash4BwuH4,1794
195
+ unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=kUFZsWGQZAApEbs5qI37t8LPN0vUM5vcu4pPbEvIGkE,2082
196
+ unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=yd1vPbCBgGIbtUsC3zOu3-Cdpcst0dEkuFVdtS97hxA,1061
197
+ unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=e8WslwVD6ccf4x_3NihX4BWtH7y4zMAFH7Ur4jS3dH8,1211
198
+ unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=Bxu8x7mmH28l2nQSaAmygal8dZdUdHEFbUYIk75B0iQ,911
199
+ unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=B9J4zXHVJzzIcn_QD04Z9eibEij1ornw5RhZ2qmUdDU,2405
200
200
  unstructured_ingest/processes/connectors/qdrant/__init__.py,sha256=7WN_3M3qQ0O7pUJSXIKtPqAvKX2tQ_WxClCHbFeqPfc,757
201
201
  unstructured_ingest/processes/connectors/qdrant/cloud.py,sha256=H5Plp2xqFheESLertj56o78CL4exyCQhBDE1TGAzcWU,1618
202
202
  unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRiIHMPcctKyVBdsaLi8KXloAwq76o,1582
@@ -214,7 +214,7 @@ unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3
214
214
  unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
215
215
  unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
216
216
  unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaSNqVlKROm-S3Ql3naLmKvigLBgUQdw,2195
217
- unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
217
+ unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=yB67gxvo3X0UaP_mNeB0HbSWXst7ur0E2QKwLA0gIS4,13647
218
218
  unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
219
  unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
220
220
  unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.19.dist-info/METADATA,sha256=KYCpNnFQGIb6yuOkgP9qKKvLxkQ0Mw2qdWz_I124nYM,8694
235
- unstructured_ingest-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.19.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.19.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.19.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.23.dist-info/METADATA,sha256=b0LZ3XzhlhUgDZd4mEUPxxhOT-lqKAOnDfiQeJhCgoA,8691
235
+ unstructured_ingest-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.23.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.23.dist-info/RECORD,,