unstructured-ingest 1.0.21__py3-none-any.whl → 1.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/mixedbreadai.py +28 -45
- unstructured_ingest/processes/connectors/jira.py +197 -191
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
- {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/METADATA +2 -2
- {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/RECORD +31 -31
- {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.21" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.24" # pragma: no cover
|
|
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
23
|
-
from mixedbread_ai.core import RequestOptions
|
|
22
|
+
from mixedbread import AsyncMixedbread, Mixedbread
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
|
44
43
|
)
|
|
45
44
|
|
|
46
45
|
@requires_dependencies(
|
|
47
|
-
["
|
|
48
|
-
extras="mixedbreadai",
|
|
46
|
+
["mixedbread"],
|
|
47
|
+
extras="embed-mixedbreadai",
|
|
49
48
|
)
|
|
50
|
-
def get_client(self) -> "
|
|
49
|
+
def get_client(self) -> "Mixedbread":
|
|
51
50
|
"""
|
|
52
51
|
Create the Mixedbread AI client.
|
|
53
52
|
|
|
54
53
|
Returns:
|
|
55
|
-
|
|
54
|
+
Mixedbread: Initialized client.
|
|
56
55
|
"""
|
|
57
|
-
from
|
|
56
|
+
from mixedbread import Mixedbread
|
|
58
57
|
|
|
59
|
-
return
|
|
58
|
+
return Mixedbread(
|
|
60
59
|
api_key=self.api_key.get_secret_value(),
|
|
60
|
+
max_retries=MAX_RETRIES,
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
@requires_dependencies(
|
|
64
|
-
["
|
|
65
|
-
extras="mixedbreadai",
|
|
64
|
+
["mixedbread"],
|
|
65
|
+
extras="embed-mixedbreadai",
|
|
66
66
|
)
|
|
67
|
-
def get_async_client(self) -> "
|
|
68
|
-
from
|
|
67
|
+
def get_async_client(self) -> "AsyncMixedbread":
|
|
68
|
+
from mixedbread import AsyncMixedbread
|
|
69
69
|
|
|
70
|
-
return
|
|
70
|
+
return AsyncMixedbread(
|
|
71
71
|
api_key=self.api_key.get_secret_value(),
|
|
72
|
+
max_retries=MAX_RETRIES,
|
|
72
73
|
)
|
|
73
74
|
|
|
74
75
|
|
|
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
88
89
|
return self.embed_query(query="Q")
|
|
89
90
|
|
|
90
91
|
@requires_dependencies(
|
|
91
|
-
["
|
|
92
|
+
["mixedbread"],
|
|
92
93
|
extras="embed-mixedbreadai",
|
|
93
94
|
)
|
|
94
|
-
def
|
|
95
|
-
from mixedbread_ai.core import RequestOptions
|
|
96
|
-
|
|
97
|
-
return RequestOptions(
|
|
98
|
-
max_retries=MAX_RETRIES,
|
|
99
|
-
timeout_in_seconds=TIMEOUT,
|
|
100
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def get_client(self) -> "MixedbreadAI":
|
|
95
|
+
def get_client(self) -> "Mixedbread":
|
|
104
96
|
return self.config.get_client()
|
|
105
97
|
|
|
106
|
-
def embed_batch(self, client: "
|
|
107
|
-
response = client.
|
|
98
|
+
def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
|
|
99
|
+
response = client.embed(
|
|
108
100
|
model=self.config.embedder_model_name,
|
|
101
|
+
input=batch,
|
|
109
102
|
normalized=True,
|
|
110
103
|
encoding_format=ENCODING_FORMAT,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
input=batch,
|
|
104
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
105
|
+
timeout=TIMEOUT,
|
|
114
106
|
)
|
|
115
107
|
return [datum.embedding for datum in response.data]
|
|
116
108
|
|
|
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
|
124
116
|
return await self.embed_query(query="Q")
|
|
125
117
|
|
|
126
118
|
@requires_dependencies(
|
|
127
|
-
["
|
|
119
|
+
["mixedbread"],
|
|
128
120
|
extras="embed-mixedbreadai",
|
|
129
121
|
)
|
|
130
|
-
def
|
|
131
|
-
from mixedbread_ai.core import RequestOptions
|
|
132
|
-
|
|
133
|
-
return RequestOptions(
|
|
134
|
-
max_retries=MAX_RETRIES,
|
|
135
|
-
timeout_in_seconds=TIMEOUT,
|
|
136
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def get_client(self) -> "AsyncMixedbreadAI":
|
|
122
|
+
def get_client(self) -> "AsyncMixedbread":
|
|
140
123
|
return self.config.get_async_client()
|
|
141
124
|
|
|
142
|
-
async def embed_batch(self, client: "
|
|
143
|
-
response = await client.
|
|
125
|
+
async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
|
|
126
|
+
response = await client.embed(
|
|
144
127
|
model=self.config.embedder_model_name,
|
|
128
|
+
input=batch,
|
|
145
129
|
normalized=True,
|
|
146
130
|
encoding_format=ENCODING_FORMAT,
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
input=batch,
|
|
131
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
132
|
+
timeout=TIMEOUT,
|
|
150
133
|
)
|
|
151
134
|
return [datum.embedding for datum in response.data]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import math
|
|
2
1
|
from collections import abc
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from
|
|
5
|
+
from time import time
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union
|
|
7
7
|
|
|
8
|
-
from pydantic import Field, Secret
|
|
8
|
+
from pydantic import BaseModel, Field, Secret
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.data_types.file_data import (
|
|
11
11
|
FileData,
|
|
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
|
|
|
21
21
|
DownloadResponse,
|
|
22
22
|
Indexer,
|
|
23
23
|
IndexerConfig,
|
|
24
|
+
download_responses,
|
|
24
25
|
)
|
|
25
26
|
from unstructured_ingest.logger import logger
|
|
26
27
|
from unstructured_ingest.processes.connector_registry import (
|
|
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
|
|
|
37
38
|
DEFAULT_R_SEP = "\n"
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
class JiraIssueMetadata:
|
|
41
|
+
class JiraIssueMetadata(BaseModel):
|
|
42
42
|
id: str
|
|
43
43
|
key: str
|
|
44
|
-
board_id: Optional[str] = None
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
def project_id(self) -> str:
|
|
45
|
+
def get_project_id(self) -> str:
|
|
48
46
|
return self.key.split("-")[0]
|
|
49
47
|
|
|
50
|
-
def to_dict(self) -> Dict[str, Union[str, None]]:
|
|
51
|
-
return {
|
|
52
|
-
"id": self.id,
|
|
53
|
-
"key": self.key,
|
|
54
|
-
"board_id": self.board_id,
|
|
55
|
-
"project_id": self.project_id,
|
|
56
|
-
}
|
|
57
|
-
|
|
58
48
|
|
|
59
49
|
class FieldGetter(dict):
|
|
60
50
|
def __getitem__(self, key):
|
|
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
|
|
|
77
67
|
return obj
|
|
78
68
|
|
|
79
69
|
|
|
80
|
-
def
|
|
81
|
-
""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
|
|
107
|
-
|
|
108
|
-
for _ in range(num_iterations):
|
|
109
|
-
response = func(*args, **kwargs)
|
|
110
|
-
if isinstance(response, list):
|
|
111
|
-
all_results += response
|
|
112
|
-
elif isinstance(response, dict):
|
|
113
|
-
if results_key not in response:
|
|
114
|
-
raise KeyError(f'Response object is missing "{results_key}" key.')
|
|
115
|
-
all_results += response[results_key]
|
|
116
|
-
else:
|
|
117
|
-
raise TypeError(
|
|
118
|
-
f"""Unexpected response type from Jira API.
|
|
119
|
-
Response type has to be either list or dict, got: {type(response).__name__}."""
|
|
120
|
-
)
|
|
121
|
-
kwargs["start"] += kwargs["limit"]
|
|
122
|
-
|
|
123
|
-
return all_results
|
|
124
|
-
|
|
125
|
-
return wrapper
|
|
70
|
+
def api_token_based_generator(
|
|
71
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
72
|
+
) -> Generator[dict, None, None]:
|
|
73
|
+
nextPageToken = kwargs.pop("nextPageToken", None)
|
|
74
|
+
while True:
|
|
75
|
+
resp = fn(nextPageToken=nextPageToken, **kwargs)
|
|
76
|
+
issues = resp.get(key, [])
|
|
77
|
+
for issue in issues:
|
|
78
|
+
yield issue
|
|
79
|
+
nextPageToken = resp.get("nextPageToken")
|
|
80
|
+
if not nextPageToken:
|
|
81
|
+
break
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def api_page_based_generator(
|
|
85
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
86
|
+
) -> Generator[dict, None, None]:
|
|
87
|
+
start = kwargs.pop("start", 0)
|
|
88
|
+
while True:
|
|
89
|
+
resp = fn(start=start, **kwargs)
|
|
90
|
+
issues = resp.get(key, [])
|
|
91
|
+
if not issues:
|
|
92
|
+
break
|
|
93
|
+
for issue in issues:
|
|
94
|
+
yield issue
|
|
95
|
+
start += len(issues)
|
|
126
96
|
|
|
127
97
|
|
|
128
98
|
class JiraAccessConfig(AccessConfig):
|
|
@@ -169,28 +139,8 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
169
139
|
def get_client(self) -> Generator["Jira", None, None]:
|
|
170
140
|
from atlassian import Jira
|
|
171
141
|
|
|
172
|
-
class CustomJira(Jira):
|
|
173
|
-
"""
|
|
174
|
-
Custom Jira class to fix the issue with the get_project_issues_count method.
|
|
175
|
-
This class inherits from the original Jira class and overrides the method to
|
|
176
|
-
handle the response correctly.
|
|
177
|
-
Once the issue is fixed in the original library, this class can be removed.
|
|
178
|
-
"""
|
|
179
|
-
|
|
180
|
-
def __init__(self, *args, **kwargs):
|
|
181
|
-
super().__init__(*args, **kwargs)
|
|
182
|
-
|
|
183
|
-
def get_project_issues_count(self, project: str) -> int:
|
|
184
|
-
jql = f'project = "{project}" '
|
|
185
|
-
response = self.jql(jql, fields="*none")
|
|
186
|
-
response = cast("dict", response)
|
|
187
|
-
if "total" in response:
|
|
188
|
-
return response["total"]
|
|
189
|
-
else:
|
|
190
|
-
return len(response["issues"])
|
|
191
|
-
|
|
192
142
|
access_configs = self.access_config.get_secret_value()
|
|
193
|
-
with
|
|
143
|
+
with Jira(
|
|
194
144
|
url=self.url,
|
|
195
145
|
username=self.username,
|
|
196
146
|
password=access_configs.password,
|
|
@@ -201,9 +151,17 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
201
151
|
|
|
202
152
|
|
|
203
153
|
class JiraIndexerConfig(IndexerConfig):
|
|
204
|
-
projects: Optional[
|
|
205
|
-
boards: Optional[
|
|
206
|
-
issues: Optional[
|
|
154
|
+
projects: Optional[list[str]] = Field(None, description="List of project keys")
|
|
155
|
+
boards: Optional[list[str]] = Field(None, description="List of board IDs")
|
|
156
|
+
issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
|
|
157
|
+
status_filters: Optional[list[str]] = Field(
|
|
158
|
+
default=None,
|
|
159
|
+
description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
def model_post_init(self, context: Any, /) -> None:
|
|
163
|
+
if not self.projects and not self.boards and not self.issues:
|
|
164
|
+
raise ValueError("At least one of projects, boards, or issues must be provided.")
|
|
207
165
|
|
|
208
166
|
|
|
209
167
|
@dataclass
|
|
@@ -228,122 +186,111 @@ class JiraIndexer(Indexer):
|
|
|
228
186
|
)
|
|
229
187
|
logger.info("Connection to Jira successful.")
|
|
230
188
|
|
|
231
|
-
def
|
|
189
|
+
def run_jql(self, jql: str, **kwargs) -> Generator[JiraIssueMetadata, None, None]:
|
|
232
190
|
with self.connection_config.get_client() as client:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
raise KeyError('Response object is missing "total" key.')
|
|
237
|
-
number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
|
|
238
|
-
if not number_of_issues_to_fetch:
|
|
239
|
-
logger.warning(f"No issues found in project: {project_key}. Skipping!")
|
|
240
|
-
return []
|
|
241
|
-
get_project_issues = issues_fetcher_wrapper(
|
|
242
|
-
client.get_all_project_issues,
|
|
243
|
-
results_key="issues",
|
|
244
|
-
number_of_issues_to_fetch=number_of_issues_to_fetch,
|
|
245
|
-
)
|
|
246
|
-
issues = get_project_issues(project=project_key, fields=["key", "id"])
|
|
247
|
-
logger.debug(f"Found {len(issues)} issues in project: {project_key}")
|
|
248
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
249
|
-
|
|
250
|
-
def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
|
|
251
|
-
project_keys = self.index_config.projects
|
|
252
|
-
if not project_keys:
|
|
253
|
-
# for when a component list is provided, without any projects
|
|
254
|
-
if self.index_config.boards or self.index_config.issues:
|
|
255
|
-
return []
|
|
256
|
-
# for when no components are provided. all projects will be ingested
|
|
191
|
+
if client.cloud:
|
|
192
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, **kwargs):
|
|
193
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
257
194
|
else:
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
195
|
+
for issue in api_page_based_generator(client.jql, jql=jql, **kwargs):
|
|
196
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
197
|
+
|
|
198
|
+
def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
199
|
+
fields = ["key", "id", "status"]
|
|
200
|
+
jql = "project in ({})".format(", ".join(self.index_config.projects))
|
|
201
|
+
jql = self._update_jql(jql)
|
|
202
|
+
logger.debug(f"running jql: {jql}")
|
|
203
|
+
return self.run_jql(jql=jql, fields=fields)
|
|
265
204
|
|
|
266
205
|
def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
|
|
267
206
|
with self.connection_config.get_client() as client:
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
207
|
+
fields = ["key", "id"]
|
|
208
|
+
if self.index_config.status_filters:
|
|
209
|
+
jql = "status in ({}) ORDER BY id".format(
|
|
210
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
211
|
+
)
|
|
212
|
+
else:
|
|
213
|
+
jql = "ORDER BY id"
|
|
214
|
+
logger.debug(f"running jql for board {board_id}: {jql}")
|
|
215
|
+
for issue in api_page_based_generator(
|
|
216
|
+
fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
|
|
217
|
+
):
|
|
218
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
219
|
+
|
|
220
|
+
def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
280
221
|
if not self.index_config.boards:
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
issue
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
issues = [
|
|
291
|
-
client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
|
|
292
|
-
for issue_key in self.index_config.issues or []
|
|
293
|
-
]
|
|
294
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
295
|
-
|
|
296
|
-
def get_issues(self) -> List[JiraIssueMetadata]:
|
|
297
|
-
issues = [
|
|
298
|
-
*self._get_issues_within_boards(),
|
|
299
|
-
*self._get_issues_within_projects(),
|
|
300
|
-
*self._get_issues(),
|
|
301
|
-
]
|
|
302
|
-
# Select unique issues by issue 'id'.
|
|
303
|
-
# Since boards issues are fetched first,
|
|
304
|
-
# if there are duplicates, the board issues will be kept,
|
|
305
|
-
# in order to keep issue 'board_id' information.
|
|
306
|
-
seen = set()
|
|
307
|
-
unique_issues: List[JiraIssueMetadata] = []
|
|
308
|
-
for issue in issues:
|
|
309
|
-
if issue.id not in seen:
|
|
310
|
-
unique_issues.append(issue)
|
|
311
|
-
seen.add(issue.id)
|
|
312
|
-
return unique_issues
|
|
313
|
-
|
|
314
|
-
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
315
|
-
from time import time
|
|
316
|
-
|
|
317
|
-
issues = self.get_issues()
|
|
318
|
-
for issue in issues:
|
|
319
|
-
# Build metadata
|
|
320
|
-
metadata = FileDataSourceMetadata(
|
|
321
|
-
date_processed=str(time()),
|
|
322
|
-
record_locator=issue.to_dict(),
|
|
222
|
+
yield
|
|
223
|
+
for board_id in self.index_config.boards:
|
|
224
|
+
for issue in self._get_issues_within_single_board(board_id=board_id):
|
|
225
|
+
yield issue
|
|
226
|
+
|
|
227
|
+
def _update_jql(self, jql: str) -> str:
|
|
228
|
+
if self.index_config.status_filters:
|
|
229
|
+
jql += " and status in ({})".format(
|
|
230
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
323
231
|
)
|
|
232
|
+
jql = jql + " ORDER BY id"
|
|
233
|
+
return jql
|
|
234
|
+
|
|
235
|
+
def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
236
|
+
fields = ["key", "id"]
|
|
237
|
+
jql = "key in ({})".format(", ".join(self.index_config.issues))
|
|
238
|
+
jql = self._update_jql(jql)
|
|
239
|
+
logger.debug(f"running jql: {jql}")
|
|
240
|
+
return self.run_jql(jql=jql, fields=fields)
|
|
241
|
+
|
|
242
|
+
def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
|
|
243
|
+
# Build metadata
|
|
244
|
+
metadata = FileDataSourceMetadata(
|
|
245
|
+
date_processed=str(time()),
|
|
246
|
+
record_locator=issue.model_dump(),
|
|
247
|
+
)
|
|
324
248
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
249
|
+
# Construct relative path and filename
|
|
250
|
+
filename = f"{issue.id}.txt"
|
|
251
|
+
relative_path = str(Path(issue.get_project_id()) / filename)
|
|
328
252
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
253
|
+
source_identifiers = SourceIdentifiers(
|
|
254
|
+
filename=filename,
|
|
255
|
+
fullpath=relative_path,
|
|
256
|
+
rel_path=relative_path,
|
|
257
|
+
)
|
|
334
258
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
259
|
+
file_data = FileData(
|
|
260
|
+
identifier=issue.id,
|
|
261
|
+
connector_type=self.connector_type,
|
|
262
|
+
metadata=metadata,
|
|
263
|
+
additional_metadata=issue.model_dump(),
|
|
264
|
+
source_identifiers=source_identifiers,
|
|
265
|
+
)
|
|
266
|
+
return file_data
|
|
267
|
+
|
|
268
|
+
def get_generators(self) -> List[Callable]:
|
|
269
|
+
generators = []
|
|
270
|
+
if self.index_config.boards:
|
|
271
|
+
generators.append(self._get_issues_within_boards)
|
|
272
|
+
if self.index_config.issues:
|
|
273
|
+
generators.append(self._get_issues_by_keys)
|
|
274
|
+
if self.index_config.projects:
|
|
275
|
+
generators.append(self._get_issues_within_projects)
|
|
276
|
+
return generators
|
|
277
|
+
|
|
278
|
+
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
279
|
+
seen_keys = []
|
|
280
|
+
for gen in self.get_generators():
|
|
281
|
+
for issue in gen():
|
|
282
|
+
if not issue:
|
|
283
|
+
continue
|
|
284
|
+
if issue.key in seen_keys:
|
|
285
|
+
continue
|
|
286
|
+
seen_keys.append(issue.key)
|
|
287
|
+
yield self._create_file_data_from_issue(issue=issue)
|
|
343
288
|
|
|
344
289
|
|
|
345
290
|
class JiraDownloaderConfig(DownloaderConfig):
|
|
346
|
-
|
|
291
|
+
download_attachments: bool = Field(
|
|
292
|
+
default=False, description="If True, will download any attachments and process as well"
|
|
293
|
+
)
|
|
347
294
|
|
|
348
295
|
|
|
349
296
|
@dataclass
|
|
@@ -448,7 +395,56 @@ class JiraDownloader(Downloader):
|
|
|
448
395
|
logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
|
|
449
396
|
raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
|
|
450
397
|
|
|
451
|
-
def
|
|
398
|
+
def generate_attachment_file_data(
|
|
399
|
+
self, attachment_dict: dict, parent_filedata: FileData
|
|
400
|
+
) -> FileData:
|
|
401
|
+
new_filedata = parent_filedata.model_copy(deep=True)
|
|
402
|
+
if new_filedata.metadata.record_locator is None:
|
|
403
|
+
new_filedata.metadata.record_locator = {}
|
|
404
|
+
new_filedata.metadata.record_locator["parent_issue"] = (
|
|
405
|
+
parent_filedata.metadata.record_locator["id"]
|
|
406
|
+
)
|
|
407
|
+
# Append an identifier for attachment to not conflict with issue ids
|
|
408
|
+
new_filedata.identifier = "{}a".format(attachment_dict["id"])
|
|
409
|
+
filename = attachment_dict["filename"]
|
|
410
|
+
new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
|
|
411
|
+
new_filedata.metadata.date_created = attachment_dict.pop("created", None)
|
|
412
|
+
new_filedata.metadata.url = attachment_dict.pop("self", None)
|
|
413
|
+
new_filedata.metadata.record_locator = attachment_dict
|
|
414
|
+
new_filedata.source_identifiers = SourceIdentifiers(
|
|
415
|
+
filename=filename,
|
|
416
|
+
fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
|
|
417
|
+
)
|
|
418
|
+
return new_filedata
|
|
419
|
+
|
|
420
|
+
def process_attachments(
|
|
421
|
+
self, file_data: FileData, attachments: list[dict]
|
|
422
|
+
) -> list[DownloadResponse]:
|
|
423
|
+
with self.connection_config.get_client() as client:
|
|
424
|
+
download_path = self.get_download_path(file_data)
|
|
425
|
+
attachment_download_dir = download_path.parent / "attachments"
|
|
426
|
+
attachment_download_dir.mkdir(parents=True, exist_ok=True)
|
|
427
|
+
download_responses = []
|
|
428
|
+
for attachment in attachments:
|
|
429
|
+
attachment_filename = Path(attachment["filename"])
|
|
430
|
+
attachment_id = attachment["id"]
|
|
431
|
+
attachment_download_path = attachment_download_dir / Path(
|
|
432
|
+
attachment_id
|
|
433
|
+
).with_suffix(attachment_filename.suffix)
|
|
434
|
+
resp = client.get_attachment_content(attachment_id=attachment_id)
|
|
435
|
+
with open(attachment_download_path, "wb") as f:
|
|
436
|
+
f.write(resp)
|
|
437
|
+
attachment_filedata = self.generate_attachment_file_data(
|
|
438
|
+
attachment_dict=attachment, parent_filedata=file_data
|
|
439
|
+
)
|
|
440
|
+
download_responses.append(
|
|
441
|
+
self.generate_download_response(
|
|
442
|
+
file_data=attachment_filedata, download_path=attachment_download_path
|
|
443
|
+
)
|
|
444
|
+
)
|
|
445
|
+
return download_responses
|
|
446
|
+
|
|
447
|
+
def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
|
|
452
448
|
issue_key = file_data.additional_metadata.get("key")
|
|
453
449
|
if not issue_key:
|
|
454
450
|
raise ValueError("Issue key not found in metadata.")
|
|
@@ -463,7 +459,17 @@ class JiraDownloader(Downloader):
|
|
|
463
459
|
with open(download_path, "w") as f:
|
|
464
460
|
f.write(issue_str)
|
|
465
461
|
self.update_file_data(file_data, issue)
|
|
466
|
-
|
|
462
|
+
download_response = self.generate_download_response(
|
|
463
|
+
file_data=file_data, download_path=download_path
|
|
464
|
+
)
|
|
465
|
+
if self.download_config.download_attachments and (
|
|
466
|
+
attachments := issue.get("fields", {}).get("attachment")
|
|
467
|
+
):
|
|
468
|
+
attachment_responses = self.process_attachments(
|
|
469
|
+
file_data=file_data, attachments=attachments
|
|
470
|
+
)
|
|
471
|
+
download_response = [download_response] + attachment_responses
|
|
472
|
+
return download_response
|
|
467
473
|
|
|
468
474
|
|
|
469
475
|
jira_source_entry = SourceRegistryEntry(
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
+
from unstructured_ingest.logger import logger
|
|
3
4
|
from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
|
|
4
5
|
|
|
5
6
|
from .checkbox import Checkbox, CheckboxCell
|
|
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
|
|
|
25
26
|
from .url import URL, URLCell
|
|
26
27
|
from .verification import Verification, VerificationCell
|
|
27
28
|
|
|
29
|
+
# It's possible to add 'button' property to Notion database.
|
|
30
|
+
# However, current Notion API documentation doesn't mention it.
|
|
31
|
+
# Buttons are only functional inside Notion UI. We can simply
|
|
32
|
+
# ignore them so that the we don't throw an error when trying
|
|
33
|
+
# to map 'button' properties.
|
|
34
|
+
unsupported_db_prop_types = ["button"]
|
|
35
|
+
|
|
28
36
|
db_prop_type_mapping = {
|
|
29
37
|
"checkbox": Checkbox,
|
|
30
38
|
"created_by": CreatedBy,
|
|
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
|
|
|
55
63
|
mapped_dict = {}
|
|
56
64
|
for k, v in props.items():
|
|
57
65
|
try:
|
|
58
|
-
|
|
66
|
+
property_type = v["type"]
|
|
67
|
+
if property_type in unsupported_db_prop_types:
|
|
68
|
+
logger.warning(
|
|
69
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
70
|
+
)
|
|
71
|
+
continue
|
|
72
|
+
mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
|
|
59
73
|
except KeyError as ke:
|
|
60
74
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
61
75
|
|
|
@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
|
|
|
92
106
|
mapped_dict = {}
|
|
93
107
|
for k, v in props.items():
|
|
94
108
|
try:
|
|
95
|
-
|
|
96
|
-
|
|
109
|
+
property_type = v["type"]
|
|
110
|
+
if property_type in unsupported_db_prop_types:
|
|
111
|
+
logger.warning(
|
|
112
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
113
|
+
)
|
|
114
|
+
continue
|
|
115
|
+
mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
|
|
97
116
|
except KeyError as ke:
|
|
98
117
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
99
118
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.21
|
|
3
|
+
Version: 1.0.24
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
|
|
|
114
114
|
Provides-Extra: milvus
|
|
115
115
|
Requires-Dist: pymilvus; extra == 'milvus'
|
|
116
116
|
Provides-Extra: mixedbreadai
|
|
117
|
-
Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
|
|
117
|
+
Requires-Dist: mixedbread; extra == 'mixedbreadai'
|
|
118
118
|
Provides-Extra: mongodb
|
|
119
119
|
Requires-Dist: pymongo; extra == 'mongodb'
|
|
120
120
|
Provides-Extra: msg
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=p1Nz9H4WBA_aI3GL1htUsWwzMmx5t9ktPqeOxmax3ms,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -26,7 +26,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9
|
|
|
26
26
|
unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
|
|
27
27
|
unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
|
|
28
28
|
unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
|
|
29
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256=
|
|
29
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
|
|
30
30
|
unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
|
|
31
31
|
unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
|
|
32
32
|
unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
|
|
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
|
|
|
73
73
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
74
74
|
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
75
75
|
unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
|
|
76
|
-
unstructured_ingest/processes/connectors/jira.py,sha256=
|
|
76
|
+
unstructured_ingest/processes/connectors/jira.py,sha256=a7OuVi4RFfr22Tqgk60lwmtWTRBw2fI1m8KPqfA8Ffo,18504
|
|
77
77
|
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
78
78
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
79
79
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
@@ -174,29 +174,29 @@ unstructured_ingest/processes/connectors/notion/types/blocks/todo.py,sha256=Kiga
|
|
|
174
174
|
unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py,sha256=6ae_eR3SOfUgTw-XO_F3JRBaczSp8UZfLBFMRMO5NHo,1188
|
|
175
175
|
unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py,sha256=q_p9XH8sQB8xwFqi9yEl6Fvur3fTLdeVdQCh0gSju58,442
|
|
176
176
|
unstructured_ingest/processes/connectors/notion/types/blocks/video.py,sha256=XK-O7XPs5ejTUWrg2FTLvbOZajs-yDtVhR79HSEcxvo,779
|
|
177
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=
|
|
178
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=
|
|
179
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=
|
|
180
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=
|
|
181
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=
|
|
182
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=
|
|
183
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=
|
|
184
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=
|
|
185
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=
|
|
186
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=
|
|
187
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=
|
|
188
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=
|
|
189
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=
|
|
190
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=
|
|
191
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=
|
|
192
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=
|
|
193
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=
|
|
194
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256
|
|
195
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=
|
|
196
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=
|
|
197
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=
|
|
198
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=
|
|
199
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=
|
|
177
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=6kUXmCI58R1e50b1U-_xqrrPw3g2Mqtbt02aC7DVAxw,4118
|
|
178
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=uS2B4nQ-ISt8QGxw7nNwst8MX5xRTecSvqokZ23DKyA,1048
|
|
179
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=RcpjFijEwyuGrPhSjrXT1nxaLoX2mnCvjveZ0f5Ke3c,987
|
|
180
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=eJjkK1nKb0-Ohi4lpCplbrUTkCgf4D2gWbFxEhDI_G8,872
|
|
181
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=DECAkkhR6qQ-WKsOzQf2VPdYGcyrnAJNk4y4JHDVDuc,1105
|
|
182
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=yOHIV_fpF9xzqcrkRIC4cF_aC8C7RsJJvRtEgSn30a8,869
|
|
183
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=5_6FpFWoKNriaBRLtNDRUxu1ZO1UTvAFeu4H55VNY68,1058
|
|
184
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=zcTeVuXpmuMNh4FHJHW5zgKWAqo0Wx7s9UsSEvA_wR8,1107
|
|
185
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=WpWlXz9AwS1rugpvoDoVOo055dVEAt3XmvudD17HJu8,963
|
|
186
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=kvmMmEXj3WndR9BG9MHwuM40luA4XhGfnF6rKDpYiF0,902
|
|
187
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=LSDfUXpgUox7Z77_TKIlKHqYPUgO8Y06lVgvju6NXx8,1955
|
|
188
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=y4ocq8_yX_yKkAtM3qcqIueM9y96-47gshM2mra_tgw,1094
|
|
189
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=HBTbqw1L1h8XHKuuS0e0aoz0dAZXSLDy7zRwM1_rRps,1222
|
|
190
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=mBEcwNLCI-FLU6t2FqR_tNTvrJFIQ7hqYeTB51HavBc,947
|
|
191
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=fYYyzBhi1fmXrhdxu6W6uMr2e6HaDCfrvY7yZIFvgmM,1566
|
|
192
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=D2y-98U7MP-YmsrAPeT4vqG3m7HB4zoOzMMhhYN8VHY,1209
|
|
193
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=r8UXCW7Y-eE5W6RpXiyyMszCMRDtiwBmYOmYHZ_9-VY,1315
|
|
194
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=-UAIuddoyKol45epuOYNlS8dchuwL0wMGwash4BwuH4,1794
|
|
195
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=kUFZsWGQZAApEbs5qI37t8LPN0vUM5vcu4pPbEvIGkE,2082
|
|
196
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=yd1vPbCBgGIbtUsC3zOu3-Cdpcst0dEkuFVdtS97hxA,1061
|
|
197
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=e8WslwVD6ccf4x_3NihX4BWtH7y4zMAFH7Ur4jS3dH8,1211
|
|
198
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=Bxu8x7mmH28l2nQSaAmygal8dZdUdHEFbUYIk75B0iQ,911
|
|
199
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=B9J4zXHVJzzIcn_QD04Z9eibEij1ornw5RhZ2qmUdDU,2405
|
|
200
200
|
unstructured_ingest/processes/connectors/qdrant/__init__.py,sha256=7WN_3M3qQ0O7pUJSXIKtPqAvKX2tQ_WxClCHbFeqPfc,757
|
|
201
201
|
unstructured_ingest/processes/connectors/qdrant/cloud.py,sha256=H5Plp2xqFheESLertj56o78CL4exyCQhBDE1TGAzcWU,1618
|
|
202
202
|
unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRiIHMPcctKyVBdsaLi8KXloAwq76o,1582
|
|
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
231
231
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
232
232
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
233
233
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
234
|
-
unstructured_ingest-1.0.
|
|
235
|
-
unstructured_ingest-1.0.
|
|
236
|
-
unstructured_ingest-1.0.
|
|
237
|
-
unstructured_ingest-1.0.
|
|
238
|
-
unstructured_ingest-1.0.
|
|
234
|
+
unstructured_ingest-1.0.24.dist-info/METADATA,sha256=Ssmaf7onq6HIFmhR7f2mMPoS2gqGy6dmvxo605W_dWU,8691
|
|
235
|
+
unstructured_ingest-1.0.24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
236
|
+
unstructured_ingest-1.0.24.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
237
|
+
unstructured_ingest-1.0.24.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
238
|
+
unstructured_ingest-1.0.24.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.21.dist-info → unstructured_ingest-1.0.24.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|