unstructured-ingest 1.0.19__py3-none-any.whl → 1.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/mixedbreadai.py +28 -45
- unstructured_ingest/processes/connectors/jira.py +209 -171
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
- unstructured_ingest/processes/connectors/weaviate/weaviate.py +30 -13
- {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/METADATA +2 -2
- {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/RECORD +32 -32
- {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.19" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.23" # pragma: no cover
|
|
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
23
|
-
from mixedbread_ai.core import RequestOptions
|
|
22
|
+
from mixedbread import AsyncMixedbread, Mixedbread
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
|
44
43
|
)
|
|
45
44
|
|
|
46
45
|
@requires_dependencies(
|
|
47
|
-
["
|
|
48
|
-
extras="mixedbreadai",
|
|
46
|
+
["mixedbread"],
|
|
47
|
+
extras="embed-mixedbreadai",
|
|
49
48
|
)
|
|
50
|
-
def get_client(self) -> "
|
|
49
|
+
def get_client(self) -> "Mixedbread":
|
|
51
50
|
"""
|
|
52
51
|
Create the Mixedbread AI client.
|
|
53
52
|
|
|
54
53
|
Returns:
|
|
55
|
-
|
|
54
|
+
Mixedbread: Initialized client.
|
|
56
55
|
"""
|
|
57
|
-
from
|
|
56
|
+
from mixedbread import Mixedbread
|
|
58
57
|
|
|
59
|
-
return
|
|
58
|
+
return Mixedbread(
|
|
60
59
|
api_key=self.api_key.get_secret_value(),
|
|
60
|
+
max_retries=MAX_RETRIES,
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
@requires_dependencies(
|
|
64
|
-
["
|
|
65
|
-
extras="mixedbreadai",
|
|
64
|
+
["mixedbread"],
|
|
65
|
+
extras="embed-mixedbreadai",
|
|
66
66
|
)
|
|
67
|
-
def get_async_client(self) -> "
|
|
68
|
-
from
|
|
67
|
+
def get_async_client(self) -> "AsyncMixedbread":
|
|
68
|
+
from mixedbread import AsyncMixedbread
|
|
69
69
|
|
|
70
|
-
return
|
|
70
|
+
return AsyncMixedbread(
|
|
71
71
|
api_key=self.api_key.get_secret_value(),
|
|
72
|
+
max_retries=MAX_RETRIES,
|
|
72
73
|
)
|
|
73
74
|
|
|
74
75
|
|
|
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
88
89
|
return self.embed_query(query="Q")
|
|
89
90
|
|
|
90
91
|
@requires_dependencies(
|
|
91
|
-
["
|
|
92
|
+
["mixedbread"],
|
|
92
93
|
extras="embed-mixedbreadai",
|
|
93
94
|
)
|
|
94
|
-
def
|
|
95
|
-
from mixedbread_ai.core import RequestOptions
|
|
96
|
-
|
|
97
|
-
return RequestOptions(
|
|
98
|
-
max_retries=MAX_RETRIES,
|
|
99
|
-
timeout_in_seconds=TIMEOUT,
|
|
100
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def get_client(self) -> "MixedbreadAI":
|
|
95
|
+
def get_client(self) -> "Mixedbread":
|
|
104
96
|
return self.config.get_client()
|
|
105
97
|
|
|
106
|
-
def embed_batch(self, client: "
|
|
107
|
-
response = client.
|
|
98
|
+
def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
|
|
99
|
+
response = client.embed(
|
|
108
100
|
model=self.config.embedder_model_name,
|
|
101
|
+
input=batch,
|
|
109
102
|
normalized=True,
|
|
110
103
|
encoding_format=ENCODING_FORMAT,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
input=batch,
|
|
104
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
105
|
+
timeout=TIMEOUT,
|
|
114
106
|
)
|
|
115
107
|
return [datum.embedding for datum in response.data]
|
|
116
108
|
|
|
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
|
124
116
|
return await self.embed_query(query="Q")
|
|
125
117
|
|
|
126
118
|
@requires_dependencies(
|
|
127
|
-
["
|
|
119
|
+
["mixedbread"],
|
|
128
120
|
extras="embed-mixedbreadai",
|
|
129
121
|
)
|
|
130
|
-
def
|
|
131
|
-
from mixedbread_ai.core import RequestOptions
|
|
132
|
-
|
|
133
|
-
return RequestOptions(
|
|
134
|
-
max_retries=MAX_RETRIES,
|
|
135
|
-
timeout_in_seconds=TIMEOUT,
|
|
136
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def get_client(self) -> "AsyncMixedbreadAI":
|
|
122
|
+
def get_client(self) -> "AsyncMixedbread":
|
|
140
123
|
return self.config.get_async_client()
|
|
141
124
|
|
|
142
|
-
async def embed_batch(self, client: "
|
|
143
|
-
response = await client.
|
|
125
|
+
async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
|
|
126
|
+
response = await client.embed(
|
|
144
127
|
model=self.config.embedder_model_name,
|
|
128
|
+
input=batch,
|
|
145
129
|
normalized=True,
|
|
146
130
|
encoding_format=ENCODING_FORMAT,
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
input=batch,
|
|
131
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
132
|
+
timeout=TIMEOUT,
|
|
150
133
|
)
|
|
151
134
|
return [datum.embedding for datum in response.data]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import math
|
|
2
1
|
from collections import abc
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from
|
|
5
|
+
from time import time
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union, cast
|
|
7
7
|
|
|
8
|
-
from pydantic import Field, Secret
|
|
8
|
+
from pydantic import BaseModel, Field, Secret
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.data_types.file_data import (
|
|
11
11
|
FileData,
|
|
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
|
|
|
21
21
|
DownloadResponse,
|
|
22
22
|
Indexer,
|
|
23
23
|
IndexerConfig,
|
|
24
|
+
download_responses,
|
|
24
25
|
)
|
|
25
26
|
from unstructured_ingest.logger import logger
|
|
26
27
|
from unstructured_ingest.processes.connector_registry import (
|
|
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
|
|
|
37
38
|
DEFAULT_R_SEP = "\n"
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
class JiraIssueMetadata:
|
|
41
|
+
class JiraIssueMetadata(BaseModel):
|
|
42
42
|
id: str
|
|
43
43
|
key: str
|
|
44
|
-
board_id: Optional[str] = None
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
def project_id(self) -> str:
|
|
45
|
+
def get_project_id(self) -> str:
|
|
48
46
|
return self.key.split("-")[0]
|
|
49
47
|
|
|
50
|
-
def to_dict(self) -> Dict[str, Union[str, None]]:
|
|
51
|
-
return {
|
|
52
|
-
"id": self.id,
|
|
53
|
-
"key": self.key,
|
|
54
|
-
"board_id": self.board_id,
|
|
55
|
-
"project_id": self.project_id,
|
|
56
|
-
}
|
|
57
|
-
|
|
58
48
|
|
|
59
49
|
class FieldGetter(dict):
|
|
60
50
|
def __getitem__(self, key):
|
|
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
|
|
|
77
67
|
return obj
|
|
78
68
|
|
|
79
69
|
|
|
80
|
-
def
|
|
81
|
-
""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
|
|
107
|
-
|
|
108
|
-
for _ in range(num_iterations):
|
|
109
|
-
response = func(*args, **kwargs)
|
|
110
|
-
if isinstance(response, list):
|
|
111
|
-
all_results += response
|
|
112
|
-
elif isinstance(response, dict):
|
|
113
|
-
if results_key not in response:
|
|
114
|
-
raise KeyError(f'Response object is missing "{results_key}" key.')
|
|
115
|
-
all_results += response[results_key]
|
|
116
|
-
else:
|
|
117
|
-
raise TypeError(
|
|
118
|
-
f"""Unexpected response type from Jira API.
|
|
119
|
-
Response type has to be either list or dict, got: {type(response).__name__}."""
|
|
120
|
-
)
|
|
121
|
-
kwargs["start"] += kwargs["limit"]
|
|
122
|
-
|
|
123
|
-
return all_results
|
|
124
|
-
|
|
125
|
-
return wrapper
|
|
70
|
+
def api_token_based_generator(
|
|
71
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
72
|
+
) -> Generator[dict, None, None]:
|
|
73
|
+
nextPageToken = kwargs.pop("nextPageToken", None)
|
|
74
|
+
while True:
|
|
75
|
+
resp = fn(nextPageToken=nextPageToken, **kwargs)
|
|
76
|
+
issues = resp.get(key, [])
|
|
77
|
+
for issue in issues:
|
|
78
|
+
yield issue
|
|
79
|
+
nextPageToken = resp.get("nextPageToken")
|
|
80
|
+
if not nextPageToken:
|
|
81
|
+
break
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def api_page_based_generator(
|
|
85
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
86
|
+
) -> Generator[dict, None, None]:
|
|
87
|
+
start = kwargs.pop("start", 0)
|
|
88
|
+
while True:
|
|
89
|
+
resp = fn(start=start, **kwargs)
|
|
90
|
+
issues = resp.get(key, [])
|
|
91
|
+
if not issues:
|
|
92
|
+
break
|
|
93
|
+
for issue in issues:
|
|
94
|
+
yield issue
|
|
95
|
+
start += len(issues)
|
|
126
96
|
|
|
127
97
|
|
|
128
98
|
class JiraAccessConfig(AccessConfig):
|
|
@@ -169,8 +139,28 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
169
139
|
def get_client(self) -> Generator["Jira", None, None]:
|
|
170
140
|
from atlassian import Jira
|
|
171
141
|
|
|
142
|
+
class CustomJira(Jira):
|
|
143
|
+
"""
|
|
144
|
+
Custom Jira class to fix the issue with the get_project_issues_count method.
|
|
145
|
+
This class inherits from the original Jira class and overrides the method to
|
|
146
|
+
handle the response correctly.
|
|
147
|
+
Once the issue is fixed in the original library, this class can be removed.
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
def __init__(self, *args, **kwargs):
|
|
151
|
+
super().__init__(*args, **kwargs)
|
|
152
|
+
|
|
153
|
+
def get_project_issues_count(self, project: str) -> int:
|
|
154
|
+
jql = f'project = "{project}" '
|
|
155
|
+
response = self.jql(jql, fields="*none")
|
|
156
|
+
response = cast("dict", response)
|
|
157
|
+
if "total" in response:
|
|
158
|
+
return response["total"]
|
|
159
|
+
else:
|
|
160
|
+
return len(response["issues"])
|
|
161
|
+
|
|
172
162
|
access_configs = self.access_config.get_secret_value()
|
|
173
|
-
with
|
|
163
|
+
with CustomJira(
|
|
174
164
|
url=self.url,
|
|
175
165
|
username=self.username,
|
|
176
166
|
password=access_configs.password,
|
|
@@ -181,9 +171,17 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
181
171
|
|
|
182
172
|
|
|
183
173
|
class JiraIndexerConfig(IndexerConfig):
|
|
184
|
-
projects: Optional[
|
|
185
|
-
boards: Optional[
|
|
186
|
-
issues: Optional[
|
|
174
|
+
projects: Optional[list[str]] = Field(None, description="List of project keys")
|
|
175
|
+
boards: Optional[list[str]] = Field(None, description="List of board IDs")
|
|
176
|
+
issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
|
|
177
|
+
status_filters: Optional[list[str]] = Field(
|
|
178
|
+
default=None,
|
|
179
|
+
description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
def model_post_init(self, context: Any, /) -> None:
|
|
183
|
+
if not self.projects and not self.boards and not self.issues:
|
|
184
|
+
raise ValueError("At least one of projects, boards, or issues must be provided.")
|
|
187
185
|
|
|
188
186
|
|
|
189
187
|
@dataclass
|
|
@@ -208,122 +206,103 @@ class JiraIndexer(Indexer):
|
|
|
208
206
|
)
|
|
209
207
|
logger.info("Connection to Jira successful.")
|
|
210
208
|
|
|
211
|
-
def
|
|
209
|
+
def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
212
210
|
with self.connection_config.get_client() as client:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
if not number_of_issues_to_fetch:
|
|
219
|
-
logger.warning(f"No issues found in project: {project_key}. Skipping!")
|
|
220
|
-
return []
|
|
221
|
-
get_project_issues = issues_fetcher_wrapper(
|
|
222
|
-
client.get_all_project_issues,
|
|
223
|
-
results_key="issues",
|
|
224
|
-
number_of_issues_to_fetch=number_of_issues_to_fetch,
|
|
225
|
-
)
|
|
226
|
-
issues = get_project_issues(project=project_key, fields=["key", "id"])
|
|
227
|
-
logger.debug(f"Found {len(issues)} issues in project: {project_key}")
|
|
228
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
229
|
-
|
|
230
|
-
def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
|
|
231
|
-
project_keys = self.index_config.projects
|
|
232
|
-
if not project_keys:
|
|
233
|
-
# for when a component list is provided, without any projects
|
|
234
|
-
if self.index_config.boards or self.index_config.issues:
|
|
235
|
-
return []
|
|
236
|
-
# for when no components are provided. all projects will be ingested
|
|
237
|
-
else:
|
|
238
|
-
with self.connection_config.get_client() as client:
|
|
239
|
-
project_keys = [project["key"] for project in client.projects()]
|
|
240
|
-
return [
|
|
241
|
-
issue
|
|
242
|
-
for project_key in project_keys
|
|
243
|
-
for issue in self._get_issues_within_single_project(project_key)
|
|
244
|
-
]
|
|
211
|
+
fields = ["key", "id"]
|
|
212
|
+
jql = "project in ({})".format(", ".join(self.index_config.projects))
|
|
213
|
+
jql = self._update_jql(jql)
|
|
214
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
215
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
245
216
|
|
|
246
217
|
def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
|
|
247
218
|
with self.connection_config.get_client() as client:
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
219
|
+
fields = ["key", "id"]
|
|
220
|
+
if self.index_config.status_filters:
|
|
221
|
+
jql = "status in ({}) ORDER BY id".format(
|
|
222
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
jql = "ORDER BY id"
|
|
226
|
+
for issue in api_page_based_generator(
|
|
227
|
+
fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
|
|
228
|
+
):
|
|
229
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
230
|
+
|
|
231
|
+
def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
260
232
|
if not self.index_config.boards:
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
issue
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
]
|
|
274
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
275
|
-
|
|
276
|
-
def get_issues(self) -> List[JiraIssueMetadata]:
|
|
277
|
-
issues = [
|
|
278
|
-
*self._get_issues_within_boards(),
|
|
279
|
-
*self._get_issues_within_projects(),
|
|
280
|
-
*self._get_issues(),
|
|
281
|
-
]
|
|
282
|
-
# Select unique issues by issue 'id'.
|
|
283
|
-
# Since boards issues are fetched first,
|
|
284
|
-
# if there are duplicates, the board issues will be kept,
|
|
285
|
-
# in order to keep issue 'board_id' information.
|
|
286
|
-
seen = set()
|
|
287
|
-
unique_issues: List[JiraIssueMetadata] = []
|
|
288
|
-
for issue in issues:
|
|
289
|
-
if issue.id not in seen:
|
|
290
|
-
unique_issues.append(issue)
|
|
291
|
-
seen.add(issue.id)
|
|
292
|
-
return unique_issues
|
|
233
|
+
yield
|
|
234
|
+
for board_id in self.index_config.boards:
|
|
235
|
+
for issue in self._get_issues_within_single_board(board_id=board_id):
|
|
236
|
+
yield issue
|
|
237
|
+
|
|
238
|
+
def _update_jql(self, jql: str) -> str:
|
|
239
|
+
if self.index_config.status_filters:
|
|
240
|
+
jql += " and status in ({})".format(
|
|
241
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
242
|
+
)
|
|
243
|
+
jql = jql + " ORDER BY id"
|
|
244
|
+
return jql
|
|
293
245
|
|
|
294
|
-
def
|
|
295
|
-
|
|
246
|
+
def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
247
|
+
with self.connection_config.get_client() as client:
|
|
248
|
+
fields = ["key", "id"]
|
|
249
|
+
jql = "key in ({})".format(", ".join(self.index_config.issues))
|
|
250
|
+
jql = self._update_jql(jql)
|
|
251
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
252
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
253
|
+
|
|
254
|
+
def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
|
|
255
|
+
# Build metadata
|
|
256
|
+
metadata = FileDataSourceMetadata(
|
|
257
|
+
date_processed=str(time()),
|
|
258
|
+
record_locator=issue.model_dump(),
|
|
259
|
+
)
|
|
296
260
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
metadata = FileDataSourceMetadata(
|
|
301
|
-
date_processed=str(time()),
|
|
302
|
-
record_locator=issue.to_dict(),
|
|
303
|
-
)
|
|
261
|
+
# Construct relative path and filename
|
|
262
|
+
filename = f"{issue.id}.txt"
|
|
263
|
+
relative_path = str(Path(issue.get_project_id()) / filename)
|
|
304
264
|
|
|
305
|
-
|
|
306
|
-
filename
|
|
307
|
-
relative_path
|
|
265
|
+
source_identifiers = SourceIdentifiers(
|
|
266
|
+
filename=filename,
|
|
267
|
+
fullpath=relative_path,
|
|
268
|
+
rel_path=relative_path,
|
|
269
|
+
)
|
|
308
270
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
)
|
|
271
|
+
file_data = FileData(
|
|
272
|
+
identifier=issue.id,
|
|
273
|
+
connector_type=self.connector_type,
|
|
274
|
+
metadata=metadata,
|
|
275
|
+
additional_metadata=issue.model_dump(),
|
|
276
|
+
source_identifiers=source_identifiers,
|
|
277
|
+
)
|
|
278
|
+
return file_data
|
|
279
|
+
|
|
280
|
+
def get_generators(self) -> List[Callable]:
|
|
281
|
+
generators = []
|
|
282
|
+
if self.index_config.boards:
|
|
283
|
+
generators.append(self._get_issues_within_boards)
|
|
284
|
+
if self.index_config.issues:
|
|
285
|
+
generators.append(self._get_issues_by_keys)
|
|
286
|
+
if self.index_config.projects:
|
|
287
|
+
generators.append(self._get_issues_within_projects)
|
|
288
|
+
return generators
|
|
314
289
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
290
|
+
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
291
|
+
seen_keys = []
|
|
292
|
+
for gen in self.get_generators():
|
|
293
|
+
for issue in gen():
|
|
294
|
+
if not issue:
|
|
295
|
+
continue
|
|
296
|
+
if issue.key in seen_keys:
|
|
297
|
+
continue
|
|
298
|
+
seen_keys.append(issue.key)
|
|
299
|
+
yield self._create_file_data_from_issue(issue=issue)
|
|
323
300
|
|
|
324
301
|
|
|
325
302
|
class JiraDownloaderConfig(DownloaderConfig):
|
|
326
|
-
|
|
303
|
+
download_attachments: bool = Field(
|
|
304
|
+
default=False, description="If True, will download any attachments and process as well"
|
|
305
|
+
)
|
|
327
306
|
|
|
328
307
|
|
|
329
308
|
@dataclass
|
|
@@ -428,7 +407,56 @@ class JiraDownloader(Downloader):
|
|
|
428
407
|
logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
|
|
429
408
|
raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
|
|
430
409
|
|
|
431
|
-
def
|
|
410
|
+
def generate_attachment_file_data(
|
|
411
|
+
self, attachment_dict: dict, parent_filedata: FileData
|
|
412
|
+
) -> FileData:
|
|
413
|
+
new_filedata = parent_filedata.model_copy(deep=True)
|
|
414
|
+
if new_filedata.metadata.record_locator is None:
|
|
415
|
+
new_filedata.metadata.record_locator = {}
|
|
416
|
+
new_filedata.metadata.record_locator["parent_issue"] = (
|
|
417
|
+
parent_filedata.metadata.record_locator["id"]
|
|
418
|
+
)
|
|
419
|
+
# Append an identifier for attachment to not conflict with issue ids
|
|
420
|
+
new_filedata.identifier = "{}a".format(attachment_dict["id"])
|
|
421
|
+
filename = attachment_dict["filename"]
|
|
422
|
+
new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
|
|
423
|
+
new_filedata.metadata.date_created = attachment_dict.pop("created", None)
|
|
424
|
+
new_filedata.metadata.url = attachment_dict.pop("self", None)
|
|
425
|
+
new_filedata.metadata.record_locator = attachment_dict
|
|
426
|
+
new_filedata.source_identifiers = SourceIdentifiers(
|
|
427
|
+
filename=filename,
|
|
428
|
+
fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
|
|
429
|
+
)
|
|
430
|
+
return new_filedata
|
|
431
|
+
|
|
432
|
+
def process_attachments(
|
|
433
|
+
self, file_data: FileData, attachments: list[dict]
|
|
434
|
+
) -> list[DownloadResponse]:
|
|
435
|
+
with self.connection_config.get_client() as client:
|
|
436
|
+
download_path = self.get_download_path(file_data)
|
|
437
|
+
attachment_download_dir = download_path.parent / "attachments"
|
|
438
|
+
attachment_download_dir.mkdir(parents=True, exist_ok=True)
|
|
439
|
+
download_responses = []
|
|
440
|
+
for attachment in attachments:
|
|
441
|
+
attachment_filename = Path(attachment["filename"])
|
|
442
|
+
attachment_id = attachment["id"]
|
|
443
|
+
attachment_download_path = attachment_download_dir / Path(
|
|
444
|
+
attachment_id
|
|
445
|
+
).with_suffix(attachment_filename.suffix)
|
|
446
|
+
resp = client.get_attachment_content(attachment_id=attachment_id)
|
|
447
|
+
with open(attachment_download_path, "wb") as f:
|
|
448
|
+
f.write(resp)
|
|
449
|
+
attachment_filedata = self.generate_attachment_file_data(
|
|
450
|
+
attachment_dict=attachment, parent_filedata=file_data
|
|
451
|
+
)
|
|
452
|
+
download_responses.append(
|
|
453
|
+
self.generate_download_response(
|
|
454
|
+
file_data=attachment_filedata, download_path=attachment_download_path
|
|
455
|
+
)
|
|
456
|
+
)
|
|
457
|
+
return download_responses
|
|
458
|
+
|
|
459
|
+
def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
|
|
432
460
|
issue_key = file_data.additional_metadata.get("key")
|
|
433
461
|
if not issue_key:
|
|
434
462
|
raise ValueError("Issue key not found in metadata.")
|
|
@@ -443,7 +471,17 @@ class JiraDownloader(Downloader):
|
|
|
443
471
|
with open(download_path, "w") as f:
|
|
444
472
|
f.write(issue_str)
|
|
445
473
|
self.update_file_data(file_data, issue)
|
|
446
|
-
|
|
474
|
+
download_response = self.generate_download_response(
|
|
475
|
+
file_data=file_data, download_path=download_path
|
|
476
|
+
)
|
|
477
|
+
if self.download_config.download_attachments and (
|
|
478
|
+
attachments := issue.get("fields", {}).get("attachment")
|
|
479
|
+
):
|
|
480
|
+
attachment_responses = self.process_attachments(
|
|
481
|
+
file_data=file_data, attachments=attachments
|
|
482
|
+
)
|
|
483
|
+
download_response = [download_response] + attachment_responses
|
|
484
|
+
return download_response
|
|
447
485
|
|
|
448
486
|
|
|
449
487
|
jira_source_entry = SourceRegistryEntry(
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
+
from unstructured_ingest.logger import logger
|
|
3
4
|
from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
|
|
4
5
|
|
|
5
6
|
from .checkbox import Checkbox, CheckboxCell
|
|
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
|
|
|
25
26
|
from .url import URL, URLCell
|
|
26
27
|
from .verification import Verification, VerificationCell
|
|
27
28
|
|
|
29
|
+
# It's possible to add 'button' property to Notion database.
|
|
30
|
+
# However, current Notion API documentation doesn't mention it.
|
|
31
|
+
# Buttons are only functional inside Notion UI. We can simply
|
|
32
|
+
# ignore them so that we don't throw an error when trying
|
|
33
|
+
# to map 'button' properties.
|
|
34
|
+
unsupported_db_prop_types = ["button"]
|
|
35
|
+
|
|
28
36
|
db_prop_type_mapping = {
|
|
29
37
|
"checkbox": Checkbox,
|
|
30
38
|
"created_by": CreatedBy,
|
|
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
|
|
|
55
63
|
mapped_dict = {}
|
|
56
64
|
for k, v in props.items():
|
|
57
65
|
try:
|
|
58
|
-
|
|
66
|
+
property_type = v["type"]
|
|
67
|
+
if property_type in unsupported_db_prop_types:
|
|
68
|
+
logger.warning(
|
|
69
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
70
|
+
)
|
|
71
|
+
continue
|
|
72
|
+
mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
|
|
59
73
|
except KeyError as ke:
|
|
60
74
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
61
75
|
|
|
@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
|
|
|
92
106
|
mapped_dict = {}
|
|
93
107
|
for k, v in props.items():
|
|
94
108
|
try:
|
|
95
|
-
|
|
96
|
-
|
|
109
|
+
property_type = v["type"]
|
|
110
|
+
if property_type in unsupported_db_prop_types:
|
|
111
|
+
logger.warning(
|
|
112
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
113
|
+
)
|
|
114
|
+
continue
|
|
115
|
+
mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
|
|
97
116
|
except KeyError as ke:
|
|
98
117
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
99
118
|
|
|
@@ -234,15 +234,32 @@ class WeaviateUploader(VectorDBUploader, ABC):
|
|
|
234
234
|
self.create_destination(**kwargs)
|
|
235
235
|
|
|
236
236
|
def format_destination_name(self, destination_name: str) -> str:
|
|
237
|
-
|
|
238
|
-
|
|
237
|
+
"""
|
|
238
|
+
Weaviate Collection naming conventions:
|
|
239
|
+
1. must begin with an uppercase letter
|
|
240
|
+
2. must be alphanumeric and underscores only
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
# Check that the first character is a letter (it is capitalized further below)
|
|
244
|
+
if not re.match(r"^[a-zA-Z]", destination_name):
|
|
245
|
+
raise ValueError("Collection name must start with an uppercase letter")
|
|
246
|
+
# Replace all non-alphanumeric characters with underscores
|
|
239
247
|
formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
|
|
240
|
-
#
|
|
241
|
-
|
|
248
|
+
# Make the first character uppercase and leave the rest as is
|
|
249
|
+
if len(formatted) == 1:
|
|
250
|
+
formatted = formatted.capitalize()
|
|
251
|
+
else:
|
|
252
|
+
formatted = formatted[0].capitalize() + formatted[1:]
|
|
253
|
+
if formatted != destination_name:
|
|
254
|
+
logger.warning(
|
|
255
|
+
f"Given Collection name '{destination_name}' doesn't follow naming conventions. "
|
|
256
|
+
f"Renaming to '{formatted}'"
|
|
257
|
+
)
|
|
258
|
+
return formatted
|
|
242
259
|
|
|
243
260
|
def create_destination(
|
|
244
261
|
self,
|
|
245
|
-
destination_name: str = "
|
|
262
|
+
destination_name: str = "Unstructuredautocreated",
|
|
246
263
|
vector_length: Optional[int] = None,
|
|
247
264
|
**kwargs: Any,
|
|
248
265
|
) -> bool:
|
|
@@ -250,18 +267,18 @@ class WeaviateUploader(VectorDBUploader, ABC):
|
|
|
250
267
|
collection_name = self.format_destination_name(collection_name)
|
|
251
268
|
self.upload_config.collection = collection_name
|
|
252
269
|
|
|
253
|
-
connectors_dir = Path(__file__).parents[1]
|
|
254
|
-
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
255
|
-
with collection_config_file.open() as f:
|
|
256
|
-
collection_config = json.load(f)
|
|
257
|
-
collection_config["class"] = collection_name
|
|
258
|
-
|
|
259
270
|
if not self._collection_exists():
|
|
260
|
-
|
|
271
|
+
connectors_dir = Path(__file__).parents[1]
|
|
272
|
+
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
273
|
+
with collection_config_file.open() as f:
|
|
274
|
+
collection_config = json.load(f)
|
|
275
|
+
collection_config["class"] = collection_name
|
|
276
|
+
|
|
277
|
+
logger.info(f"Creating weaviate collection '{collection_name}' with default configs")
|
|
261
278
|
with self.connection_config.get_client() as weaviate_client:
|
|
262
279
|
weaviate_client.collections.create_from_dict(config=collection_config)
|
|
263
280
|
return True
|
|
264
|
-
logger.debug(f"
|
|
281
|
+
logger.debug(f"Collection with name '{collection_name}' already exists, skipping creation")
|
|
265
282
|
return False
|
|
266
283
|
|
|
267
284
|
def check_for_errors(self, client: "WeaviateClient") -> None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.19
|
|
3
|
+
Version: 1.0.23
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
|
|
|
114
114
|
Provides-Extra: milvus
|
|
115
115
|
Requires-Dist: pymilvus; extra == 'milvus'
|
|
116
116
|
Provides-Extra: mixedbreadai
|
|
117
|
-
Requires-Dist: mixedbread
|
|
117
|
+
Requires-Dist: mixedbread; extra == 'mixedbreadai'
|
|
118
118
|
Provides-Extra: mongodb
|
|
119
119
|
Requires-Dist: pymongo; extra == 'mongodb'
|
|
120
120
|
Provides-Extra: msg
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=xbdPxvOGZJUW_s_LZYTaPijNvLNKSjZuHlwNDGHpDjE,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -26,7 +26,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9
|
|
|
26
26
|
unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
|
|
27
27
|
unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
|
|
28
28
|
unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
|
|
29
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256=
|
|
29
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
|
|
30
30
|
unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
|
|
31
31
|
unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
|
|
32
32
|
unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
|
|
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
|
|
|
73
73
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
74
74
|
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
75
75
|
unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
|
|
76
|
-
unstructured_ingest/processes/connectors/jira.py,sha256=
|
|
76
|
+
unstructured_ingest/processes/connectors/jira.py,sha256=Hw07c2HT2vA2l2wpoYWXPNtLbnWreXCIRimAxm0Gfpw,19055
|
|
77
77
|
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
78
78
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
79
79
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
@@ -174,29 +174,29 @@ unstructured_ingest/processes/connectors/notion/types/blocks/todo.py,sha256=Kiga
|
|
|
174
174
|
unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py,sha256=6ae_eR3SOfUgTw-XO_F3JRBaczSp8UZfLBFMRMO5NHo,1188
|
|
175
175
|
unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py,sha256=q_p9XH8sQB8xwFqi9yEl6Fvur3fTLdeVdQCh0gSju58,442
|
|
176
176
|
unstructured_ingest/processes/connectors/notion/types/blocks/video.py,sha256=XK-O7XPs5ejTUWrg2FTLvbOZajs-yDtVhR79HSEcxvo,779
|
|
177
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=
|
|
178
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=
|
|
179
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=
|
|
180
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=
|
|
181
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=
|
|
182
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=
|
|
183
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=
|
|
184
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=
|
|
185
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=
|
|
186
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=
|
|
187
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=
|
|
188
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=
|
|
189
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=
|
|
190
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=
|
|
191
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=
|
|
192
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=
|
|
193
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=
|
|
194
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=
|
|
195
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=
|
|
196
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=
|
|
197
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=
|
|
198
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=
|
|
199
|
-
unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=
|
|
177
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py,sha256=6kUXmCI58R1e50b1U-_xqrrPw3g2Mqtbt02aC7DVAxw,4118
|
|
178
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py,sha256=uS2B4nQ-ISt8QGxw7nNwst8MX5xRTecSvqokZ23DKyA,1048
|
|
179
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py,sha256=RcpjFijEwyuGrPhSjrXT1nxaLoX2mnCvjveZ0f5Ke3c,987
|
|
180
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py,sha256=eJjkK1nKb0-Ohi4lpCplbrUTkCgf4D2gWbFxEhDI_G8,872
|
|
181
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/date.py,sha256=DECAkkhR6qQ-WKsOzQf2VPdYGcyrnAJNk4y4JHDVDuc,1105
|
|
182
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/email.py,sha256=yOHIV_fpF9xzqcrkRIC4cF_aC8C7RsJJvRtEgSn30a8,869
|
|
183
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/files.py,sha256=5_6FpFWoKNriaBRLtNDRUxu1ZO1UTvAFeu4H55VNY68,1058
|
|
184
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py,sha256=zcTeVuXpmuMNh4FHJHW5zgKWAqo0Wx7s9UsSEvA_wR8,1107
|
|
185
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py,sha256=WpWlXz9AwS1rugpvoDoVOo055dVEAt3XmvudD17HJu8,963
|
|
186
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py,sha256=kvmMmEXj3WndR9BG9MHwuM40luA4XhGfnF6rKDpYiF0,902
|
|
187
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py,sha256=LSDfUXpgUox7Z77_TKIlKHqYPUgO8Y06lVgvju6NXx8,1955
|
|
188
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/number.py,sha256=y4ocq8_yX_yKkAtM3qcqIueM9y96-47gshM2mra_tgw,1094
|
|
189
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/people.py,sha256=HBTbqw1L1h8XHKuuS0e0aoz0dAZXSLDy7zRwM1_rRps,1222
|
|
190
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py,sha256=mBEcwNLCI-FLU6t2FqR_tNTvrJFIQ7hqYeTB51HavBc,947
|
|
191
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py,sha256=fYYyzBhi1fmXrhdxu6W6uMr2e6HaDCfrvY7yZIFvgmM,1566
|
|
192
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py,sha256=D2y-98U7MP-YmsrAPeT4vqG3m7HB4zoOzMMhhYN8VHY,1209
|
|
193
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py,sha256=r8UXCW7Y-eE5W6RpXiyyMszCMRDtiwBmYOmYHZ_9-VY,1315
|
|
194
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/select.py,sha256=-UAIuddoyKol45epuOYNlS8dchuwL0wMGwash4BwuH4,1794
|
|
195
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/status.py,sha256=kUFZsWGQZAApEbs5qI37t8LPN0vUM5vcu4pPbEvIGkE,2082
|
|
196
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/title.py,sha256=yd1vPbCBgGIbtUsC3zOu3-Cdpcst0dEkuFVdtS97hxA,1061
|
|
197
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py,sha256=e8WslwVD6ccf4x_3NihX4BWtH7y4zMAFH7Ur4jS3dH8,1211
|
|
198
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/url.py,sha256=Bxu8x7mmH28l2nQSaAmygal8dZdUdHEFbUYIk75B0iQ,911
|
|
199
|
+
unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py,sha256=B9J4zXHVJzzIcn_QD04Z9eibEij1ornw5RhZ2qmUdDU,2405
|
|
200
200
|
unstructured_ingest/processes/connectors/qdrant/__init__.py,sha256=7WN_3M3qQ0O7pUJSXIKtPqAvKX2tQ_WxClCHbFeqPfc,757
|
|
201
201
|
unstructured_ingest/processes/connectors/qdrant/cloud.py,sha256=H5Plp2xqFheESLertj56o78CL4exyCQhBDE1TGAzcWU,1618
|
|
202
202
|
unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRiIHMPcctKyVBdsaLi8KXloAwq76o,1582
|
|
@@ -214,7 +214,7 @@ unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3
|
|
|
214
214
|
unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
|
|
215
215
|
unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
|
|
216
216
|
unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaSNqVlKROm-S3Ql3naLmKvigLBgUQdw,2195
|
|
217
|
-
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=
|
|
217
|
+
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=yB67gxvo3X0UaP_mNeB0HbSWXst7ur0E2QKwLA0gIS4,13647
|
|
218
218
|
unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
219
219
|
unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
|
|
220
220
|
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
|
|
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
231
231
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
232
232
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
233
233
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
234
|
-
unstructured_ingest-1.0.
|
|
235
|
-
unstructured_ingest-1.0.
|
|
236
|
-
unstructured_ingest-1.0.
|
|
237
|
-
unstructured_ingest-1.0.
|
|
238
|
-
unstructured_ingest-1.0.19.dist-info/RECORD,,
|
|
234
|
+
unstructured_ingest-1.0.23.dist-info/METADATA,sha256=b0LZ3XzhlhUgDZd4mEUPxxhOT-lqKAOnDfiQeJhCgoA,8691
|
|
235
|
+
unstructured_ingest-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
236
|
+
unstructured_ingest-1.0.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
237
|
+
unstructured_ingest-1.0.23.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
238
|
+
unstructured_ingest-1.0.23.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.19.dist-info → unstructured_ingest-1.0.23.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|