unstructured-ingest 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/test_sharepoint.py +161 -10
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +7 -7
- unstructured_ingest/v2/processes/connectors/sharepoint.py +9 -4
- unstructured_ingest/v2/processes/connectors/utils.py +28 -0
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/METADATA +17 -17
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/RECORD +12 -12
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/top_level.txt +0 -0
|
@@ -19,24 +19,31 @@ from unstructured_ingest.v2.processes.connectors.sharepoint import (
|
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
def sharepoint_config():
|
|
23
|
+
class SharepointTestConfig:
|
|
24
|
+
def __init__(self):
|
|
25
|
+
self.client_id = os.environ["SHAREPOINT_CLIENT_ID"]
|
|
26
|
+
self.client_cred = os.environ["SHAREPOINT_CRED"]
|
|
27
|
+
self.user_pname = os.environ["MS_USER_PNAME"]
|
|
28
|
+
self.tenant = os.environ["MS_TENANT_ID"]
|
|
29
|
+
|
|
30
|
+
return SharepointTestConfig()
|
|
31
|
+
|
|
32
|
+
|
|
22
33
|
@pytest.mark.asyncio
|
|
23
34
|
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
24
35
|
@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
|
|
25
36
|
async def test_sharepoint_source(temp_dir):
|
|
26
|
-
# Retrieve environment variables
|
|
27
37
|
site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
|
|
28
|
-
|
|
29
|
-
client_cred = os.environ["SHAREPOINT_CRED"]
|
|
30
|
-
user_pname = os.environ["MS_USER_PNAME"]
|
|
31
|
-
tenant = os.environ["MS_TENANT_ID"]
|
|
38
|
+
config = sharepoint_config()
|
|
32
39
|
|
|
33
40
|
# Create connection and indexer configurations
|
|
34
|
-
access_config = SharepointAccessConfig(client_cred=client_cred)
|
|
41
|
+
access_config = SharepointAccessConfig(client_cred=config.client_cred)
|
|
35
42
|
connection_config = SharepointConnectionConfig(
|
|
36
|
-
client_id=client_id,
|
|
43
|
+
client_id=config.client_id,
|
|
37
44
|
site=site,
|
|
38
|
-
tenant=tenant,
|
|
39
|
-
user_pname=user_pname,
|
|
45
|
+
tenant=config.tenant,
|
|
46
|
+
user_pname=config.user_pname,
|
|
40
47
|
access_config=access_config,
|
|
41
48
|
)
|
|
42
49
|
index_config = SharepointIndexerConfig(recursive=True)
|
|
@@ -58,7 +65,151 @@ async def test_sharepoint_source(temp_dir):
|
|
|
58
65
|
indexer=indexer,
|
|
59
66
|
downloader=downloader,
|
|
60
67
|
configs=SourceValidationConfigs(
|
|
61
|
-
test_id="
|
|
68
|
+
test_id="sharepoint1",
|
|
69
|
+
expected_num_files=4,
|
|
70
|
+
validate_downloaded_files=True,
|
|
71
|
+
exclude_fields_extend=[
|
|
72
|
+
"metadata.date_created",
|
|
73
|
+
"metadata.date_modified",
|
|
74
|
+
"additional_metadata.LastModified",
|
|
75
|
+
"additional_metadata.@microsoft.graph.downloadUrl",
|
|
76
|
+
],
|
|
77
|
+
),
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@pytest.mark.asyncio
|
|
82
|
+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
83
|
+
@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
|
|
84
|
+
async def test_sharepoint_source_with_path(temp_dir):
|
|
85
|
+
site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
|
|
86
|
+
config = sharepoint_config()
|
|
87
|
+
|
|
88
|
+
# Create connection and indexer configurations
|
|
89
|
+
access_config = SharepointAccessConfig(client_cred=config.client_cred)
|
|
90
|
+
connection_config = SharepointConnectionConfig(
|
|
91
|
+
client_id=config.client_id,
|
|
92
|
+
site=site,
|
|
93
|
+
tenant=config.tenant,
|
|
94
|
+
user_pname=config.user_pname,
|
|
95
|
+
access_config=access_config,
|
|
96
|
+
)
|
|
97
|
+
index_config = SharepointIndexerConfig(recursive=True, path="Folder1")
|
|
98
|
+
|
|
99
|
+
download_config = SharepointDownloaderConfig(download_dir=temp_dir)
|
|
100
|
+
|
|
101
|
+
# Instantiate indexer and downloader
|
|
102
|
+
indexer = SharepointIndexer(
|
|
103
|
+
connection_config=connection_config,
|
|
104
|
+
index_config=index_config,
|
|
105
|
+
)
|
|
106
|
+
downloader = SharepointDownloader(
|
|
107
|
+
connection_config=connection_config,
|
|
108
|
+
download_config=download_config,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
# Run the source connector validation
|
|
112
|
+
await source_connector_validation(
|
|
113
|
+
indexer=indexer,
|
|
114
|
+
downloader=downloader,
|
|
115
|
+
configs=SourceValidationConfigs(
|
|
116
|
+
test_id="sharepoint2",
|
|
117
|
+
expected_num_files=2,
|
|
118
|
+
validate_downloaded_files=True,
|
|
119
|
+
exclude_fields_extend=[
|
|
120
|
+
"metadata.date_created",
|
|
121
|
+
"metadata.date_modified",
|
|
122
|
+
"additional_metadata.LastModified",
|
|
123
|
+
"additional_metadata.@microsoft.graph.downloadUrl",
|
|
124
|
+
],
|
|
125
|
+
),
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@pytest.mark.asyncio
|
|
130
|
+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
131
|
+
@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
|
|
132
|
+
async def test_sharepoint_root_with_path(temp_dir):
|
|
133
|
+
site = "https://unstructuredio.sharepoint.com/"
|
|
134
|
+
config = sharepoint_config()
|
|
135
|
+
|
|
136
|
+
# Create connection and indexer configurations
|
|
137
|
+
access_config = SharepointAccessConfig(client_cred=config.client_cred)
|
|
138
|
+
connection_config = SharepointConnectionConfig(
|
|
139
|
+
client_id=config.client_id,
|
|
140
|
+
site=site,
|
|
141
|
+
tenant=config.tenant,
|
|
142
|
+
user_pname=config.user_pname,
|
|
143
|
+
access_config=access_config,
|
|
144
|
+
)
|
|
145
|
+
index_config = SharepointIndexerConfig(recursive=True, path="e2e-test-folder")
|
|
146
|
+
|
|
147
|
+
download_config = SharepointDownloaderConfig(download_dir=temp_dir)
|
|
148
|
+
|
|
149
|
+
# Instantiate indexer and downloader
|
|
150
|
+
indexer = SharepointIndexer(
|
|
151
|
+
connection_config=connection_config,
|
|
152
|
+
index_config=index_config,
|
|
153
|
+
)
|
|
154
|
+
downloader = SharepointDownloader(
|
|
155
|
+
connection_config=connection_config,
|
|
156
|
+
download_config=download_config,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Run the source connector validation
|
|
160
|
+
await source_connector_validation(
|
|
161
|
+
indexer=indexer,
|
|
162
|
+
downloader=downloader,
|
|
163
|
+
configs=SourceValidationConfigs(
|
|
164
|
+
test_id="sharepoint3",
|
|
165
|
+
expected_num_files=1,
|
|
166
|
+
validate_downloaded_files=True,
|
|
167
|
+
exclude_fields_extend=[
|
|
168
|
+
"metadata.date_created",
|
|
169
|
+
"metadata.date_modified",
|
|
170
|
+
"additional_metadata.LastModified",
|
|
171
|
+
"additional_metadata.@microsoft.graph.downloadUrl",
|
|
172
|
+
],
|
|
173
|
+
),
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@pytest.mark.asyncio
|
|
178
|
+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
179
|
+
@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
|
|
180
|
+
async def test_sharepoint_shared_documents(temp_dir):
|
|
181
|
+
site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
|
|
182
|
+
config = sharepoint_config()
|
|
183
|
+
|
|
184
|
+
# Create connection and indexer configurations
|
|
185
|
+
access_config = SharepointAccessConfig(client_cred=config.client_cred)
|
|
186
|
+
connection_config = SharepointConnectionConfig(
|
|
187
|
+
client_id=config.client_id,
|
|
188
|
+
site=site,
|
|
189
|
+
tenant=config.tenant,
|
|
190
|
+
user_pname=config.user_pname,
|
|
191
|
+
access_config=access_config,
|
|
192
|
+
)
|
|
193
|
+
index_config = SharepointIndexerConfig(recursive=True, path="Shared Documents")
|
|
194
|
+
|
|
195
|
+
download_config = SharepointDownloaderConfig(download_dir=temp_dir)
|
|
196
|
+
|
|
197
|
+
# Instantiate indexer and downloader
|
|
198
|
+
indexer = SharepointIndexer(
|
|
199
|
+
connection_config=connection_config,
|
|
200
|
+
index_config=index_config,
|
|
201
|
+
)
|
|
202
|
+
downloader = SharepointDownloader(
|
|
203
|
+
connection_config=connection_config,
|
|
204
|
+
download_config=download_config,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# Run the source connector validation
|
|
208
|
+
await source_connector_validation(
|
|
209
|
+
indexer=indexer,
|
|
210
|
+
downloader=downloader,
|
|
211
|
+
configs=SourceValidationConfigs(
|
|
212
|
+
test_id="sharepoint4",
|
|
62
213
|
expected_num_files=4,
|
|
63
214
|
validate_downloaded_files=True,
|
|
64
215
|
exclude_fields_extend=[
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.3" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.5" # pragma: no cover
|
|
@@ -42,6 +42,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
42
42
|
DestinationRegistryEntry,
|
|
43
43
|
SourceRegistryEntry,
|
|
44
44
|
)
|
|
45
|
+
from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
|
|
45
46
|
|
|
46
47
|
if TYPE_CHECKING:
|
|
47
48
|
from astrapy import AsyncCollection as AstraDBAsyncCollection
|
|
@@ -318,6 +319,7 @@ class AstraDBUploadStager(UploadStager):
|
|
|
318
319
|
element_dict["metadata"]["text_as_html"] = truncate_string_bytes(
|
|
319
320
|
text_as_html, MAX_CONTENT_PARAM_BYTE_SIZE
|
|
320
321
|
)
|
|
322
|
+
metadata["original_elements"] = format_and_truncate_orig_elements(element_dict)
|
|
321
323
|
|
|
322
324
|
def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
|
|
323
325
|
self.truncate_dict_elements(element_dict)
|
|
@@ -14,7 +14,6 @@ from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
|
|
|
14
14
|
|
|
15
15
|
from unstructured_ingest.error import DestinationConnectionError
|
|
16
16
|
from unstructured_ingest.logger import logger
|
|
17
|
-
from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
|
|
18
17
|
from unstructured_ingest.utils.data_prep import batch_generator
|
|
19
18
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
19
|
from unstructured_ingest.v2.interfaces import (
|
|
@@ -29,6 +28,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
29
28
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
30
29
|
DestinationRegistryEntry,
|
|
31
30
|
)
|
|
31
|
+
from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
|
|
32
32
|
|
|
33
33
|
SimilarityFunction = Literal["cosine"]
|
|
34
34
|
|
|
@@ -132,7 +132,7 @@ class Neo4jUploadStager(UploadStager):
|
|
|
132
132
|
if self._is_chunk(element):
|
|
133
133
|
origin_element_nodes = [
|
|
134
134
|
self._create_element_node(origin_element)
|
|
135
|
-
for origin_element in self._get_origin_elements(element)
|
|
135
|
+
for origin_element in format_and_truncate_orig_elements(element)
|
|
136
136
|
]
|
|
137
137
|
graph.add_edges_from(
|
|
138
138
|
[
|
|
@@ -166,7 +166,11 @@ class Neo4jUploadStager(UploadStager):
|
|
|
166
166
|
return _Node(id_=file_data.identifier, properties=properties, labels=[Label.DOCUMENT])
|
|
167
167
|
|
|
168
168
|
def _create_element_node(self, element: dict) -> _Node:
|
|
169
|
-
properties = {"id": element["element_id"]
|
|
169
|
+
properties = {"id": element["element_id"]}
|
|
170
|
+
|
|
171
|
+
if text := element.get("text"):
|
|
172
|
+
# if we have chunks, we won't have text here for the original elements
|
|
173
|
+
properties["text"] = text
|
|
170
174
|
|
|
171
175
|
if embeddings := element.get("embeddings"):
|
|
172
176
|
properties["embeddings"] = embeddings
|
|
@@ -174,10 +178,6 @@ class Neo4jUploadStager(UploadStager):
|
|
|
174
178
|
label = Label.CHUNK if self._is_chunk(element) else Label.UNSTRUCTURED_ELEMENT
|
|
175
179
|
return _Node(id_=element["element_id"], properties=properties, labels=[label])
|
|
176
180
|
|
|
177
|
-
def _get_origin_elements(self, chunk_element: dict) -> list[dict]:
|
|
178
|
-
orig_elements = chunk_element.get("metadata", {}).get("orig_elements")
|
|
179
|
-
return elements_from_base64_gzipped_json(raw_s=orig_elements)
|
|
180
|
-
|
|
181
181
|
|
|
182
182
|
class _GraphData(BaseModel):
|
|
183
183
|
nodes: list[_Node]
|
|
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
|
|
|
31
31
|
from office365.onedrive.driveitems.driveItem import DriveItem
|
|
32
32
|
|
|
33
33
|
CONNECTOR_TYPE = "sharepoint"
|
|
34
|
+
LEGACY_DEFAULT_PATH = "Shared Documents"
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
class SharepointAccessConfig(OnedriveAccessConfig):
|
|
@@ -76,10 +77,14 @@ class SharepointIndexer(OnedriveIndexer):
|
|
|
76
77
|
except ClientRequestException:
|
|
77
78
|
logger.info("Site not found")
|
|
78
79
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
path = self.index_config.path
|
|
81
|
+
# Deprecated sharepoint sdk needed a default path. Microsoft Graph SDK does not.
|
|
82
|
+
if path and path != LEGACY_DEFAULT_PATH:
|
|
83
|
+
site_drive_item = site_drive_item.get_by_path(path).get().execute_query()
|
|
84
|
+
|
|
85
|
+
for drive_item in site_drive_item.get_files(
|
|
86
|
+
recursive=self.index_config.recursive
|
|
87
|
+
).execute_query():
|
|
83
88
|
file_data = await self.drive_item_to_file_data(drive_item=drive_item)
|
|
84
89
|
yield file_data
|
|
85
90
|
|
|
@@ -5,6 +5,8 @@ from typing import Any, Union
|
|
|
5
5
|
from dateutil import parser
|
|
6
6
|
from pydantic import ValidationError
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
|
|
10
12
|
if isinstance(date_value, datetime):
|
|
@@ -27,3 +29,29 @@ def conform_string_to_dict(value: Any) -> dict:
|
|
|
27
29
|
if isinstance(value, str):
|
|
28
30
|
return json.loads(value)
|
|
29
31
|
raise ValidationError(f"Input could not be mapped to a valid dict: {value}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
|
|
35
|
+
"""
|
|
36
|
+
This function is used to format and truncate the orig_elements field in the metadata.
|
|
37
|
+
This is used to remove the text field and other larger fields from the orig_elements
|
|
38
|
+
that are not helpful in filtering/searching when used along with chunked elements.
|
|
39
|
+
"""
|
|
40
|
+
metadata = element.get("metadata", {})
|
|
41
|
+
raw_orig_elements = metadata.get("orig_elements", None)
|
|
42
|
+
orig_elements = []
|
|
43
|
+
if raw_orig_elements is not None:
|
|
44
|
+
for element in elements_from_base64_gzipped_json(raw_orig_elements):
|
|
45
|
+
element.pop("text", None)
|
|
46
|
+
for prop in (
|
|
47
|
+
"image_base64",
|
|
48
|
+
"text_as_html",
|
|
49
|
+
"table_as_cells",
|
|
50
|
+
"link_urls",
|
|
51
|
+
"link_texts",
|
|
52
|
+
"link_start_indexes",
|
|
53
|
+
"emphasized_text_contents",
|
|
54
|
+
):
|
|
55
|
+
element["metadata"].pop(prop, None)
|
|
56
|
+
orig_elements.append(element)
|
|
57
|
+
return orig_elements
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.5.3
|
|
3
|
+
Version: 0.5.5
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist: click
|
|
25
|
+
Requires-Dist: tqdm
|
|
27
26
|
Requires-Dist: pydantic>=2.7
|
|
27
|
+
Requires-Dist: click
|
|
28
|
+
Requires-Dist: opentelemetry-sdk
|
|
28
29
|
Requires-Dist: python-dateutil
|
|
29
|
-
Requires-Dist: dataclasses_json
|
|
30
30
|
Requires-Dist: pandas
|
|
31
|
-
Requires-Dist:
|
|
31
|
+
Requires-Dist: dataclasses_json
|
|
32
32
|
Provides-Extra: remote
|
|
33
33
|
Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
|
|
34
34
|
Provides-Extra: csv
|
|
@@ -71,8 +71,8 @@ Requires-Dist: adlfs; extra == "azure"
|
|
|
71
71
|
Provides-Extra: azure-ai-search
|
|
72
72
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
73
73
|
Provides-Extra: biomed
|
|
74
|
-
Requires-Dist: bs4; extra == "biomed"
|
|
75
74
|
Requires-Dist: requests; extra == "biomed"
|
|
75
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
76
76
|
Provides-Extra: box
|
|
77
77
|
Requires-Dist: fsspec; extra == "box"
|
|
78
78
|
Requires-Dist: boxfs; extra == "box"
|
|
@@ -98,9 +98,9 @@ Requires-Dist: duckdb; extra == "duckdb"
|
|
|
98
98
|
Provides-Extra: elasticsearch
|
|
99
99
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
100
100
|
Provides-Extra: gcs
|
|
101
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
101
102
|
Requires-Dist: fsspec; extra == "gcs"
|
|
102
103
|
Requires-Dist: bs4; extra == "gcs"
|
|
103
|
-
Requires-Dist: gcsfs; extra == "gcs"
|
|
104
104
|
Provides-Extra: github
|
|
105
105
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
106
106
|
Requires-Dist: requests; extra == "github"
|
|
@@ -109,8 +109,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
|
|
|
109
109
|
Provides-Extra: google-drive
|
|
110
110
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
111
111
|
Provides-Extra: hubspot
|
|
112
|
-
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
113
112
|
Requires-Dist: urllib3; extra == "hubspot"
|
|
113
|
+
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
114
114
|
Provides-Extra: jira
|
|
115
115
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
116
116
|
Provides-Extra: kafka
|
|
@@ -124,23 +124,23 @@ Requires-Dist: pymilvus; extra == "milvus"
|
|
|
124
124
|
Provides-Extra: mongodb
|
|
125
125
|
Requires-Dist: pymongo; extra == "mongodb"
|
|
126
126
|
Provides-Extra: neo4j
|
|
127
|
-
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
128
127
|
Requires-Dist: cymple; extra == "neo4j"
|
|
128
|
+
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
129
129
|
Requires-Dist: networkx; extra == "neo4j"
|
|
130
130
|
Provides-Extra: notion
|
|
131
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
131
132
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
132
|
-
Requires-Dist: httpx; extra == "notion"
|
|
133
133
|
Requires-Dist: backoff; extra == "notion"
|
|
134
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
134
|
+
Requires-Dist: httpx; extra == "notion"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
137
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
138
136
|
Requires-Dist: msal; extra == "onedrive"
|
|
137
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
138
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
139
139
|
Provides-Extra: opensearch
|
|
140
140
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
141
141
|
Provides-Extra: outlook
|
|
142
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
143
142
|
Requires-Dist: msal; extra == "outlook"
|
|
143
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
144
144
|
Provides-Extra: pinecone
|
|
145
145
|
Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
|
|
146
146
|
Provides-Extra: postgres
|
|
@@ -155,13 +155,13 @@ Provides-Extra: s3
|
|
|
155
155
|
Requires-Dist: fsspec; extra == "s3"
|
|
156
156
|
Requires-Dist: s3fs; extra == "s3"
|
|
157
157
|
Provides-Extra: sharepoint
|
|
158
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
159
158
|
Requires-Dist: msal; extra == "sharepoint"
|
|
159
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
160
160
|
Provides-Extra: salesforce
|
|
161
161
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
162
162
|
Provides-Extra: sftp
|
|
163
|
-
Requires-Dist: fsspec; extra == "sftp"
|
|
164
163
|
Requires-Dist: paramiko; extra == "sftp"
|
|
164
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
165
165
|
Provides-Extra: slack
|
|
166
166
|
Requires-Dist: slack_sdk[optional]; extra == "slack"
|
|
167
167
|
Provides-Extra: snowflake
|
|
@@ -179,8 +179,8 @@ Provides-Extra: singlestore
|
|
|
179
179
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
180
180
|
Provides-Extra: vectara
|
|
181
181
|
Requires-Dist: aiofiles; extra == "vectara"
|
|
182
|
-
Requires-Dist: httpx; extra == "vectara"
|
|
183
182
|
Requires-Dist: requests; extra == "vectara"
|
|
183
|
+
Requires-Dist: httpx; extra == "vectara"
|
|
184
184
|
Provides-Extra: vastdb
|
|
185
185
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
186
186
|
Requires-Dist: ibis; extra == "vastdb"
|
|
@@ -21,7 +21,7 @@ test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_Yl
|
|
|
21
21
|
test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
|
|
22
22
|
test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
|
|
23
23
|
test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
|
|
24
|
-
test/integration/connectors/test_sharepoint.py,sha256=
|
|
24
|
+
test/integration/connectors/test_sharepoint.py,sha256=weGby5YD6se7R7KLEq96hxUZYPzwoqZqXXTPhtQWZsQ,7646
|
|
25
25
|
test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
|
|
26
26
|
test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
|
|
@@ -107,7 +107,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
107
107
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
108
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
109
109
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
110
|
-
unstructured_ingest/__version__.py,sha256=
|
|
110
|
+
unstructured_ingest/__version__.py,sha256=hq-SHV5wD53er4rg53emQu0ZaUQDdyW7b_j9iCQj9es,42
|
|
111
111
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
112
112
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
113
113
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -424,7 +424,7 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=ZC9mt85I3o_SLR4DvE7vPBGph
|
|
|
424
424
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
425
425
|
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=KO1zn-96Qa49TOSZn-gv_RUMGMCmUcdtHoeJqCpxPLY,6219
|
|
426
426
|
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
|
|
427
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
427
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=v2M-xpI7NViikEaHCmuWUQU5XokDOOWbOFXYUXF63Ps,15002
|
|
428
428
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
429
429
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
430
430
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=_zkiST0FTggEKNORalCcZZIRGZKnCM0LLcavgQZfDVE,11112
|
|
@@ -437,15 +437,15 @@ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOG
|
|
|
437
437
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
|
|
438
438
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
|
|
439
439
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
440
|
-
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=
|
|
440
|
+
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=ijp5hjmDpLoIHL9UJzV4_4vVtQBlQ2R_vLatlUYivX4,17464
|
|
441
441
|
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
|
|
442
442
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
443
443
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=U5gSa8S08JvCwmAhE8aV0yxGTIFnUlKVsQDybE8Fqb8,10746
|
|
444
444
|
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
|
|
445
445
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
446
|
-
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=
|
|
446
|
+
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=2T9Bm1H_ALwHhG_YP7vsuUUW-mUg61zcaae3aa9BnN4,4827
|
|
447
447
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
448
|
-
unstructured_ingest/v2/processes/connectors/utils.py,sha256=
|
|
448
|
+
unstructured_ingest/v2/processes/connectors/utils.py,sha256=ru_4e5lo5t1jJhR8sGYa5nNhX3gKTgC5B7Oze9qQJjo,2000
|
|
449
449
|
unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
|
|
450
450
|
unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
451
451
|
unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
|
|
@@ -567,9 +567,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
|
|
|
567
567
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
568
568
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
569
569
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
|
|
570
|
-
unstructured_ingest-0.5.
|
|
571
|
-
unstructured_ingest-0.5.
|
|
572
|
-
unstructured_ingest-0.5.
|
|
573
|
-
unstructured_ingest-0.5.
|
|
574
|
-
unstructured_ingest-0.5.
|
|
575
|
-
unstructured_ingest-0.5.
|
|
570
|
+
unstructured_ingest-0.5.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
571
|
+
unstructured_ingest-0.5.5.dist-info/METADATA,sha256=6HKF0p1ZqHp-ZBAbflvLIHGWPk6i8QvmgA8ltgOPytI,8316
|
|
572
|
+
unstructured_ingest-0.5.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
573
|
+
unstructured_ingest-0.5.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
574
|
+
unstructured_ingest-0.5.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
575
|
+
unstructured_ingest-0.5.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.5.3.dist-info → unstructured_ingest-0.5.5.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|