unstructured-ingest 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -19,24 +19,31 @@ from unstructured_ingest.v2.processes.connectors.sharepoint import (
19
19
  )
20
20
 
21
21
 
22
+ def sharepoint_config():
23
+ class SharepointTestConfig:
24
+ def __init__(self):
25
+ self.client_id = os.environ["SHAREPOINT_CLIENT_ID"]
26
+ self.client_cred = os.environ["SHAREPOINT_CRED"]
27
+ self.user_pname = os.environ["MS_USER_PNAME"]
28
+ self.tenant = os.environ["MS_TENANT_ID"]
29
+
30
+ return SharepointTestConfig()
31
+
32
+
22
33
  @pytest.mark.asyncio
23
34
  @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
24
35
  @requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
25
36
  async def test_sharepoint_source(temp_dir):
26
- # Retrieve environment variables
27
37
  site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
28
- client_id = os.environ["SHAREPOINT_CLIENT_ID"]
29
- client_cred = os.environ["SHAREPOINT_CRED"]
30
- user_pname = os.environ["MS_USER_PNAME"]
31
- tenant = os.environ["MS_TENANT_ID"]
38
+ config = sharepoint_config()
32
39
 
33
40
  # Create connection and indexer configurations
34
- access_config = SharepointAccessConfig(client_cred=client_cred)
41
+ access_config = SharepointAccessConfig(client_cred=config.client_cred)
35
42
  connection_config = SharepointConnectionConfig(
36
- client_id=client_id,
43
+ client_id=config.client_id,
37
44
  site=site,
38
- tenant=tenant,
39
- user_pname=user_pname,
45
+ tenant=config.tenant,
46
+ user_pname=config.user_pname,
40
47
  access_config=access_config,
41
48
  )
42
49
  index_config = SharepointIndexerConfig(recursive=True)
@@ -58,7 +65,151 @@ async def test_sharepoint_source(temp_dir):
58
65
  indexer=indexer,
59
66
  downloader=downloader,
60
67
  configs=SourceValidationConfigs(
61
- test_id="sharepoint",
68
+ test_id="sharepoint1",
69
+ expected_num_files=4,
70
+ validate_downloaded_files=True,
71
+ exclude_fields_extend=[
72
+ "metadata.date_created",
73
+ "metadata.date_modified",
74
+ "additional_metadata.LastModified",
75
+ "additional_metadata.@microsoft.graph.downloadUrl",
76
+ ],
77
+ ),
78
+ )
79
+
80
+
81
+ @pytest.mark.asyncio
82
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
83
+ @requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
84
+ async def test_sharepoint_source_with_path(temp_dir):
85
+ site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
86
+ config = sharepoint_config()
87
+
88
+ # Create connection and indexer configurations
89
+ access_config = SharepointAccessConfig(client_cred=config.client_cred)
90
+ connection_config = SharepointConnectionConfig(
91
+ client_id=config.client_id,
92
+ site=site,
93
+ tenant=config.tenant,
94
+ user_pname=config.user_pname,
95
+ access_config=access_config,
96
+ )
97
+ index_config = SharepointIndexerConfig(recursive=True, path="Folder1")
98
+
99
+ download_config = SharepointDownloaderConfig(download_dir=temp_dir)
100
+
101
+ # Instantiate indexer and downloader
102
+ indexer = SharepointIndexer(
103
+ connection_config=connection_config,
104
+ index_config=index_config,
105
+ )
106
+ downloader = SharepointDownloader(
107
+ connection_config=connection_config,
108
+ download_config=download_config,
109
+ )
110
+
111
+ # Run the source connector validation
112
+ await source_connector_validation(
113
+ indexer=indexer,
114
+ downloader=downloader,
115
+ configs=SourceValidationConfigs(
116
+ test_id="sharepoint2",
117
+ expected_num_files=2,
118
+ validate_downloaded_files=True,
119
+ exclude_fields_extend=[
120
+ "metadata.date_created",
121
+ "metadata.date_modified",
122
+ "additional_metadata.LastModified",
123
+ "additional_metadata.@microsoft.graph.downloadUrl",
124
+ ],
125
+ ),
126
+ )
127
+
128
+
129
+ @pytest.mark.asyncio
130
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
131
+ @requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
132
+ async def test_sharepoint_root_with_path(temp_dir):
133
+ site = "https://unstructuredio.sharepoint.com/"
134
+ config = sharepoint_config()
135
+
136
+ # Create connection and indexer configurations
137
+ access_config = SharepointAccessConfig(client_cred=config.client_cred)
138
+ connection_config = SharepointConnectionConfig(
139
+ client_id=config.client_id,
140
+ site=site,
141
+ tenant=config.tenant,
142
+ user_pname=config.user_pname,
143
+ access_config=access_config,
144
+ )
145
+ index_config = SharepointIndexerConfig(recursive=True, path="e2e-test-folder")
146
+
147
+ download_config = SharepointDownloaderConfig(download_dir=temp_dir)
148
+
149
+ # Instantiate indexer and downloader
150
+ indexer = SharepointIndexer(
151
+ connection_config=connection_config,
152
+ index_config=index_config,
153
+ )
154
+ downloader = SharepointDownloader(
155
+ connection_config=connection_config,
156
+ download_config=download_config,
157
+ )
158
+
159
+ # Run the source connector validation
160
+ await source_connector_validation(
161
+ indexer=indexer,
162
+ downloader=downloader,
163
+ configs=SourceValidationConfigs(
164
+ test_id="sharepoint3",
165
+ expected_num_files=1,
166
+ validate_downloaded_files=True,
167
+ exclude_fields_extend=[
168
+ "metadata.date_created",
169
+ "metadata.date_modified",
170
+ "additional_metadata.LastModified",
171
+ "additional_metadata.@microsoft.graph.downloadUrl",
172
+ ],
173
+ ),
174
+ )
175
+
176
+
177
+ @pytest.mark.asyncio
178
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
179
+ @requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
180
+ async def test_sharepoint_shared_documents(temp_dir):
181
+ site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
182
+ config = sharepoint_config()
183
+
184
+ # Create connection and indexer configurations
185
+ access_config = SharepointAccessConfig(client_cred=config.client_cred)
186
+ connection_config = SharepointConnectionConfig(
187
+ client_id=config.client_id,
188
+ site=site,
189
+ tenant=config.tenant,
190
+ user_pname=config.user_pname,
191
+ access_config=access_config,
192
+ )
193
+ index_config = SharepointIndexerConfig(recursive=True, path="Shared Documents")
194
+
195
+ download_config = SharepointDownloaderConfig(download_dir=temp_dir)
196
+
197
+ # Instantiate indexer and downloader
198
+ indexer = SharepointIndexer(
199
+ connection_config=connection_config,
200
+ index_config=index_config,
201
+ )
202
+ downloader = SharepointDownloader(
203
+ connection_config=connection_config,
204
+ download_config=download_config,
205
+ )
206
+
207
+ # Run the source connector validation
208
+ await source_connector_validation(
209
+ indexer=indexer,
210
+ downloader=downloader,
211
+ configs=SourceValidationConfigs(
212
+ test_id="sharepoint4",
62
213
  expected_num_files=4,
63
214
  validate_downloaded_files=True,
64
215
  exclude_fields_extend=[
@@ -1 +1 @@
1
- __version__ = "0.5.3" # pragma: no cover
1
+ __version__ = "0.5.5" # pragma: no cover
@@ -42,6 +42,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
42
42
  DestinationRegistryEntry,
43
43
  SourceRegistryEntry,
44
44
  )
45
+ from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
45
46
 
46
47
  if TYPE_CHECKING:
47
48
  from astrapy import AsyncCollection as AstraDBAsyncCollection
@@ -318,6 +319,7 @@ class AstraDBUploadStager(UploadStager):
318
319
  element_dict["metadata"]["text_as_html"] = truncate_string_bytes(
319
320
  text_as_html, MAX_CONTENT_PARAM_BYTE_SIZE
320
321
  )
322
+ metadata["original_elements"] = format_and_truncate_orig_elements(element_dict)
321
323
 
322
324
  def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
323
325
  self.truncate_dict_elements(element_dict)
@@ -14,7 +14,6 @@ from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
14
14
 
15
15
  from unstructured_ingest.error import DestinationConnectionError
16
16
  from unstructured_ingest.logger import logger
17
- from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
18
17
  from unstructured_ingest.utils.data_prep import batch_generator
19
18
  from unstructured_ingest.utils.dep_check import requires_dependencies
20
19
  from unstructured_ingest.v2.interfaces import (
@@ -29,6 +28,7 @@ from unstructured_ingest.v2.interfaces import (
29
28
  from unstructured_ingest.v2.processes.connector_registry import (
30
29
  DestinationRegistryEntry,
31
30
  )
31
+ from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
32
32
 
33
33
  SimilarityFunction = Literal["cosine"]
34
34
 
@@ -132,7 +132,7 @@ class Neo4jUploadStager(UploadStager):
132
132
  if self._is_chunk(element):
133
133
  origin_element_nodes = [
134
134
  self._create_element_node(origin_element)
135
- for origin_element in self._get_origin_elements(element)
135
+ for origin_element in format_and_truncate_orig_elements(element)
136
136
  ]
137
137
  graph.add_edges_from(
138
138
  [
@@ -166,7 +166,11 @@ class Neo4jUploadStager(UploadStager):
166
166
  return _Node(id_=file_data.identifier, properties=properties, labels=[Label.DOCUMENT])
167
167
 
168
168
  def _create_element_node(self, element: dict) -> _Node:
169
- properties = {"id": element["element_id"], "text": element["text"]}
169
+ properties = {"id": element["element_id"]}
170
+
171
+ if text := element.get("text"):
172
+ # if we have chunks, we won't have text here for the original elements
173
+ properties["text"] = text
170
174
 
171
175
  if embeddings := element.get("embeddings"):
172
176
  properties["embeddings"] = embeddings
@@ -174,10 +178,6 @@ class Neo4jUploadStager(UploadStager):
174
178
  label = Label.CHUNK if self._is_chunk(element) else Label.UNSTRUCTURED_ELEMENT
175
179
  return _Node(id_=element["element_id"], properties=properties, labels=[label])
176
180
 
177
- def _get_origin_elements(self, chunk_element: dict) -> list[dict]:
178
- orig_elements = chunk_element.get("metadata", {}).get("orig_elements")
179
- return elements_from_base64_gzipped_json(raw_s=orig_elements)
180
-
181
181
 
182
182
  class _GraphData(BaseModel):
183
183
  nodes: list[_Node]
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
31
31
  from office365.onedrive.driveitems.driveItem import DriveItem
32
32
 
33
33
  CONNECTOR_TYPE = "sharepoint"
34
+ LEGACY_DEFAULT_PATH = "Shared Documents"
34
35
 
35
36
 
36
37
  class SharepointAccessConfig(OnedriveAccessConfig):
@@ -76,10 +77,14 @@ class SharepointIndexer(OnedriveIndexer):
76
77
  except ClientRequestException:
77
78
  logger.info("Site not found")
78
79
 
79
- drive_items = await self.list_objects(
80
- folder=site_drive_item, recursive=self.index_config.recursive
81
- )
82
- for drive_item in drive_items:
80
+ path = self.index_config.path
81
+ # Deprecated sharepoint sdk needed a default path. Microsoft Graph SDK does not.
82
+ if path and path != LEGACY_DEFAULT_PATH:
83
+ site_drive_item = site_drive_item.get_by_path(path).get().execute_query()
84
+
85
+ for drive_item in site_drive_item.get_files(
86
+ recursive=self.index_config.recursive
87
+ ).execute_query():
83
88
  file_data = await self.drive_item_to_file_data(drive_item=drive_item)
84
89
  yield file_data
85
90
 
@@ -5,6 +5,8 @@ from typing import Any, Union
5
5
  from dateutil import parser
6
6
  from pydantic import ValidationError
7
7
 
8
+ from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
9
+
8
10
 
9
11
  def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
10
12
  if isinstance(date_value, datetime):
@@ -27,3 +29,29 @@ def conform_string_to_dict(value: Any) -> dict:
27
29
  if isinstance(value, str):
28
30
  return json.loads(value)
29
31
  raise ValidationError(f"Input could not be mapped to a valid dict: {value}")
32
+
33
+
34
+ def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
35
+ """
36
+ This function is used to format and truncate the orig_elements field in the metadata.
37
+ This is used to remove the text field and other larger fields from the orig_elements
38
+ that are not helpful in filtering/searching when used along with chunked elements.
39
+ """
40
+ metadata = element.get("metadata", {})
41
+ raw_orig_elements = metadata.get("orig_elements", None)
42
+ orig_elements = []
43
+ if raw_orig_elements is not None:
44
+ for element in elements_from_base64_gzipped_json(raw_orig_elements):
45
+ element.pop("text", None)
46
+ for prop in (
47
+ "image_base64",
48
+ "text_as_html",
49
+ "table_as_cells",
50
+ "link_urls",
51
+ "link_texts",
52
+ "link_start_indexes",
53
+ "emphasized_text_contents",
54
+ ):
55
+ element["metadata"].pop(prop, None)
56
+ orig_elements.append(element)
57
+ return orig_elements
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.5.3
3
+ Version: 0.5.5
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: opentelemetry-sdk
26
- Requires-Dist: click
25
+ Requires-Dist: tqdm
27
26
  Requires-Dist: pydantic>=2.7
27
+ Requires-Dist: click
28
+ Requires-Dist: opentelemetry-sdk
28
29
  Requires-Dist: python-dateutil
29
- Requires-Dist: dataclasses_json
30
30
  Requires-Dist: pandas
31
- Requires-Dist: tqdm
31
+ Requires-Dist: dataclasses_json
32
32
  Provides-Extra: remote
33
33
  Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
34
34
  Provides-Extra: csv
@@ -71,8 +71,8 @@ Requires-Dist: adlfs; extra == "azure"
71
71
  Provides-Extra: azure-ai-search
72
72
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
73
73
  Provides-Extra: biomed
74
- Requires-Dist: bs4; extra == "biomed"
75
74
  Requires-Dist: requests; extra == "biomed"
75
+ Requires-Dist: bs4; extra == "biomed"
76
76
  Provides-Extra: box
77
77
  Requires-Dist: fsspec; extra == "box"
78
78
  Requires-Dist: boxfs; extra == "box"
@@ -98,9 +98,9 @@ Requires-Dist: duckdb; extra == "duckdb"
98
98
  Provides-Extra: elasticsearch
99
99
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
100
100
  Provides-Extra: gcs
101
+ Requires-Dist: gcsfs; extra == "gcs"
101
102
  Requires-Dist: fsspec; extra == "gcs"
102
103
  Requires-Dist: bs4; extra == "gcs"
103
- Requires-Dist: gcsfs; extra == "gcs"
104
104
  Provides-Extra: github
105
105
  Requires-Dist: pygithub>1.58.0; extra == "github"
106
106
  Requires-Dist: requests; extra == "github"
@@ -109,8 +109,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
109
109
  Provides-Extra: google-drive
110
110
  Requires-Dist: google-api-python-client; extra == "google-drive"
111
111
  Provides-Extra: hubspot
112
- Requires-Dist: hubspot-api-client; extra == "hubspot"
113
112
  Requires-Dist: urllib3; extra == "hubspot"
113
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
114
114
  Provides-Extra: jira
115
115
  Requires-Dist: atlassian-python-api; extra == "jira"
116
116
  Provides-Extra: kafka
@@ -124,23 +124,23 @@ Requires-Dist: pymilvus; extra == "milvus"
124
124
  Provides-Extra: mongodb
125
125
  Requires-Dist: pymongo; extra == "mongodb"
126
126
  Provides-Extra: neo4j
127
- Requires-Dist: neo4j-rust-ext; extra == "neo4j"
128
127
  Requires-Dist: cymple; extra == "neo4j"
128
+ Requires-Dist: neo4j-rust-ext; extra == "neo4j"
129
129
  Requires-Dist: networkx; extra == "neo4j"
130
130
  Provides-Extra: notion
131
+ Requires-Dist: notion-client; extra == "notion"
131
132
  Requires-Dist: htmlBuilder; extra == "notion"
132
- Requires-Dist: httpx; extra == "notion"
133
133
  Requires-Dist: backoff; extra == "notion"
134
- Requires-Dist: notion-client; extra == "notion"
134
+ Requires-Dist: httpx; extra == "notion"
135
135
  Provides-Extra: onedrive
136
- Requires-Dist: bs4; extra == "onedrive"
137
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
136
  Requires-Dist: msal; extra == "onedrive"
137
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
+ Requires-Dist: bs4; extra == "onedrive"
139
139
  Provides-Extra: opensearch
140
140
  Requires-Dist: opensearch-py; extra == "opensearch"
141
141
  Provides-Extra: outlook
142
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
143
142
  Requires-Dist: msal; extra == "outlook"
143
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
144
144
  Provides-Extra: pinecone
145
145
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
146
146
  Provides-Extra: postgres
@@ -155,13 +155,13 @@ Provides-Extra: s3
155
155
  Requires-Dist: fsspec; extra == "s3"
156
156
  Requires-Dist: s3fs; extra == "s3"
157
157
  Provides-Extra: sharepoint
158
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
159
158
  Requires-Dist: msal; extra == "sharepoint"
159
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
160
160
  Provides-Extra: salesforce
161
161
  Requires-Dist: simple-salesforce; extra == "salesforce"
162
162
  Provides-Extra: sftp
163
- Requires-Dist: fsspec; extra == "sftp"
164
163
  Requires-Dist: paramiko; extra == "sftp"
164
+ Requires-Dist: fsspec; extra == "sftp"
165
165
  Provides-Extra: slack
166
166
  Requires-Dist: slack_sdk[optional]; extra == "slack"
167
167
  Provides-Extra: snowflake
@@ -179,8 +179,8 @@ Provides-Extra: singlestore
179
179
  Requires-Dist: singlestoredb; extra == "singlestore"
180
180
  Provides-Extra: vectara
181
181
  Requires-Dist: aiofiles; extra == "vectara"
182
- Requires-Dist: httpx; extra == "vectara"
183
182
  Requires-Dist: requests; extra == "vectara"
183
+ Requires-Dist: httpx; extra == "vectara"
184
184
  Provides-Extra: vastdb
185
185
  Requires-Dist: pyarrow; extra == "vastdb"
186
186
  Requires-Dist: ibis; extra == "vastdb"
@@ -21,7 +21,7 @@ test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_Yl
21
21
  test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
22
22
  test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
23
23
  test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
24
- test/integration/connectors/test_sharepoint.py,sha256=8HlcnrP4K8oPUzef6AA11P2cMlxSp7tiddTkT4JOeRU,2378
24
+ test/integration/connectors/test_sharepoint.py,sha256=weGby5YD6se7R7KLEq96hxUZYPzwoqZqXXTPhtQWZsQ,7646
25
25
  test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
26
26
  test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
@@ -107,7 +107,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
107
107
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
109
109
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
110
- unstructured_ingest/__version__.py,sha256=qWgfAaALtcimLu1dKMS-KpBB0Tnw60hH7XF06t-OzjI,42
110
+ unstructured_ingest/__version__.py,sha256=hq-SHV5wD53er4rg53emQu0ZaUQDdyW7b_j9iCQj9es,42
111
111
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
112
112
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
113
113
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -424,7 +424,7 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=ZC9mt85I3o_SLR4DvE7vPBGph
424
424
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
425
425
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=KO1zn-96Qa49TOSZn-gv_RUMGMCmUcdtHoeJqCpxPLY,6219
426
426
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
427
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
427
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=v2M-xpI7NViikEaHCmuWUQU5XokDOOWbOFXYUXF63Ps,15002
428
428
  unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
429
429
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
430
430
  unstructured_ingest/v2/processes/connectors/confluence.py,sha256=_zkiST0FTggEKNORalCcZZIRGZKnCM0LLcavgQZfDVE,11112
@@ -437,15 +437,15 @@ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOG
437
437
  unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
438
438
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
439
439
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
440
- unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=sjwQWp6gPP_MR8vh4aaMJUzPmkGT_3FODTlB5-7tVh0,17525
440
+ unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=ijp5hjmDpLoIHL9UJzV4_4vVtQBlQ2R_vLatlUYivX4,17464
441
441
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
442
442
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
443
443
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=U5gSa8S08JvCwmAhE8aV0yxGTIFnUlKVsQDybE8Fqb8,10746
444
444
  unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
445
445
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
446
- unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=f0F7KioXgucVc3tVASTa67ynlTa4s9_FKGPHop6Xm0A,4563
446
+ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=2T9Bm1H_ALwHhG_YP7vsuUUW-mUg61zcaae3aa9BnN4,4827
447
447
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
448
- unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
448
+ unstructured_ingest/v2/processes/connectors/utils.py,sha256=ru_4e5lo5t1jJhR8sGYa5nNhX3gKTgC5B7Oze9qQJjo,2000
449
449
  unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
450
450
  unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
451
451
  unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
@@ -567,9 +567,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
567
567
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
568
568
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
569
569
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
570
- unstructured_ingest-0.5.3.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
571
- unstructured_ingest-0.5.3.dist-info/METADATA,sha256=Ypb6HJHp4T_Y7kfL2hmU03MfYFJYe8LptQZ4JzzRFEY,8316
572
- unstructured_ingest-0.5.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
573
- unstructured_ingest-0.5.3.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
574
- unstructured_ingest-0.5.3.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
575
- unstructured_ingest-0.5.3.dist-info/RECORD,,
570
+ unstructured_ingest-0.5.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
571
+ unstructured_ingest-0.5.5.dist-info/METADATA,sha256=6HKF0p1ZqHp-ZBAbflvLIHGWPk6i8QvmgA8ltgOPytI,8316
572
+ unstructured_ingest-0.5.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
573
+ unstructured_ingest-0.5.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
574
+ unstructured_ingest-0.5.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
575
+ unstructured_ingest-0.5.5.dist-info/RECORD,,