unstructured-ingest 0.4.7__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -5,13 +5,25 @@ from pathlib import Path
5
5
  import pytest
6
6
  from office365.graph_client import GraphClient
7
7
 
8
- from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, DESTINATION_TAG
8
+ from test.integration.connectors.utils.constants import (
9
+ BLOB_STORAGE_TAG,
10
+ DESTINATION_TAG,
11
+ SOURCE_TAG,
12
+ )
13
+ from test.integration.connectors.utils.validation.source import (
14
+ SourceValidationConfigs,
15
+ source_connector_validation,
16
+ )
9
17
  from test.integration.utils import requires_env
10
18
  from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
11
19
  from unstructured_ingest.v2.processes.connectors.onedrive import (
12
20
  CONNECTOR_TYPE,
13
21
  OnedriveAccessConfig,
14
22
  OnedriveConnectionConfig,
23
+ OnedriveDownloader,
24
+ OnedriveDownloaderConfig,
25
+ OnedriveIndexer,
26
+ OnedriveIndexerConfig,
15
27
  OnedriveUploader,
16
28
  OnedriveUploaderConfig,
17
29
  )
@@ -62,9 +74,46 @@ def get_connection_config():
62
74
  return connection_config
63
75
 
64
76
 
77
+ @pytest.mark.asyncio
78
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
79
+ @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
80
+ async def test_onedrive_source(temp_dir):
81
+ connection_config = get_connection_config()
82
+ index_config = OnedriveIndexerConfig(recursive=True, path="eml")
83
+
84
+ download_config = OnedriveDownloaderConfig(download_dir=temp_dir)
85
+
86
+ # Instantiate indexer and downloader
87
+ indexer = OnedriveIndexer(
88
+ connection_config=connection_config,
89
+ index_config=index_config,
90
+ )
91
+ downloader = OnedriveDownloader(
92
+ connection_config=connection_config,
93
+ download_config=download_config,
94
+ )
95
+
96
+ # Run the source connector validation
97
+ await source_connector_validation(
98
+ indexer=indexer,
99
+ downloader=downloader,
100
+ configs=SourceValidationConfigs(
101
+ test_id="onedrive",
102
+ expected_num_files=1,
103
+ validate_downloaded_files=True,
104
+ exclude_fields_extend=[
105
+ "metadata.date_created",
106
+ "metadata.date_modified",
107
+ "additional_metadata.LastModified",
108
+ "additional_metadata.@microsoft.graph.downloadUrl",
109
+ ],
110
+ ),
111
+ )
112
+
113
+
65
114
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
66
115
  @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
67
- def test_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
116
+ def xtest_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
68
117
  """
69
118
  Integration test for the OneDrive destination connector.
70
119
 
@@ -0,0 +1,71 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, SOURCE_TAG
6
+ from test.integration.connectors.utils.validation.source import (
7
+ SourceValidationConfigs,
8
+ source_connector_validation,
9
+ )
10
+ from test.integration.utils import requires_env
11
+ from unstructured_ingest.v2.processes.connectors.sharepoint import (
12
+ CONNECTOR_TYPE,
13
+ SharepointAccessConfig,
14
+ SharepointConnectionConfig,
15
+ SharepointDownloader,
16
+ SharepointDownloaderConfig,
17
+ SharepointIndexer,
18
+ SharepointIndexerConfig,
19
+ )
20
+
21
+
22
+ @pytest.mark.asyncio
23
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
24
+ @requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
25
+ async def test_sharepoint_source(temp_dir):
26
+ # Retrieve environment variables
27
+ site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
28
+ client_id = os.environ["SHAREPOINT_CLIENT_ID"]
29
+ client_cred = os.environ["SHAREPOINT_CRED"]
30
+ user_pname = os.environ["MS_USER_PNAME"]
31
+ tenant = os.environ["MS_TENANT_ID"]
32
+
33
+ # Create connection and indexer configurations
34
+ access_config = SharepointAccessConfig(client_cred=client_cred)
35
+ connection_config = SharepointConnectionConfig(
36
+ client_id=client_id,
37
+ site=site,
38
+ tenant=tenant,
39
+ user_pname=user_pname,
40
+ access_config=access_config,
41
+ )
42
+ index_config = SharepointIndexerConfig(recursive=True)
43
+
44
+ download_config = SharepointDownloaderConfig(download_dir=temp_dir)
45
+
46
+ # Instantiate indexer and downloader
47
+ indexer = SharepointIndexer(
48
+ connection_config=connection_config,
49
+ index_config=index_config,
50
+ )
51
+ downloader = SharepointDownloader(
52
+ connection_config=connection_config,
53
+ download_config=download_config,
54
+ )
55
+
56
+ # Run the source connector validation
57
+ await source_connector_validation(
58
+ indexer=indexer,
59
+ downloader=downloader,
60
+ configs=SourceValidationConfigs(
61
+ test_id="sharepoint",
62
+ expected_num_files=4,
63
+ validate_downloaded_files=True,
64
+ exclude_fields_extend=[
65
+ "metadata.date_created",
66
+ "metadata.date_modified",
67
+ "additional_metadata.LastModified",
68
+ "additional_metadata.@microsoft.graph.downloadUrl",
69
+ ],
70
+ ),
71
+ )
@@ -10,6 +10,13 @@ from pydantic import Field
10
10
  from test.integration.connectors.utils.validation.utils import ValidationConfig
11
11
  from unstructured_ingest.v2.interfaces import Downloader, FileData, Indexer
12
12
 
13
+ NONSTANDARD_METADATA_FIELDS = {
14
+ "additional_metadata.@microsoft.graph.downloadUrl": [
15
+ "additional_metadata",
16
+ "@microsoft.graph.downloadUrl",
17
+ ]
18
+ }
19
+
13
20
 
14
21
  class SourceValidationConfigs(ValidationConfig):
15
22
  expected_number_indexed_file_data: Optional[int] = None
@@ -26,7 +33,7 @@ class SourceValidationConfigs(ValidationConfig):
26
33
  def get_exclude_fields(self) -> list[str]:
27
34
  exclude_fields = self.exclude_fields
28
35
  exclude_fields.extend(self.exclude_fields_extend)
29
- return exclude_fields
36
+ return list(set(exclude_fields))
30
37
 
31
38
  def run_file_data_validation(
32
39
  self, predownload_file_data: FileData, postdownload_file_data: FileData
@@ -45,8 +52,13 @@ class SourceValidationConfigs(ValidationConfig):
45
52
  exclude_fields = self.get_exclude_fields()
46
53
  # Ignore fields that dynamically change every time the tests run
47
54
  copied_data = data.copy()
55
+
48
56
  for exclude_field in exclude_fields:
49
- exclude_field_vals = exclude_field.split(".")
57
+ exclude_field_vals = (
58
+ NONSTANDARD_METADATA_FIELDS[exclude_field]
59
+ if exclude_field in NONSTANDARD_METADATA_FIELDS
60
+ else exclude_field.split(".")
61
+ )
50
62
  if len(exclude_field_vals) == 1:
51
63
  current_val = copied_data
52
64
  drop_field = exclude_field_vals[0]
@@ -261,21 +273,38 @@ async def source_connector_validation(
261
273
  indexer.precheck()
262
274
  download_dir = downloader.download_config.download_dir
263
275
  test_output_dir = configs.test_output_dir()
264
- for file_data in indexer.run():
265
- assert file_data
266
- predownload_file_data = file_data.model_copy(deep=True)
267
- all_predownload_file_data.append(predownload_file_data)
268
- if downloader.is_async():
269
- resp = await downloader.run_async(file_data=file_data)
270
- else:
271
- resp = downloader.run(file_data=file_data)
272
- if isinstance(resp, list):
273
- for r in resp:
274
- postdownload_file_data = r["file_data"].model_copy(deep=True)
276
+ if indexer.is_async():
277
+ async for file_data in indexer.run_async():
278
+ assert file_data
279
+ predownload_file_data = file_data.model_copy(deep=True)
280
+ all_predownload_file_data.append(predownload_file_data)
281
+ if downloader.is_async():
282
+ resp = await downloader.run_async(file_data=file_data)
283
+ else:
284
+ resp = downloader.run(file_data=file_data)
285
+ if isinstance(resp, list):
286
+ for r in resp:
287
+ postdownload_file_data = r["file_data"].model_copy(deep=True)
288
+ all_postdownload_file_data.append(postdownload_file_data)
289
+ else:
290
+ postdownload_file_data = resp["file_data"].model_copy(deep=True)
291
+ all_postdownload_file_data.append(postdownload_file_data)
292
+ else:
293
+ for file_data in indexer.run():
294
+ assert file_data
295
+ predownload_file_data = file_data.model_copy(deep=True)
296
+ all_predownload_file_data.append(predownload_file_data)
297
+ if downloader.is_async():
298
+ resp = await downloader.run_async(file_data=file_data)
299
+ else:
300
+ resp = downloader.run(file_data=file_data)
301
+ if isinstance(resp, list):
302
+ for r in resp:
303
+ postdownload_file_data = r["file_data"].model_copy(deep=True)
304
+ all_postdownload_file_data.append(postdownload_file_data)
305
+ else:
306
+ postdownload_file_data = resp["file_data"].model_copy(deep=True)
275
307
  all_postdownload_file_data.append(postdownload_file_data)
276
- else:
277
- postdownload_file_data = resp["file_data"].model_copy(deep=True)
278
- all_postdownload_file_data.append(postdownload_file_data)
279
308
  if not overwrite_fixtures:
280
309
  print("Running validation")
281
310
  run_all_validations(
@@ -31,7 +31,7 @@ def get_aws_credentials() -> dict:
31
31
  def test_bedrock_embedder(embedder_file: Path):
32
32
  aws_credentials = get_aws_credentials()
33
33
  embedder_config = EmbedderConfig(
34
- embedding_provider="aws-bedrock",
34
+ embedding_provider="bedrock",
35
35
  embedding_aws_access_key_id=aws_credentials["aws_access_key_id"],
36
36
  embedding_aws_secret_access_key=aws_credentials["aws_secret_access_key"],
37
37
  )
@@ -1,4 +1,3 @@
1
- import json
2
1
  import os
3
2
  from pathlib import Path
4
3
 
@@ -15,6 +14,9 @@ all_partition_files = [path for path in assets_dir.iterdir() if path.is_file()]
15
14
  non_image_partition_files = [
16
15
  path for path in all_partition_files if path.suffix not in [".jpg", ".png", ".tif"]
17
16
  ]
17
+ supported_fast_partition_files = [
18
+ path for path in non_image_partition_files if path.suffix != ".eml"
19
+ ]
18
20
  image_partition_files = [
19
21
  path for path in all_partition_files if path not in non_image_partition_files
20
22
  ]
@@ -33,18 +35,13 @@ async def test_partitioner_api_hi_res(partition_file: Path):
33
35
  )
34
36
  partitioner = Partitioner(config=partitioner_config)
35
37
  results = await partitioner.run_async(filename=partition_file)
36
- results_dir = int_test_dir / "results"
37
- results_dir.mkdir(exist_ok=True)
38
- results_path = results_dir / f"{partition_file.name}.json"
39
- with results_path.open("w") as f:
40
- json.dump(results, f, indent=2)
41
38
  assert results
42
39
 
43
40
 
44
41
  @pytest.mark.parametrize(
45
42
  "partition_file",
46
- non_image_partition_files,
47
- ids=[path.name for path in non_image_partition_files],
43
+ supported_fast_partition_files,
44
+ ids=[path.name for path in supported_fast_partition_files],
48
45
  )
49
46
  @requires_env("UNSTRUCTURED_API_KEY", "UNSTRUCTURED_API_URL")
50
47
  @pytest.mark.asyncio
@@ -68,7 +65,11 @@ async def test_partitioner_api_fast_error(partition_file: Path):
68
65
  api_key = os.getenv("UNSTRUCTURED_API_KEY")
69
66
  api_url = os.getenv("UNSTRUCTURED_API_URL")
70
67
  partitioner_config = PartitionerConfig(
71
- strategy="fast", partition_by_api=True, api_key=api_key, partition_endpoint=api_url
68
+ strategy="fast",
69
+ partition_by_api=True,
70
+ api_key=api_key,
71
+ partition_endpoint=api_url,
72
+ raise_unsupported_filetype=True,
72
73
  )
73
74
  partitioner = Partitioner(config=partitioner_config)
74
75
  with pytest.raises(UserError):
@@ -1 +1 @@
1
- __version__ = "0.4.7" # pragma: no cover
1
+ __version__ = "0.5.0" # pragma: no cover
@@ -417,7 +417,7 @@ class CliEmbeddingConfig(EmbeddingConfig, CliMixin):
417
417
  embed_providers = [
418
418
  "openai",
419
419
  "huggingface",
420
- "aws-bedrock",
420
+ "bedrock",
421
421
  "vertexai",
422
422
  "voyageai",
423
423
  "octoai",
@@ -226,7 +226,7 @@ class EmbeddingConfig(BaseConfig):
226
226
  )
227
227
 
228
228
  return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs))
229
- elif self.provider == "aws-bedrock":
229
+ elif self.provider == "bedrock":
230
230
  from unstructured_ingest.embed.bedrock import (
231
231
  BedrockEmbeddingConfig,
232
232
  BedrockEmbeddingEncoder,
@@ -268,6 +268,7 @@ class Pipeline:
268
268
 
269
269
  # Partition content
270
270
  elements = self.partitioner_step(downloaded_data)
271
+ elements = self.clean_results(results=elements)
271
272
  # Download data non longer needed, delete if possible
272
273
  self.downloader_step.delete_cache()
273
274
  elements = self.clean_results(results=elements)
@@ -105,6 +105,7 @@ class OnedriveIndexerConfig(IndexerConfig):
105
105
  class OnedriveIndexer(Indexer):
106
106
  connection_config: OnedriveConnectionConfig
107
107
  index_config: OnedriveIndexerConfig
108
+ connector_type: str = CONNECTOR_TYPE
108
109
 
109
110
  def precheck(self) -> None:
110
111
  try:
@@ -172,7 +173,7 @@ class OnedriveIndexer(Indexer):
172
173
  )
173
174
  return FileData(
174
175
  identifier=drive_item.id,
175
- connector_type=CONNECTOR_TYPE,
176
+ connector_type=self.connector_type,
176
177
  source_identifiers=SourceIdentifiers(
177
178
  fullpath=server_path, filename=drive_item.name, rel_path=rel_path
178
179
  ),
@@ -201,7 +202,8 @@ class OnedriveIndexer(Indexer):
201
202
  token_resp = await asyncio.to_thread(self.connection_config.get_token)
202
203
  if "error" in token_resp:
203
204
  raise SourceConnectionError(
204
- f"[{CONNECTOR_TYPE}]: {token_resp['error']} ({token_resp.get('error_description')})"
205
+ f"[{self.connector_type}]: {token_resp['error']} "
206
+ f"({token_resp.get('error_description')})"
205
207
  )
206
208
 
207
209
  client = await asyncio.to_thread(self.connection_config.get_client)
@@ -221,6 +223,7 @@ class OnedriveDownloaderConfig(DownloaderConfig):
221
223
  class OnedriveDownloader(Downloader):
222
224
  connection_config: OnedriveConnectionConfig
223
225
  download_config: OnedriveDownloaderConfig
226
+ connector_type: str = CONNECTOR_TYPE
224
227
 
225
228
  @SourceConnectionNetworkError.wrap
226
229
  def _fetch_file(self, file_data: FileData) -> DriveItem:
@@ -260,7 +263,9 @@ class OnedriveDownloader(Downloader):
260
263
  file.download_session(f).execute_query()
261
264
  return self.generate_download_response(file_data=file_data, download_path=download_path)
262
265
  except Exception as e:
263
- logger.error(f"[{CONNECTOR_TYPE}] Exception during downloading: {e}", exc_info=True)
266
+ logger.error(
267
+ f"[{self.connector_type}] Exception during downloading: {e}", exc_info=True
268
+ )
264
269
  # Re-raise to see full stack trace locally
265
270
  raise
266
271
 
@@ -56,6 +56,7 @@ class SharepointIndexerConfig(OnedriveIndexerConfig):
56
56
  class SharepointIndexer(OnedriveIndexer):
57
57
  connection_config: SharepointConnectionConfig
58
58
  index_config: SharepointIndexerConfig
59
+ connector_type: str = CONNECTOR_TYPE
59
60
 
60
61
  @requires_dependencies(["office365"], extras="sharepoint")
61
62
  async def run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
@@ -64,7 +65,8 @@ class SharepointIndexer(OnedriveIndexer):
64
65
  token_resp = await asyncio.to_thread(self.connection_config.get_token)
65
66
  if "error" in token_resp:
66
67
  raise SourceConnectionError(
67
- f"[{CONNECTOR_TYPE}]: {token_resp['error']} ({token_resp.get('error_description')})"
68
+ f"[{self.connector_type}]: {token_resp['error']} "
69
+ f"({token_resp.get('error_description')})"
68
70
  )
69
71
 
70
72
  client = await asyncio.to_thread(self.connection_config.get_client)
@@ -90,6 +92,7 @@ class SharepointDownloaderConfig(OnedriveDownloaderConfig):
90
92
  class SharepointDownloader(OnedriveDownloader):
91
93
  connection_config: SharepointConnectionConfig
92
94
  download_config: SharepointDownloaderConfig
95
+ connector_type: str = CONNECTOR_TYPE
93
96
 
94
97
  @SourceConnectionNetworkError.wrap
95
98
  @requires_dependencies(["office365"], extras="onedrive")
@@ -18,7 +18,7 @@ class EmbedderConfig(BaseModel):
18
18
  "openai",
19
19
  "azure-openai",
20
20
  "huggingface",
21
- "aws-bedrock",
21
+ "bedrock",
22
22
  "vertexai",
23
23
  "voyageai",
24
24
  "octoai",
@@ -162,7 +162,7 @@ class EmbedderConfig(BaseModel):
162
162
  if self.embedding_provider == "octoai":
163
163
  return self.get_octoai_embedder(embedding_kwargs=kwargs)
164
164
 
165
- if self.embedding_provider == "aws-bedrock":
165
+ if self.embedding_provider == "bedrock":
166
166
  return self.get_bedrock_embedder()
167
167
 
168
168
  if self.embedding_provider == "vertexai":
@@ -1,3 +1,4 @@
1
+ import json
1
2
  from abc import ABC
2
3
  from dataclasses import dataclass
3
4
  from pathlib import Path
@@ -7,6 +8,7 @@ from pydantic import BaseModel, Field, SecretStr
7
8
 
8
9
  from unstructured_ingest.utils.data_prep import flatten_dict
9
10
  from unstructured_ingest.utils.dep_check import requires_dependencies
11
+ from unstructured_ingest.v2.errors import UserError
10
12
  from unstructured_ingest.v2.interfaces.process import BaseProcess
11
13
  from unstructured_ingest.v2.logger import logger
12
14
  from unstructured_ingest.v2.unstructured_api import call_api_async
@@ -73,6 +75,9 @@ class PartitionerConfig(BaseModel):
73
75
  hi_res_model_name: Optional[str] = Field(
74
76
  default=None, description="Model name for hi-res strategy."
75
77
  )
78
+ raise_unsupported_filetype: bool = Field(
79
+ default=False, description="Raise an error if the file type is not supported"
80
+ )
76
81
 
77
82
  def model_post_init(self, __context: Any) -> None:
78
83
  if self.metadata_exclude and self.metadata_include:
@@ -151,13 +156,25 @@ class Partitioner(BaseProcess, ABC):
151
156
  class FileDataSourceMetadata(DataSourceMetadata):
152
157
  filesize_bytes: Optional[int] = None
153
158
 
159
+ metadata = metadata or {}
154
160
  logger.debug(f"using local partition with kwargs: {self.config.to_partition_kwargs()}")
155
161
  logger.debug(f"partitioning file {filename} with metadata {metadata}")
156
- elements = partition(
157
- filename=str(filename.resolve()),
158
- data_source_metadata=FileDataSourceMetadata.from_dict(metadata),
159
- **self.config.to_partition_kwargs(),
160
- )
162
+ try:
163
+ elements = partition(
164
+ filename=str(filename.resolve()),
165
+ data_source_metadata=FileDataSourceMetadata.from_dict(metadata),
166
+ **self.config.to_partition_kwargs(),
167
+ )
168
+ except ValueError as sdk_error:
169
+ if (
170
+ self.is_unstructured_error_unsupported_filetype(sdk_error=sdk_error)
171
+ and not self.config.raise_unsupported_filetype
172
+ ):
173
+ logger.warning(
174
+ f"Unsupported file type for strategy {self.config.strategy}: {filename}"
175
+ )
176
+ return []
177
+ raise sdk_error
161
178
  return self.postprocess(elements=elements_to_dicts(elements))
162
179
 
163
180
  @requires_dependencies(dependencies=["unstructured_client"], extras="remote")
@@ -179,10 +196,37 @@ class Partitioner(BaseProcess, ABC):
179
196
  element["metadata"]["data_source"] = metadata
180
197
  return self.postprocess(elements=elements)
181
198
 
199
+ def is_unstructured_error_unsupported_filetype(self, sdk_error: ValueError) -> bool:
200
+ error_msg = sdk_error.args[0]
201
+ return (
202
+ "Invalid file" in error_msg
203
+ or "Unstructured schema" in error_msg
204
+ or "fast strategy is not available for image files" in error_msg
205
+ )
206
+
207
+ def is_client_error_unsupported_filetype(self, error: UserError) -> bool:
208
+ error_msg = error.args[0]
209
+ error_dict = json.loads(error_msg)
210
+ details = error_dict["detail"]
211
+ return "fast strategy is not available for image files" in details or (
212
+ "file type" in details.lower() and "is not supported" in details.lower()
213
+ )
214
+
182
215
  def run(self, filename: Path, metadata: Optional[dict] = None, **kwargs) -> list[dict]:
183
216
  return self.partition_locally(filename, metadata=metadata, **kwargs)
184
217
 
185
218
  async def run_async(
186
219
  self, filename: Path, metadata: Optional[dict] = None, **kwargs
187
220
  ) -> list[dict]:
188
- return await self.partition_via_api(filename, metadata=metadata, **kwargs)
221
+ try:
222
+ return await self.partition_via_api(filename, metadata=metadata, **kwargs)
223
+ except UserError as user_error:
224
+ if (
225
+ self.is_client_error_unsupported_filetype(error=user_error)
226
+ and not self.config.raise_unsupported_filetype
227
+ ):
228
+ logger.warning(
229
+ f"Unsupported file type for strategy {self.config.strategy}: {filename}"
230
+ )
231
+ return []
232
+ raise user_error
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.4.7
3
+ Version: 0.5.0
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -23,30 +23,30 @@ Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
25
  Requires-Dist: pandas
26
- Requires-Dist: pydantic>=2.7
27
26
  Requires-Dist: dataclasses-json
27
+ Requires-Dist: pydantic>=2.7
28
+ Requires-Dist: click
29
+ Requires-Dist: tqdm
28
30
  Requires-Dist: python-dateutil
29
31
  Requires-Dist: opentelemetry-sdk
30
- Requires-Dist: tqdm
31
- Requires-Dist: click
32
32
  Provides-Extra: airtable
33
33
  Requires-Dist: pyairtable; extra == "airtable"
34
34
  Provides-Extra: astradb
35
35
  Requires-Dist: astrapy; extra == "astradb"
36
36
  Provides-Extra: azure
37
- Requires-Dist: fsspec; extra == "azure"
38
37
  Requires-Dist: adlfs; extra == "azure"
38
+ Requires-Dist: fsspec; extra == "azure"
39
39
  Provides-Extra: azure-ai-search
40
40
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
41
41
  Provides-Extra: bedrock
42
- Requires-Dist: boto3; extra == "bedrock"
43
42
  Requires-Dist: aioboto3; extra == "bedrock"
43
+ Requires-Dist: boto3; extra == "bedrock"
44
44
  Provides-Extra: biomed
45
45
  Requires-Dist: requests; extra == "biomed"
46
46
  Requires-Dist: bs4; extra == "biomed"
47
47
  Provides-Extra: box
48
- Requires-Dist: fsspec; extra == "box"
49
48
  Requires-Dist: boxfs; extra == "box"
49
+ Requires-Dist: fsspec; extra == "box"
50
50
  Provides-Extra: chroma
51
51
  Requires-Dist: chromadb; extra == "chroma"
52
52
  Provides-Extra: clarifai
@@ -63,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
63
63
  Provides-Extra: databricks-volumes
64
64
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
65
65
  Provides-Extra: delta-table
66
- Requires-Dist: boto3; extra == "delta-table"
67
66
  Requires-Dist: deltalake; extra == "delta-table"
67
+ Requires-Dist: boto3; extra == "delta-table"
68
68
  Provides-Extra: discord
69
69
  Requires-Dist: discord.py; extra == "discord"
70
70
  Provides-Extra: doc
@@ -92,19 +92,19 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
92
92
  Provides-Extra: epub
93
93
  Requires-Dist: unstructured[epub]; extra == "epub"
94
94
  Provides-Extra: gcs
95
- Requires-Dist: fsspec; extra == "gcs"
96
- Requires-Dist: bs4; extra == "gcs"
97
95
  Requires-Dist: gcsfs; extra == "gcs"
96
+ Requires-Dist: bs4; extra == "gcs"
97
+ Requires-Dist: fsspec; extra == "gcs"
98
98
  Provides-Extra: github
99
- Requires-Dist: requests; extra == "github"
100
99
  Requires-Dist: pygithub>1.58.0; extra == "github"
100
+ Requires-Dist: requests; extra == "github"
101
101
  Provides-Extra: gitlab
102
102
  Requires-Dist: python-gitlab; extra == "gitlab"
103
103
  Provides-Extra: google-drive
104
104
  Requires-Dist: google-api-python-client; extra == "google-drive"
105
105
  Provides-Extra: hubspot
106
- Requires-Dist: urllib3; extra == "hubspot"
107
106
  Requires-Dist: hubspot-api-client; extra == "hubspot"
107
+ Requires-Dist: urllib3; extra == "hubspot"
108
108
  Provides-Extra: jira
109
109
  Requires-Dist: atlassian-python-api; extra == "jira"
110
110
  Provides-Extra: kafka
@@ -127,15 +127,15 @@ Requires-Dist: cymple; extra == "neo4j"
127
127
  Requires-Dist: neo4j; extra == "neo4j"
128
128
  Provides-Extra: notion
129
129
  Requires-Dist: httpx; extra == "notion"
130
- Requires-Dist: htmlBuilder; extra == "notion"
131
- Requires-Dist: notion-client; extra == "notion"
132
130
  Requires-Dist: backoff; extra == "notion"
131
+ Requires-Dist: notion-client; extra == "notion"
132
+ Requires-Dist: htmlBuilder; extra == "notion"
133
133
  Provides-Extra: odt
134
134
  Requires-Dist: unstructured[odt]; extra == "odt"
135
135
  Provides-Extra: onedrive
136
- Requires-Dist: msal; extra == "onedrive"
137
136
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
137
  Requires-Dist: bs4; extra == "onedrive"
138
+ Requires-Dist: msal; extra == "onedrive"
139
139
  Provides-Extra: openai
140
140
  Requires-Dist: tiktoken; extra == "openai"
141
141
  Requires-Dist: openai; extra == "openai"
@@ -144,8 +144,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
144
144
  Provides-Extra: org
145
145
  Requires-Dist: unstructured[org]; extra == "org"
146
146
  Provides-Extra: outlook
147
- Requires-Dist: msal; extra == "outlook"
148
147
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
148
+ Requires-Dist: msal; extra == "outlook"
149
149
  Provides-Extra: pdf
150
150
  Requires-Dist: unstructured[pdf]; extra == "pdf"
151
151
  Provides-Extra: pinecone
@@ -169,35 +169,35 @@ Requires-Dist: unstructured[rst]; extra == "rst"
169
169
  Provides-Extra: rtf
170
170
  Requires-Dist: unstructured[rtf]; extra == "rtf"
171
171
  Provides-Extra: s3
172
- Requires-Dist: fsspec; extra == "s3"
173
172
  Requires-Dist: s3fs; extra == "s3"
173
+ Requires-Dist: fsspec; extra == "s3"
174
174
  Provides-Extra: salesforce
175
175
  Requires-Dist: simple-salesforce; extra == "salesforce"
176
176
  Provides-Extra: sftp
177
177
  Requires-Dist: paramiko; extra == "sftp"
178
178
  Requires-Dist: fsspec; extra == "sftp"
179
179
  Provides-Extra: sharepoint
180
- Requires-Dist: msal; extra == "sharepoint"
181
180
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
181
+ Requires-Dist: msal; extra == "sharepoint"
182
182
  Provides-Extra: singlestore
183
183
  Requires-Dist: singlestoredb; extra == "singlestore"
184
184
  Provides-Extra: slack
185
185
  Requires-Dist: slack-sdk[optional]; extra == "slack"
186
186
  Provides-Extra: snowflake
187
- Requires-Dist: snowflake-connector-python; extra == "snowflake"
188
187
  Requires-Dist: psycopg2-binary; extra == "snowflake"
188
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
189
189
  Provides-Extra: togetherai
190
190
  Requires-Dist: together; extra == "togetherai"
191
191
  Provides-Extra: tsv
192
192
  Requires-Dist: unstructured[tsv]; extra == "tsv"
193
193
  Provides-Extra: vastdb
194
194
  Requires-Dist: vastdb; extra == "vastdb"
195
- Requires-Dist: pyarrow; extra == "vastdb"
196
195
  Requires-Dist: ibis; extra == "vastdb"
196
+ Requires-Dist: pyarrow; extra == "vastdb"
197
197
  Provides-Extra: vectara
198
- Requires-Dist: aiofiles; extra == "vectara"
199
198
  Requires-Dist: httpx; extra == "vectara"
200
199
  Requires-Dist: requests; extra == "vectara"
200
+ Requires-Dist: aiofiles; extra == "vectara"
201
201
  Provides-Extra: weaviate
202
202
  Requires-Dist: weaviate-client; extra == "weaviate"
203
203
  Provides-Extra: wikipedia
@@ -15,11 +15,12 @@ test/integration/connectors/test_milvus.py,sha256=7mI6zznN0PTxDL9DLogH1k3dxx6R8D
15
15
  test/integration/connectors/test_mongodb.py,sha256=0A6DvF-iTCSZzOefisd_i20j9li8uNWTF2wyLGwlhco,12446
16
16
  test/integration/connectors/test_neo4j.py,sha256=r4TRYtTXeeOdcRcfa_gvslhSKvoIWrwN1FRJ5XRoH4k,8456
17
17
  test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qkoV3srltFOSSKTA,5403
18
- test/integration/connectors/test_onedrive.py,sha256=rjgN2LhaW1htEMBJPxmlP_kcRB7p_oOeZcogFlHyJH4,3721
18
+ test/integration/connectors/test_onedrive.py,sha256=iwiDK0kWCfQbIEPnWUzzAA5PiCsHcmFZSxEcIZy_6cc,5229
19
19
  test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_YlgCzZB7wRU17ehQ0,12407
20
20
  test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
21
21
  test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
22
22
  test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
23
+ test/integration/connectors/test_sharepoint.py,sha256=8HlcnrP4K8oPUzef6AA11P2cMlxSp7tiddTkT4JOeRU,2378
23
24
  test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
24
25
  test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
26
  test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
@@ -46,7 +47,7 @@ test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQ
46
47
  test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
48
  test/integration/connectors/utils/validation/destination.py,sha256=ZvMSvqz9in35xaoUJGx9rG8oWCU3FYlfLLQ6sfdI0pw,2649
48
49
  test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
49
- test/integration/connectors/utils/validation/source.py,sha256=VALU5ms_JBu_eFkp2WQ7oZtJKozJ8MZSJ7h7ZA3Fz_Q,12296
50
+ test/integration/connectors/utils/validation/source.py,sha256=xnAZI26ILdeMhgrWAGrU2N2fqK58YNGkfyUhJekZ0Ho,13541
50
51
  test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
51
52
  test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
53
  test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
@@ -55,7 +56,7 @@ test/integration/connectors/weaviate/test_local.py,sha256=gXMpnzVcrNQdptDjx0haPW
55
56
  test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
57
  test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
57
58
  test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
58
- test/integration/embedders/test_bedrock.py,sha256=ZehreheLgY9Bqdjk-3MQOaou9IP-H3Pcz7WWiOWAxTU,3557
59
+ test/integration/embedders/test_bedrock.py,sha256=vmjoi1uUk-LX4Yz0ZPn6Ry1JdVEsyIhLhPbSPmkeT9o,3553
59
60
  test/integration/embedders/test_huggingface.py,sha256=qFblyXounVNRaNkk3gbKoBqU5E2dNecgKU2Bz2LyOa8,989
60
61
  test/integration/embedders/test_mixedbread.py,sha256=lLz_cooyC38VSo-FMHbhKpHvYs3QzA20NOIvM5oooaw,1998
61
62
  test/integration/embedders/test_octoai.py,sha256=qs-bqZ7iGWO_BzUZvKJmOHBT3cmFSkEYbleWhj3snJc,2197
@@ -65,7 +66,7 @@ test/integration/embedders/test_vertexai.py,sha256=4-E4plJXFf1b02RhOqOCBHR2GA4gT
65
66
  test/integration/embedders/test_voyageai.py,sha256=Gm3sVjhsym1ASIDfr-sZoCbpsNMaAk_l4E3-dtjRCQ4,1832
66
67
  test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
67
68
  test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
69
+ test/integration/partitioners/test_partitioner.py,sha256=6sdZhhtqEICBPqEgpKrCQIfJ-7hKcwuTFqjWs1mbQf8,2787
69
70
  test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
71
  test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
71
72
  test/unit/test_html.py,sha256=LKGi_QaH4U4gktrbd2NcURL-d-0Rm1UnG5Y6r9EvTG0,4489
@@ -102,9 +103,9 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
102
103
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
104
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
104
105
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
105
- unstructured_ingest/__version__.py,sha256=i2QrUEuUnVPQuTv5hg_JWbhbwm5k6KU4hPIFq0SIgdc,42
106
+ unstructured_ingest/__version__.py,sha256=A09Ks7MDqP-QtYP9TIQMxydOZeCTtu9i7xoq5wdy4As,42
106
107
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
107
- unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
108
+ unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
108
109
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
109
110
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
110
111
  unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
@@ -112,7 +113,7 @@ unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29
112
113
  unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
113
114
  unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
114
115
  unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
115
- unstructured_ingest/cli/interfaces.py,sha256=lpaaOdAQ4NMsawVaHSk5lXCcZ0Mw85kRzfElu1ODCB0,24090
116
+ unstructured_ingest/cli/interfaces.py,sha256=pvEwNfYwINx3-TQ0LPudjpYNR3PnanUiXpEePPEtRSw,24086
116
117
  unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
117
118
  unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
119
  unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
@@ -399,7 +400,7 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-
399
400
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
400
401
  unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
401
402
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
402
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=4IwCWMlBrMpZI6V82q5nzrbyQNDVM62AQsWt6MUBWa8,16508
403
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=b37fQGm_lGutQ3Jc0qePB15lkBiFavH9tCso3inm-3I,16564
403
404
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
404
405
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
405
406
  unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
@@ -413,9 +414,9 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
413
414
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
414
415
  unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
415
416
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
416
- unstructured_ingest/v2/processes/embedder.py,sha256=uiuCOSwwasHp4eqtewMvgnM86WVch7HDFiWqpGLahvo,7812
417
+ unstructured_ingest/v2/processes/embedder.py,sha256=PTBlRgNbAXkSaLg7JrZzHwAoqpHmopg8jNU1TmaXguU,7804
417
418
  unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
418
- unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
419
+ unstructured_ingest/v2/processes/partitioner.py,sha256=ZC9mt85I3o_SLR4DvE7vPBGphMET994phFkTuT-L9B8,9998
419
420
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
420
421
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=KO1zn-96Qa49TOSZn-gv_RUMGMCmUcdtHoeJqCpxPLY,6219
421
422
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
@@ -433,12 +434,12 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
433
434
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
434
435
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
435
436
  unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
436
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=b616B_-9MfU6gxvpw7IBUa2szNFURA_VP8q5j2FXxnA,17632
437
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
437
438
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
438
439
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
439
440
  unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
440
441
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
441
- unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=SdcbOEUzgi1sUZJA6doZDm-a8d4F3Qtud-OVbDKW7Ng,4456
442
+ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=f0F7KioXgucVc3tVASTa67ynlTa4s9_FKGPHop6Xm0A,4563
442
443
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
443
444
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
444
445
  unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
@@ -562,9 +563,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
562
563
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
563
564
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
564
565
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
565
- unstructured_ingest-0.4.7.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
566
- unstructured_ingest-0.4.7.dist-info/METADATA,sha256=yGcahQ8fZmoU_c1h02b76tRn5w0uj_931AAQKlFrqxs,8051
567
- unstructured_ingest-0.4.7.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
568
- unstructured_ingest-0.4.7.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
569
- unstructured_ingest-0.4.7.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
570
- unstructured_ingest-0.4.7.dist-info/RECORD,,
566
+ unstructured_ingest-0.5.0.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
567
+ unstructured_ingest-0.5.0.dist-info/METADATA,sha256=dyxZ7tfjq1tkZPJgaK6ZanQwB6pteSIznmfUhAgnT64,8051
568
+ unstructured_ingest-0.5.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
569
+ unstructured_ingest-0.5.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
570
+ unstructured_ingest-0.5.0.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
571
+ unstructured_ingest-0.5.0.dist-info/RECORD,,