unstructured-ingest: 0.4.2 (py3-none-any wheel) → 0.4.4 (py3-none-any wheel)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (40):
  1. test/integration/connectors/weaviate/test_local.py +27 -6
  2. test/integration/embedders/test_azure_openai.py +1 -3
  3. test/integration/embedders/test_bedrock.py +2 -2
  4. test/integration/embedders/test_huggingface.py +1 -3
  5. test/integration/embedders/test_mixedbread.py +2 -2
  6. test/integration/embedders/test_octoai.py +2 -4
  7. test/integration/embedders/test_openai.py +2 -4
  8. test/integration/embedders/test_togetherai.py +2 -2
  9. test/integration/embedders/test_vertexai.py +2 -4
  10. test/integration/embedders/test_voyageai.py +2 -4
  11. test/integration/embedders/utils.py +12 -14
  12. test/unit/embed/test_openai.py +12 -4
  13. test/unit/test_html.py +112 -0
  14. test/unit/v2/embedders/test_voyageai.py +1 -1
  15. unstructured_ingest/__version__.py +1 -1
  16. unstructured_ingest/embed/huggingface.py +6 -1
  17. unstructured_ingest/embed/interfaces.py +9 -6
  18. unstructured_ingest/embed/mixedbreadai.py +3 -10
  19. unstructured_ingest/embed/octoai.py +14 -7
  20. unstructured_ingest/embed/openai.py +18 -5
  21. unstructured_ingest/embed/togetherai.py +19 -8
  22. unstructured_ingest/embed/vertexai.py +13 -6
  23. unstructured_ingest/embed/voyageai.py +19 -6
  24. unstructured_ingest/utils/html.py +143 -93
  25. unstructured_ingest/v2/interfaces/__init__.py +2 -1
  26. unstructured_ingest/v2/interfaces/indexer.py +2 -3
  27. unstructured_ingest/v2/interfaces/process.py +3 -0
  28. unstructured_ingest/v2/interfaces/uploader.py +14 -1
  29. unstructured_ingest/v2/pipeline/pipeline.py +20 -6
  30. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  31. unstructured_ingest/v2/processes/connectors/confluence.py +15 -22
  32. unstructured_ingest/v2/processes/connectors/onedrive.py +5 -29
  33. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +41 -3
  34. unstructured_ingest/v2/processes/embedder.py +3 -0
  35. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/METADATA +9 -9
  36. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/RECORD +40 -38
  37. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/LICENSE.md +0 -0
  38. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/WHEEL +0 -0
  39. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/entry_points.txt +0 -0
  40. {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ import json
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
7
  from time import time
8
- from typing import TYPE_CHECKING, Any, AsyncIterator, Generator, Iterator, Optional, TypeVar
8
+ from typing import TYPE_CHECKING, Any, AsyncIterator, Optional
9
9
 
10
10
  from dateutil import parser
11
11
  from pydantic import Field, Secret
@@ -101,27 +101,6 @@ class OnedriveIndexerConfig(IndexerConfig):
101
101
  recursive: bool = False
102
102
 
103
103
 
104
- T = TypeVar("T")
105
-
106
-
107
- def async_iterable_to_sync_iterable(iterator: AsyncIterator[T]) -> Iterator[T]:
108
- # This version works on Python 3.9 by manually handling the async iteration.
109
- loop = asyncio.new_event_loop()
110
- asyncio.set_event_loop(loop)
111
- try:
112
- while True:
113
- try:
114
- # Instead of anext(iterator), we directly call __anext__().
115
- # __anext__ returns a coroutine that we must run until complete.
116
- future = iterator.__anext__()
117
- result = loop.run_until_complete(future)
118
- yield result
119
- except StopAsyncIteration:
120
- break
121
- finally:
122
- loop.close()
123
-
124
-
125
104
  @dataclass
126
105
  class OnedriveIndexer(Indexer):
127
106
  connection_config: OnedriveConnectionConfig
@@ -215,7 +194,10 @@ class OnedriveIndexer(Indexer):
215
194
  # Offload the file data creation if it's not guaranteed async
216
195
  return await asyncio.to_thread(self.drive_item_to_file_data_sync, drive_item)
217
196
 
218
- async def _run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
197
+ def is_async(self) -> bool:
198
+ return True
199
+
200
+ async def run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
219
201
  token_resp = await asyncio.to_thread(self.connection_config.get_token)
220
202
  if "error" in token_resp:
221
203
  raise SourceConnectionError(
@@ -230,12 +212,6 @@ class OnedriveIndexer(Indexer):
230
212
  file_data = await self.drive_item_to_file_data(drive_item=drive_item)
231
213
  yield file_data
232
214
 
233
- def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
234
- # Convert the async generator to a sync generator without loading all data into memory
235
- async_gen = self._run_async(**kwargs)
236
- for item in async_iterable_to_sync_iterable(async_gen):
237
- yield item
238
-
239
215
 
240
216
  class OnedriveDownloaderConfig(DownloaderConfig):
241
217
  pass
@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
3
3
  from contextlib import contextmanager
4
4
  from dataclasses import dataclass, field
5
5
  from datetime import date, datetime
6
+ from pathlib import Path
6
7
  from typing import TYPE_CHECKING, Any, Generator, Optional
7
8
 
8
9
  from dateutil import parser
@@ -15,10 +16,10 @@ from unstructured_ingest.v2.interfaces import (
15
16
  AccessConfig,
16
17
  ConnectionConfig,
17
18
  FileData,
18
- Uploader,
19
19
  UploaderConfig,
20
20
  UploadStager,
21
21
  UploadStagerConfig,
22
+ VectorDBUploader,
22
23
  )
23
24
  from unstructured_ingest.v2.logger import logger
24
25
 
@@ -160,7 +161,9 @@ class WeaviateUploadStager(UploadStager):
160
161
 
161
162
 
162
163
  class WeaviateUploaderConfig(UploaderConfig):
163
- collection: str = Field(description="The name of the collection this object belongs to")
164
+ collection: Optional[str] = Field(
165
+ description="The name of the collection this object belongs to", default=None
166
+ )
164
167
  batch_size: Optional[int] = Field(default=None, description="Number of records per batch")
165
168
  requests_per_minute: Optional[int] = Field(default=None, description="Rate limit for upload")
166
169
  dynamic_batch: bool = Field(default=True, description="Whether to use dynamic batch")
@@ -205,17 +208,50 @@ class WeaviateUploaderConfig(UploaderConfig):
205
208
 
206
209
 
207
210
  @dataclass
208
- class WeaviateUploader(Uploader, ABC):
211
+ class WeaviateUploader(VectorDBUploader, ABC):
209
212
  upload_config: WeaviateUploaderConfig
210
213
  connection_config: WeaviateConnectionConfig
211
214
 
215
+ def _collection_exists(self, collection_name: Optional[str] = None):
216
+ collection_name = collection_name or self.upload_config.collection
217
+ with self.connection_config.get_client() as weaviate_client:
218
+ return weaviate_client.collections.exists(name=collection_name)
219
+
212
220
  def precheck(self) -> None:
213
221
  try:
214
222
  self.connection_config.get_client()
223
+ # only if collection name populated should we check that it exists
224
+ if self.upload_config.collection and not self._collection_exists():
225
+ raise DestinationConnectionError(
226
+ f"collection '{self.upload_config.collection}' does not exist"
227
+ )
215
228
  except Exception as e:
216
229
  logger.error(f"Failed to validate connection {e}", exc_info=True)
217
230
  raise DestinationConnectionError(f"failed to validate connection: {e}")
218
231
 
232
+ def init(self, *kwargs: Any) -> None:
233
+ self.create_destination()
234
+
235
+ def create_destination(
236
+ self, destination_name: str = "elements", vector_length: Optional[int] = None, **kwargs: Any
237
+ ) -> bool:
238
+ collection_name = self.upload_config.collection or destination_name
239
+ self.upload_config.collection = collection_name
240
+ connectors_dir = Path(__file__).parents[1]
241
+ collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
242
+ with collection_config_file.open() as f:
243
+ collection_config = json.load(f)
244
+ collection_config["class"] = collection_name
245
+ if not self._collection_exists():
246
+ logger.info(
247
+ f"creating default weaviate collection '{collection_name}' with default configs"
248
+ )
249
+ with self.connection_config.get_client() as weaviate_client:
250
+ weaviate_client.collections.create_from_dict(config=collection_config)
251
+ return True
252
+ logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
253
+ return False
254
+
219
255
  def check_for_errors(self, client: "WeaviateClient") -> None:
220
256
  failed_uploads = client.batch.failed_objects
221
257
  if failed_uploads:
@@ -253,6 +289,8 @@ class WeaviateUploader(Uploader, ABC):
253
289
  f"writing {len(data)} objects to destination "
254
290
  f"class {self.connection_config.access_config} "
255
291
  )
292
+ if not self.upload_config.collection:
293
+ raise ValueError("No collection specified")
256
294
 
257
295
  with self.connection_config.get_client() as weaviate_client:
258
296
  self.delete_by_record_id(client=weaviate_client, file_data=file_data)
@@ -184,6 +184,9 @@ class EmbedderConfig(BaseModel):
184
184
  class Embedder(BaseProcess, ABC):
185
185
  config: EmbedderConfig
186
186
 
187
+ def init(self, *kwargs: Any) -> None:
188
+ self.config.get_embedder().initialize()
189
+
187
190
  def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
188
191
  # TODO update base embedder classes to support async
189
192
  embedder = self.config.get_embedder()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -24,9 +24,9 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
25
  Requires-Dist: pydantic>=2.7
26
26
  Requires-Dist: click
27
- Requires-Dist: tqdm
28
- Requires-Dist: dataclasses-json
29
27
  Requires-Dist: pandas
28
+ Requires-Dist: dataclasses-json
29
+ Requires-Dist: tqdm
30
30
  Requires-Dist: opentelemetry-sdk
31
31
  Requires-Dist: python-dateutil
32
32
  Provides-Extra: airtable
@@ -126,10 +126,10 @@ Requires-Dist: networkx; extra == "neo4j"
126
126
  Requires-Dist: cymple; extra == "neo4j"
127
127
  Requires-Dist: neo4j; extra == "neo4j"
128
128
  Provides-Extra: notion
129
- Requires-Dist: htmlBuilder; extra == "notion"
130
129
  Requires-Dist: backoff; extra == "notion"
131
- Requires-Dist: notion-client; extra == "notion"
132
130
  Requires-Dist: httpx; extra == "notion"
131
+ Requires-Dist: notion-client; extra == "notion"
132
+ Requires-Dist: htmlBuilder; extra == "notion"
133
133
  Provides-Extra: odt
134
134
  Requires-Dist: unstructured[odt]; extra == "odt"
135
135
  Provides-Extra: onedrive
@@ -174,8 +174,8 @@ Requires-Dist: s3fs; extra == "s3"
174
174
  Provides-Extra: salesforce
175
175
  Requires-Dist: simple-salesforce; extra == "salesforce"
176
176
  Provides-Extra: sftp
177
- Requires-Dist: paramiko; extra == "sftp"
178
177
  Requires-Dist: fsspec; extra == "sftp"
178
+ Requires-Dist: paramiko; extra == "sftp"
179
179
  Provides-Extra: sharepoint
180
180
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
181
181
  Requires-Dist: msal; extra == "sharepoint"
@@ -184,8 +184,8 @@ Requires-Dist: singlestoredb; extra == "singlestore"
184
184
  Provides-Extra: slack
185
185
  Requires-Dist: slack-sdk[optional]; extra == "slack"
186
186
  Provides-Extra: snowflake
187
- Requires-Dist: snowflake-connector-python; extra == "snowflake"
188
187
  Requires-Dist: psycopg2-binary; extra == "snowflake"
188
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
189
189
  Provides-Extra: togetherai
190
190
  Requires-Dist: together; extra == "togetherai"
191
191
  Provides-Extra: tsv
@@ -195,9 +195,9 @@ Requires-Dist: ibis; extra == "vastdb"
195
195
  Requires-Dist: pyarrow; extra == "vastdb"
196
196
  Requires-Dist: vastdb; extra == "vastdb"
197
197
  Provides-Extra: vectara
198
- Requires-Dist: aiofiles; extra == "vectara"
199
- Requires-Dist: requests; extra == "vectara"
200
198
  Requires-Dist: httpx; extra == "vectara"
199
+ Requires-Dist: requests; extra == "vectara"
200
+ Requires-Dist: aiofiles; extra == "vectara"
201
201
  Provides-Extra: weaviate
202
202
  Requires-Dist: weaviate-client; extra == "weaviate"
203
203
  Provides-Extra: wikipedia
@@ -51,29 +51,30 @@ test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_Jj
51
51
  test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
53
53
  test/integration/connectors/weaviate/test_cloud.py,sha256=U1ZS6a7wTPX7h3XGvaJHaT-Uwg4IeGgzxx1YBywgVhM,1284
54
- test/integration/connectors/weaviate/test_local.py,sha256=bSJwS6rWxPf3BoOXKzZi2AOuT51py9V3tao6IBy1Rgk,4538
54
+ test/integration/connectors/weaviate/test_local.py,sha256=gXMpnzVcrNQdptDjx0haPWBU-dm1MQTkalgxocI3-L8,5287
55
55
  test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
57
- test/integration/embedders/test_azure_openai.py,sha256=6tFpKFBFRXD49imhhRzsvy3MPtuZ4L1PtnKyMVBRAqc,1808
58
- test/integration/embedders/test_bedrock.py,sha256=WuI2limd0dcT9JTkc44pjjHsdz755bQlZ1by-Xyuy_Y,3565
59
- test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMYrnSPqyihriUeqACbM,1007
60
- test/integration/embedders/test_mixedbread.py,sha256=udjfl5fb71R62hC9pYBj7yBrbCzRYNJuIn2ApXY0i18,2006
61
- test/integration/embedders/test_octoai.py,sha256=LT-JpfNDb2d16HWrX0nWHcPOf0_CmY6tsfUMoC8IWpY,2219
62
- test/integration/embedders/test_openai.py,sha256=9GZI0ZzSDHtdTDMY3yB_JOV3eDbrkPhoN1WD1JILBNE,2149
63
- test/integration/embedders/test_togetherai.py,sha256=bsD5Ea8spLzQlTjQKXXTLaFcEnRisQQve8_tDThg6Qo,2213
64
- test/integration/embedders/test_vertexai.py,sha256=AH3spYF9dtIZUf6qN1_r9-rMff8FdNQaQC5bHcx-G3w,1852
65
- test/integration/embedders/test_voyageai.py,sha256=U6RILPJJyRc2X7E9DhkrNghYba-e_lOhcMkL2LejN7Y,1854
66
- test/integration/embedders/utils.py,sha256=2B_JnitpCchZZO-UCi-5jX4JhRAwBgLal8F03SBc0bQ,2932
57
+ test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
58
+ test/integration/embedders/test_bedrock.py,sha256=ZehreheLgY9Bqdjk-3MQOaou9IP-H3Pcz7WWiOWAxTU,3557
59
+ test/integration/embedders/test_huggingface.py,sha256=qFblyXounVNRaNkk3gbKoBqU5E2dNecgKU2Bz2LyOa8,989
60
+ test/integration/embedders/test_mixedbread.py,sha256=lLz_cooyC38VSo-FMHbhKpHvYs3QzA20NOIvM5oooaw,1998
61
+ test/integration/embedders/test_octoai.py,sha256=qs-bqZ7iGWO_BzUZvKJmOHBT3cmFSkEYbleWhj3snJc,2197
62
+ test/integration/embedders/test_openai.py,sha256=9XioXuvdnbh_3vRmRwpMsi1D5heCcY7KA4nHb5vOU_M,2127
63
+ test/integration/embedders/test_togetherai.py,sha256=hsg3c3SGJGd93unz4-VLYmFXxLA1vmrD5xK5Gj-g0R4,2205
64
+ test/integration/embedders/test_vertexai.py,sha256=4-E4plJXFf1b02RhOqOCBHR2GA4gTnc8K4AnHm6EgPU,1830
65
+ test/integration/embedders/test_voyageai.py,sha256=Gm3sVjhsym1ASIDfr-sZoCbpsNMaAk_l4E3-dtjRCQ4,1832
66
+ test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
67
67
  test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
68
  test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
69
69
  test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
70
  test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
71
+ test/unit/test_html.py,sha256=LKGi_QaH4U4gktrbd2NcURL-d-0Rm1UnG5Y6r9EvTG0,4489
71
72
  test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
72
73
  test/unit/test_utils.py,sha256=Q6mp9YZPah8z3-2lreyRbmAc7m2Y_w26_N9vocSInoA,5421
73
74
  test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
75
  test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
75
76
  test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
76
- test/unit/embed/test_openai.py,sha256=QGpMQ6mSNOuEcCn8PcEhKEjq1tygTm6K68UDfHHiIu4,833
77
+ test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
77
78
  test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
78
79
  test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
79
80
  test/unit/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -95,13 +96,13 @@ test/unit/v2/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN
95
96
  test/unit/v2/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
96
97
  test/unit/v2/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
97
98
  test/unit/v2/embedders/test_vertexai.py,sha256=_4a0tw_GbyvgYJSrP1yw1KjEQJYGzqR5yNXBCSdK8yQ,1145
98
- test/unit/v2/embedders/test_voyageai.py,sha256=De_25F0EhxTNLmAE_c-EK2pFO5p54ad1TVVF055y6p0,1186
99
+ test/unit/v2/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
99
100
  test/unit/v2/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
101
  test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U-dS0ga6h04h7WSfg,2281
101
102
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
103
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
103
104
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
104
- unstructured_ingest/__version__.py,sha256=Y85nIpRVpjjjl2MW3ZwhLs55JjhABkZJeXfKDAbsRxM,42
105
+ unstructured_ingest/__version__.py,sha256=k5K6WAWnRkNeRW39AQyaFiSCUwHRsxlNOpkoF4MqU3c,42
105
106
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
106
107
  unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
107
108
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -271,14 +272,14 @@ unstructured_ingest/connector/notion/types/database_properties/verification.py,s
271
272
  unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
272
273
  unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
273
274
  unstructured_ingest/embed/bedrock.py,sha256=50G8PBEdW3ILwyWXAWl4w-gUA9I0AR7LuFq6NLz-sWI,7284
274
- unstructured_ingest/embed/huggingface.py,sha256=2cBiQhOhfWHX3hS-eKjocysOkUaRlyRfUj9Kxjrp6cE,1934
275
- unstructured_ingest/embed/interfaces.py,sha256=eHS7vyWPhIsUV_BvQ2NtfknoQvd2UzusdRHbVFhJEqw,3654
276
- unstructured_ingest/embed/mixedbreadai.py,sha256=qTPMa9FUyk1u7qCGoz1OUcdxaeA8Ck4Kc7JAusqVw5s,6922
277
- unstructured_ingest/embed/octoai.py,sha256=yjy9nl2F_3Qosa69vCmaFym7l1oSo-zuJ06HqmSbJIE,4676
278
- unstructured_ingest/embed/openai.py,sha256=ExPR9Gv5YM_9fF4YCN5NyrsaiVj68-037rKLbXco3NE,4076
279
- unstructured_ingest/embed/togetherai.py,sha256=nLeDdvzMvsL1EjdsQR37xW-3xorj5uPF7DPKzO-eHR0,3782
280
- unstructured_ingest/embed/vertexai.py,sha256=sV4DUr4YrqAXvuPw5lbQuAY8YWYM5FjKjYPMd_D_x0g,4496
281
- unstructured_ingest/embed/voyageai.py,sha256=70p3rBSd5gq9QukfF7dZHg5Fy0fJkh2Opw-NNjnvmI8,4594
275
+ unstructured_ingest/embed/huggingface.py,sha256=Avcc16st9Cp2xGScG6TeNEEd3T8YjjnESNN4OdIlnh0,2119
276
+ unstructured_ingest/embed/interfaces.py,sha256=7jsQ3rLOXy1hq__muf-EPcLnv17XzNQaD05AyGbZeNo,3739
277
+ unstructured_ingest/embed/mixedbreadai.py,sha256=OhF5cMxWMq8-0mt8_-Xe3ZkjGjf2u6QYzfzgHnOEYtU,6838
278
+ unstructured_ingest/embed/octoai.py,sha256=oLNlM02W1CNUYRG_j6qWyI7yE24vYGKYradNzeeP6mE,5062
279
+ unstructured_ingest/embed/openai.py,sha256=H1sURGuRvXBUSXJcAVzrLObV5wSCVM29tkaXJ-9ZR30,4727
280
+ unstructured_ingest/embed/togetherai.py,sha256=SUd16JEUPlR8aCrd4q_T3CHwMTRUi-1yenq_r1AWlak,4266
281
+ unstructured_ingest/embed/vertexai.py,sha256=CPptS7U5W1CgvxIN8CgVz5J1Ia4FctV6BsmpN9c92A0,4890
282
+ unstructured_ingest/embed/voyageai.py,sha256=lydMASUDcTuyfWBPS3uIqDJPQbjf95bEI5Kr4tytONs,5111
282
283
  unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
283
284
  unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
284
285
  unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
@@ -363,7 +364,7 @@ unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSz
363
364
  unstructured_ingest/utils/data_prep.py,sha256=X3d8Kos1zqX-HQAicF_8TB0BrstRtHrbMzu_1s7mj7M,7191
364
365
  unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
365
366
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
366
- unstructured_ingest/utils/html.py,sha256=gORKKCkva71JBbOilYtAn_MLLCqV8VKmSjSbpwEOlno,4257
367
+ unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
367
368
  unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
368
369
  unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_ZhW-FEcMC2sCNsSNwDOjo,1729
369
370
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
@@ -386,19 +387,19 @@ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdj
386
387
  unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
387
388
  unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
388
389
  unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
389
- unstructured_ingest/v2/interfaces/__init__.py,sha256=9VO09XuTvyOcFF8ZDKN169fNb_uA5TAYzPsiPHOyxhQ,963
390
+ unstructured_ingest/v2/interfaces/__init__.py,sha256=Xp7-345QpM6MG7V7G4ZrVERjADAUBiPAY88PKaMRyqY,1005
390
391
  unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
391
392
  unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
392
393
  unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
393
- unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
394
- unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
394
+ unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
395
+ unstructured_ingest/v2/interfaces/process.py,sha256=6Ll0O9ATcdm36dx2_TOg9PfCEJrADgyd8OQK3TTNzZM,448
395
396
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
396
397
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=9EV9863ODDv0Y5liDT3xh2yiVuFiaVVyCcnwCy6nfkM,3172
397
- unstructured_ingest/v2/interfaces/uploader.py,sha256=T2oHbN-d4Px1w1oATKKYZA10aUssqytEpiaqBM92r0Q,1600
398
+ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-iZPCVsUaL0rljcME,2090
398
399
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
400
  unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
400
401
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
401
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=7Yg8_xwlSX6lA-oPGlTcn6KXZ9kc51zsoJxME5TiUlw,15956
402
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=y6AkUBUL2r3t4OO0jWKomtN3v8U7EDtMPrJ8VYRo7VM,16344
402
403
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
403
404
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
404
405
  unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
@@ -412,7 +413,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
412
413
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
413
414
  unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
414
415
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
415
- unstructured_ingest/v2/processes/embedder.py,sha256=xCBpaL07WnVUOUW8SHktaf1vwBGZxl3Nf8-99509ClQ,7721
416
+ unstructured_ingest/v2/processes/embedder.py,sha256=uiuCOSwwasHp4eqtewMvgnM86WVch7HDFiWqpGLahvo,7812
416
417
  unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
417
418
  unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
418
419
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
@@ -421,7 +422,7 @@ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XE
421
422
  unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
422
423
  unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
423
424
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
424
- unstructured_ingest/v2/processes/connectors/confluence.py,sha256=OdoMK5ZD2HOncquj9c_Xct7bFa6kSGW3qZwfiN1LqtQ,11399
425
+ unstructured_ingest/v2/processes/connectors/confluence.py,sha256=_zkiST0FTggEKNORalCcZZIRGZKnCM0LLcavgQZfDVE,11112
425
426
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
426
427
  unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
427
428
  unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
@@ -432,7 +433,7 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
432
433
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
433
434
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
434
435
  unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
435
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=d6gC40YmfqBNXxizAt4MO4OOu5BoCZ7SAe1AbNwTP0E,18322
436
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=sVRk1LodwVS9do3kmetO8kvSdEzfR-oATXa6covC64Y,17365
436
437
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
437
438
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
438
439
  unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
@@ -441,6 +442,7 @@ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtl
441
442
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
442
443
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
443
444
  unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
445
+ unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
446
  unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
445
447
  unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=Yj4fIxgGo9Qh1x_6-INdbrPGHCuZElu-QKNfSVtW7F4,7694
446
448
  unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
@@ -558,10 +560,10 @@ unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWa
558
560
  unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
559
561
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
560
562
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
561
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
562
- unstructured_ingest-0.4.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
563
- unstructured_ingest-0.4.2.dist-info/METADATA,sha256=-3ILUK1wZ1fDgJcT22FO9ZhM_NKKHNBCLvgWBgzvVOY,8051
564
- unstructured_ingest-0.4.2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
565
- unstructured_ingest-0.4.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
566
- unstructured_ingest-0.4.2.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
567
- unstructured_ingest-0.4.2.dist-info/RECORD,,
563
+ unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
564
+ unstructured_ingest-0.4.4.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
565
+ unstructured_ingest-0.4.4.dist-info/METADATA,sha256=h_Yeg9jJuyJmsipS3juMfEozK8U6sNyA-PotmiuuBsE,8051
566
+ unstructured_ingest-0.4.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
567
+ unstructured_ingest-0.4.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
568
+ unstructured_ingest-0.4.4.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
569
+ unstructured_ingest-0.4.4.dist-info/RECORD,,