unstructured-ingest 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/weaviate/test_local.py +27 -6
- test/integration/embedders/test_azure_openai.py +1 -3
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -3
- test/integration/embedders/test_mixedbread.py +2 -2
- test/integration/embedders/test_octoai.py +2 -4
- test/integration/embedders/test_openai.py +2 -4
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +2 -4
- test/integration/embedders/test_voyageai.py +2 -4
- test/integration/embedders/utils.py +12 -14
- test/unit/embed/test_openai.py +12 -4
- test/unit/test_html.py +112 -0
- test/unit/v2/embedders/test_voyageai.py +1 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/huggingface.py +6 -1
- unstructured_ingest/embed/interfaces.py +9 -6
- unstructured_ingest/embed/mixedbreadai.py +3 -10
- unstructured_ingest/embed/octoai.py +14 -7
- unstructured_ingest/embed/openai.py +18 -5
- unstructured_ingest/embed/togetherai.py +19 -8
- unstructured_ingest/embed/vertexai.py +13 -6
- unstructured_ingest/embed/voyageai.py +19 -6
- unstructured_ingest/utils/html.py +143 -93
- unstructured_ingest/v2/interfaces/__init__.py +2 -1
- unstructured_ingest/v2/interfaces/indexer.py +2 -3
- unstructured_ingest/v2/interfaces/process.py +3 -0
- unstructured_ingest/v2/interfaces/uploader.py +14 -1
- unstructured_ingest/v2/pipeline/pipeline.py +20 -6
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +15 -22
- unstructured_ingest/v2/processes/connectors/onedrive.py +5 -29
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +41 -3
- unstructured_ingest/v2/processes/embedder.py +3 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/METADATA +9 -9
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/RECORD +40 -38
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ import json
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from time import time
|
|
8
|
-
from typing import TYPE_CHECKING, Any, AsyncIterator, Generator, Iterator, Optional, TypeVar
|
|
8
|
+
from typing import TYPE_CHECKING, Any, AsyncIterator, Optional
|
|
9
9
|
|
|
10
10
|
from dateutil import parser
|
|
11
11
|
from pydantic import Field, Secret
|
|
@@ -101,27 +101,6 @@ class OnedriveIndexerConfig(IndexerConfig):
|
|
|
101
101
|
recursive: bool = False
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
T = TypeVar("T")
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def async_iterable_to_sync_iterable(iterator: AsyncIterator[T]) -> Iterator[T]:
|
|
108
|
-
# This version works on Python 3.9 by manually handling the async iteration.
|
|
109
|
-
loop = asyncio.new_event_loop()
|
|
110
|
-
asyncio.set_event_loop(loop)
|
|
111
|
-
try:
|
|
112
|
-
while True:
|
|
113
|
-
try:
|
|
114
|
-
# Instead of anext(iterator), we directly call __anext__().
|
|
115
|
-
# __anext__ returns a coroutine that we must run until complete.
|
|
116
|
-
future = iterator.__anext__()
|
|
117
|
-
result = loop.run_until_complete(future)
|
|
118
|
-
yield result
|
|
119
|
-
except StopAsyncIteration:
|
|
120
|
-
break
|
|
121
|
-
finally:
|
|
122
|
-
loop.close()
|
|
123
|
-
|
|
124
|
-
|
|
125
104
|
@dataclass
|
|
126
105
|
class OnedriveIndexer(Indexer):
|
|
127
106
|
connection_config: OnedriveConnectionConfig
|
|
@@ -215,7 +194,10 @@ class OnedriveIndexer(Indexer):
|
|
|
215
194
|
# Offload the file data creation if it's not guaranteed async
|
|
216
195
|
return await asyncio.to_thread(self.drive_item_to_file_data_sync, drive_item)
|
|
217
196
|
|
|
218
|
-
|
|
197
|
+
def is_async(self) -> bool:
|
|
198
|
+
return True
|
|
199
|
+
|
|
200
|
+
async def run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
|
|
219
201
|
token_resp = await asyncio.to_thread(self.connection_config.get_token)
|
|
220
202
|
if "error" in token_resp:
|
|
221
203
|
raise SourceConnectionError(
|
|
@@ -230,12 +212,6 @@ class OnedriveIndexer(Indexer):
|
|
|
230
212
|
file_data = await self.drive_item_to_file_data(drive_item=drive_item)
|
|
231
213
|
yield file_data
|
|
232
214
|
|
|
233
|
-
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
234
|
-
# Convert the async generator to a sync generator without loading all data into memory
|
|
235
|
-
async_gen = self._run_async(**kwargs)
|
|
236
|
-
for item in async_iterable_to_sync_iterable(async_gen):
|
|
237
|
-
yield item
|
|
238
|
-
|
|
239
215
|
|
|
240
216
|
class OnedriveDownloaderConfig(DownloaderConfig):
|
|
241
217
|
pass
|
|
@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
|
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from datetime import date, datetime
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
7
8
|
|
|
8
9
|
from dateutil import parser
|
|
@@ -15,10 +16,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
16
|
AccessConfig,
|
|
16
17
|
ConnectionConfig,
|
|
17
18
|
FileData,
|
|
18
|
-
Uploader,
|
|
19
19
|
UploaderConfig,
|
|
20
20
|
UploadStager,
|
|
21
21
|
UploadStagerConfig,
|
|
22
|
+
VectorDBUploader,
|
|
22
23
|
)
|
|
23
24
|
from unstructured_ingest.v2.logger import logger
|
|
24
25
|
|
|
@@ -160,7 +161,9 @@ class WeaviateUploadStager(UploadStager):
|
|
|
160
161
|
|
|
161
162
|
|
|
162
163
|
class WeaviateUploaderConfig(UploaderConfig):
|
|
163
|
-
collection: str = Field(description="The name of the collection this object belongs to")
|
|
164
|
+
collection: Optional[str] = Field(
|
|
165
|
+
description="The name of the collection this object belongs to", default=None
|
|
166
|
+
)
|
|
164
167
|
batch_size: Optional[int] = Field(default=None, description="Number of records per batch")
|
|
165
168
|
requests_per_minute: Optional[int] = Field(default=None, description="Rate limit for upload")
|
|
166
169
|
dynamic_batch: bool = Field(default=True, description="Whether to use dynamic batch")
|
|
@@ -205,17 +208,50 @@ class WeaviateUploaderConfig(UploaderConfig):
|
|
|
205
208
|
|
|
206
209
|
|
|
207
210
|
@dataclass
|
|
208
|
-
class WeaviateUploader(Uploader, ABC):
|
|
211
|
+
class WeaviateUploader(VectorDBUploader, ABC):
|
|
209
212
|
upload_config: WeaviateUploaderConfig
|
|
210
213
|
connection_config: WeaviateConnectionConfig
|
|
211
214
|
|
|
215
|
+
def _collection_exists(self, collection_name: Optional[str] = None):
|
|
216
|
+
collection_name = collection_name or self.upload_config.collection
|
|
217
|
+
with self.connection_config.get_client() as weaviate_client:
|
|
218
|
+
return weaviate_client.collections.exists(name=collection_name)
|
|
219
|
+
|
|
212
220
|
def precheck(self) -> None:
|
|
213
221
|
try:
|
|
214
222
|
self.connection_config.get_client()
|
|
223
|
+
# only if collection name populated should we check that it exists
|
|
224
|
+
if self.upload_config.collection and not self._collection_exists():
|
|
225
|
+
raise DestinationConnectionError(
|
|
226
|
+
f"collection '{self.upload_config.collection}' does not exist"
|
|
227
|
+
)
|
|
215
228
|
except Exception as e:
|
|
216
229
|
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
217
230
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
218
231
|
|
|
232
|
+
def init(self, *kwargs: Any) -> None:
|
|
233
|
+
self.create_destination()
|
|
234
|
+
|
|
235
|
+
def create_destination(
|
|
236
|
+
self, destination_name: str = "elements", vector_length: Optional[int] = None, **kwargs: Any
|
|
237
|
+
) -> bool:
|
|
238
|
+
collection_name = self.upload_config.collection or destination_name
|
|
239
|
+
self.upload_config.collection = collection_name
|
|
240
|
+
connectors_dir = Path(__file__).parents[1]
|
|
241
|
+
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
242
|
+
with collection_config_file.open() as f:
|
|
243
|
+
collection_config = json.load(f)
|
|
244
|
+
collection_config["class"] = collection_name
|
|
245
|
+
if not self._collection_exists():
|
|
246
|
+
logger.info(
|
|
247
|
+
f"creating default weaviate collection '{collection_name}' with default configs"
|
|
248
|
+
)
|
|
249
|
+
with self.connection_config.get_client() as weaviate_client:
|
|
250
|
+
weaviate_client.collections.create_from_dict(config=collection_config)
|
|
251
|
+
return True
|
|
252
|
+
logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
|
|
253
|
+
return False
|
|
254
|
+
|
|
219
255
|
def check_for_errors(self, client: "WeaviateClient") -> None:
|
|
220
256
|
failed_uploads = client.batch.failed_objects
|
|
221
257
|
if failed_uploads:
|
|
@@ -253,6 +289,8 @@ class WeaviateUploader(Uploader, ABC):
|
|
|
253
289
|
f"writing {len(data)} objects to destination "
|
|
254
290
|
f"class {self.connection_config.access_config} "
|
|
255
291
|
)
|
|
292
|
+
if not self.upload_config.collection:
|
|
293
|
+
raise ValueError("No collection specified")
|
|
256
294
|
|
|
257
295
|
with self.connection_config.get_client() as weaviate_client:
|
|
258
296
|
self.delete_by_record_id(client=weaviate_client, file_data=file_data)
|
|
@@ -184,6 +184,9 @@ class EmbedderConfig(BaseModel):
|
|
|
184
184
|
class Embedder(BaseProcess, ABC):
|
|
185
185
|
config: EmbedderConfig
|
|
186
186
|
|
|
187
|
+
def init(self, *kwargs: Any) -> None:
|
|
188
|
+
self.config.get_embedder().initialize()
|
|
189
|
+
|
|
187
190
|
def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
|
|
188
191
|
# TODO update base embedder classes to support async
|
|
189
192
|
embedder = self.config.get_embedder()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -24,9 +24,9 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE.md
|
|
25
25
|
Requires-Dist: pydantic>=2.7
|
|
26
26
|
Requires-Dist: click
|
|
27
|
-
Requires-Dist: tqdm
|
|
28
|
-
Requires-Dist: dataclasses-json
|
|
29
27
|
Requires-Dist: pandas
|
|
28
|
+
Requires-Dist: dataclasses-json
|
|
29
|
+
Requires-Dist: tqdm
|
|
30
30
|
Requires-Dist: opentelemetry-sdk
|
|
31
31
|
Requires-Dist: python-dateutil
|
|
32
32
|
Provides-Extra: airtable
|
|
@@ -126,10 +126,10 @@ Requires-Dist: networkx; extra == "neo4j"
|
|
|
126
126
|
Requires-Dist: cymple; extra == "neo4j"
|
|
127
127
|
Requires-Dist: neo4j; extra == "neo4j"
|
|
128
128
|
Provides-Extra: notion
|
|
129
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
130
129
|
Requires-Dist: backoff; extra == "notion"
|
|
131
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
132
130
|
Requires-Dist: httpx; extra == "notion"
|
|
131
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
132
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
133
133
|
Provides-Extra: odt
|
|
134
134
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
135
135
|
Provides-Extra: onedrive
|
|
@@ -174,8 +174,8 @@ Requires-Dist: s3fs; extra == "s3"
|
|
|
174
174
|
Provides-Extra: salesforce
|
|
175
175
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
176
176
|
Provides-Extra: sftp
|
|
177
|
-
Requires-Dist: paramiko; extra == "sftp"
|
|
178
177
|
Requires-Dist: fsspec; extra == "sftp"
|
|
178
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
179
179
|
Provides-Extra: sharepoint
|
|
180
180
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
181
181
|
Requires-Dist: msal; extra == "sharepoint"
|
|
@@ -184,8 +184,8 @@ Requires-Dist: singlestoredb; extra == "singlestore"
|
|
|
184
184
|
Provides-Extra: slack
|
|
185
185
|
Requires-Dist: slack-sdk[optional]; extra == "slack"
|
|
186
186
|
Provides-Extra: snowflake
|
|
187
|
-
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
188
187
|
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
188
|
+
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
189
189
|
Provides-Extra: togetherai
|
|
190
190
|
Requires-Dist: together; extra == "togetherai"
|
|
191
191
|
Provides-Extra: tsv
|
|
@@ -195,9 +195,9 @@ Requires-Dist: ibis; extra == "vastdb"
|
|
|
195
195
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
196
196
|
Requires-Dist: vastdb; extra == "vastdb"
|
|
197
197
|
Provides-Extra: vectara
|
|
198
|
-
Requires-Dist: aiofiles; extra == "vectara"
|
|
199
|
-
Requires-Dist: requests; extra == "vectara"
|
|
200
198
|
Requires-Dist: httpx; extra == "vectara"
|
|
199
|
+
Requires-Dist: requests; extra == "vectara"
|
|
200
|
+
Requires-Dist: aiofiles; extra == "vectara"
|
|
201
201
|
Provides-Extra: weaviate
|
|
202
202
|
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
203
203
|
Provides-Extra: wikipedia
|
|
@@ -51,29 +51,30 @@ test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_Jj
|
|
|
51
51
|
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
53
53
|
test/integration/connectors/weaviate/test_cloud.py,sha256=U1ZS6a7wTPX7h3XGvaJHaT-Uwg4IeGgzxx1YBywgVhM,1284
|
|
54
|
-
test/integration/connectors/weaviate/test_local.py,sha256=
|
|
54
|
+
test/integration/connectors/weaviate/test_local.py,sha256=gXMpnzVcrNQdptDjx0haPWBU-dm1MQTkalgxocI3-L8,5287
|
|
55
55
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
57
|
-
test/integration/embedders/test_azure_openai.py,sha256=
|
|
58
|
-
test/integration/embedders/test_bedrock.py,sha256=
|
|
59
|
-
test/integration/embedders/test_huggingface.py,sha256=
|
|
60
|
-
test/integration/embedders/test_mixedbread.py,sha256=
|
|
61
|
-
test/integration/embedders/test_octoai.py,sha256=
|
|
62
|
-
test/integration/embedders/test_openai.py,sha256=
|
|
63
|
-
test/integration/embedders/test_togetherai.py,sha256=
|
|
64
|
-
test/integration/embedders/test_vertexai.py,sha256=
|
|
65
|
-
test/integration/embedders/test_voyageai.py,sha256=
|
|
66
|
-
test/integration/embedders/utils.py,sha256=
|
|
57
|
+
test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
|
|
58
|
+
test/integration/embedders/test_bedrock.py,sha256=ZehreheLgY9Bqdjk-3MQOaou9IP-H3Pcz7WWiOWAxTU,3557
|
|
59
|
+
test/integration/embedders/test_huggingface.py,sha256=qFblyXounVNRaNkk3gbKoBqU5E2dNecgKU2Bz2LyOa8,989
|
|
60
|
+
test/integration/embedders/test_mixedbread.py,sha256=lLz_cooyC38VSo-FMHbhKpHvYs3QzA20NOIvM5oooaw,1998
|
|
61
|
+
test/integration/embedders/test_octoai.py,sha256=qs-bqZ7iGWO_BzUZvKJmOHBT3cmFSkEYbleWhj3snJc,2197
|
|
62
|
+
test/integration/embedders/test_openai.py,sha256=9XioXuvdnbh_3vRmRwpMsi1D5heCcY7KA4nHb5vOU_M,2127
|
|
63
|
+
test/integration/embedders/test_togetherai.py,sha256=hsg3c3SGJGd93unz4-VLYmFXxLA1vmrD5xK5Gj-g0R4,2205
|
|
64
|
+
test/integration/embedders/test_vertexai.py,sha256=4-E4plJXFf1b02RhOqOCBHR2GA4gTnc8K4AnHm6EgPU,1830
|
|
65
|
+
test/integration/embedders/test_voyageai.py,sha256=Gm3sVjhsym1ASIDfr-sZoCbpsNMaAk_l4E3-dtjRCQ4,1832
|
|
66
|
+
test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
|
|
67
67
|
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
68
|
test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
|
|
69
69
|
test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
70
|
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
71
|
+
test/unit/test_html.py,sha256=LKGi_QaH4U4gktrbd2NcURL-d-0Rm1UnG5Y6r9EvTG0,4489
|
|
71
72
|
test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
|
|
72
73
|
test/unit/test_utils.py,sha256=Q6mp9YZPah8z3-2lreyRbmAc7m2Y_w26_N9vocSInoA,5421
|
|
73
74
|
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
75
|
test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
|
|
75
76
|
test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
|
|
76
|
-
test/unit/embed/test_openai.py,sha256=
|
|
77
|
+
test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
|
|
77
78
|
test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
|
|
78
79
|
test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
|
|
79
80
|
test/unit/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -95,13 +96,13 @@ test/unit/v2/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN
|
|
|
95
96
|
test/unit/v2/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
|
|
96
97
|
test/unit/v2/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
|
|
97
98
|
test/unit/v2/embedders/test_vertexai.py,sha256=_4a0tw_GbyvgYJSrP1yw1KjEQJYGzqR5yNXBCSdK8yQ,1145
|
|
98
|
-
test/unit/v2/embedders/test_voyageai.py,sha256=
|
|
99
|
+
test/unit/v2/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
|
|
99
100
|
test/unit/v2/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
101
|
test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U-dS0ga6h04h7WSfg,2281
|
|
101
102
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
103
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
103
104
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
104
|
-
unstructured_ingest/__version__.py,sha256=
|
|
105
|
+
unstructured_ingest/__version__.py,sha256=k5K6WAWnRkNeRW39AQyaFiSCUwHRsxlNOpkoF4MqU3c,42
|
|
105
106
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
106
107
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
107
108
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -271,14 +272,14 @@ unstructured_ingest/connector/notion/types/database_properties/verification.py,s
|
|
|
271
272
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
272
273
|
unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
|
|
273
274
|
unstructured_ingest/embed/bedrock.py,sha256=50G8PBEdW3ILwyWXAWl4w-gUA9I0AR7LuFq6NLz-sWI,7284
|
|
274
|
-
unstructured_ingest/embed/huggingface.py,sha256=
|
|
275
|
-
unstructured_ingest/embed/interfaces.py,sha256=
|
|
276
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256=
|
|
277
|
-
unstructured_ingest/embed/octoai.py,sha256=
|
|
278
|
-
unstructured_ingest/embed/openai.py,sha256=
|
|
279
|
-
unstructured_ingest/embed/togetherai.py,sha256=
|
|
280
|
-
unstructured_ingest/embed/vertexai.py,sha256=
|
|
281
|
-
unstructured_ingest/embed/voyageai.py,sha256=
|
|
275
|
+
unstructured_ingest/embed/huggingface.py,sha256=Avcc16st9Cp2xGScG6TeNEEd3T8YjjnESNN4OdIlnh0,2119
|
|
276
|
+
unstructured_ingest/embed/interfaces.py,sha256=7jsQ3rLOXy1hq__muf-EPcLnv17XzNQaD05AyGbZeNo,3739
|
|
277
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=OhF5cMxWMq8-0mt8_-Xe3ZkjGjf2u6QYzfzgHnOEYtU,6838
|
|
278
|
+
unstructured_ingest/embed/octoai.py,sha256=oLNlM02W1CNUYRG_j6qWyI7yE24vYGKYradNzeeP6mE,5062
|
|
279
|
+
unstructured_ingest/embed/openai.py,sha256=H1sURGuRvXBUSXJcAVzrLObV5wSCVM29tkaXJ-9ZR30,4727
|
|
280
|
+
unstructured_ingest/embed/togetherai.py,sha256=SUd16JEUPlR8aCrd4q_T3CHwMTRUi-1yenq_r1AWlak,4266
|
|
281
|
+
unstructured_ingest/embed/vertexai.py,sha256=CPptS7U5W1CgvxIN8CgVz5J1Ia4FctV6BsmpN9c92A0,4890
|
|
282
|
+
unstructured_ingest/embed/voyageai.py,sha256=lydMASUDcTuyfWBPS3uIqDJPQbjf95bEI5Kr4tytONs,5111
|
|
282
283
|
unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
|
|
283
284
|
unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
|
|
284
285
|
unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
|
|
@@ -363,7 +364,7 @@ unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSz
|
|
|
363
364
|
unstructured_ingest/utils/data_prep.py,sha256=X3d8Kos1zqX-HQAicF_8TB0BrstRtHrbMzu_1s7mj7M,7191
|
|
364
365
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
365
366
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
366
|
-
unstructured_ingest/utils/html.py,sha256=
|
|
367
|
+
unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
|
|
367
368
|
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
368
369
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_ZhW-FEcMC2sCNsSNwDOjo,1729
|
|
369
370
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
@@ -386,19 +387,19 @@ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdj
|
|
|
386
387
|
unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
387
388
|
unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
|
|
388
389
|
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
|
|
389
|
-
unstructured_ingest/v2/interfaces/__init__.py,sha256=
|
|
390
|
+
unstructured_ingest/v2/interfaces/__init__.py,sha256=Xp7-345QpM6MG7V7G4ZrVERjADAUBiPAY88PKaMRyqY,1005
|
|
390
391
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
391
392
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
392
393
|
unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
|
|
393
|
-
unstructured_ingest/v2/interfaces/indexer.py,sha256=
|
|
394
|
-
unstructured_ingest/v2/interfaces/process.py,sha256=
|
|
394
|
+
unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
|
|
395
|
+
unstructured_ingest/v2/interfaces/process.py,sha256=6Ll0O9ATcdm36dx2_TOg9PfCEJrADgyd8OQK3TTNzZM,448
|
|
395
396
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
396
397
|
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=9EV9863ODDv0Y5liDT3xh2yiVuFiaVVyCcnwCy6nfkM,3172
|
|
397
|
-
unstructured_ingest/v2/interfaces/uploader.py,sha256=
|
|
398
|
+
unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-iZPCVsUaL0rljcME,2090
|
|
398
399
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
400
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
400
401
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
401
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
402
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=y6AkUBUL2r3t4OO0jWKomtN3v8U7EDtMPrJ8VYRo7VM,16344
|
|
402
403
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
403
404
|
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
|
|
404
405
|
unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
|
|
@@ -412,7 +413,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
|
|
|
412
413
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
413
414
|
unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
|
|
414
415
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
415
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
416
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=uiuCOSwwasHp4eqtewMvgnM86WVch7HDFiWqpGLahvo,7812
|
|
416
417
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
417
418
|
unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
|
|
418
419
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
@@ -421,7 +422,7 @@ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XE
|
|
|
421
422
|
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
|
|
422
423
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
423
424
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
424
|
-
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=
|
|
425
|
+
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=_zkiST0FTggEKNORalCcZZIRGZKnCM0LLcavgQZfDVE,11112
|
|
425
426
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
426
427
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
|
|
427
428
|
unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
|
|
@@ -432,7 +433,7 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
|
|
|
432
433
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
|
|
433
434
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
434
435
|
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
|
|
435
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=sVRk1LodwVS9do3kmetO8kvSdEzfR-oATXa6covC64Y,17365
|
|
436
437
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
437
438
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
|
|
438
439
|
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
|
|
@@ -441,6 +442,7 @@ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtl
|
|
|
441
442
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
442
443
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
443
444
|
unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
|
|
445
|
+
unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
444
446
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
|
|
445
447
|
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=Yj4fIxgGo9Qh1x_6-INdbrPGHCuZElu-QKNfSVtW7F4,7694
|
|
446
448
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
|
|
@@ -558,10 +560,10 @@ unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWa
|
|
|
558
560
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
559
561
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
560
562
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
561
|
-
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=
|
|
562
|
-
unstructured_ingest-0.4.
|
|
563
|
-
unstructured_ingest-0.4.
|
|
564
|
-
unstructured_ingest-0.4.
|
|
565
|
-
unstructured_ingest-0.4.
|
|
566
|
-
unstructured_ingest-0.4.
|
|
567
|
-
unstructured_ingest-0.4.
|
|
563
|
+
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
|
|
564
|
+
unstructured_ingest-0.4.4.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
565
|
+
unstructured_ingest-0.4.4.dist-info/METADATA,sha256=h_Yeg9jJuyJmsipS3juMfEozK8U6sNyA-PotmiuuBsE,8051
|
|
566
|
+
unstructured_ingest-0.4.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
567
|
+
unstructured_ingest-0.4.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
568
|
+
unstructured_ingest-0.4.4.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
569
|
+
unstructured_ingest-0.4.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.4.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|