unstructured-ingest 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/weaviate/test_local.py +27 -6
- test/integration/embedders/test_azure_openai.py +1 -3
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -3
- test/integration/embedders/test_mixedbread.py +2 -2
- test/integration/embedders/test_octoai.py +2 -4
- test/integration/embedders/test_openai.py +2 -4
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +2 -4
- test/integration/embedders/test_voyageai.py +2 -4
- test/integration/embedders/utils.py +12 -14
- test/unit/embed/test_openai.py +12 -4
- test/unit/test_html.py +112 -0
- test/unit/v2/embedders/test_voyageai.py +1 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/huggingface.py +6 -1
- unstructured_ingest/embed/interfaces.py +9 -6
- unstructured_ingest/embed/mixedbreadai.py +3 -10
- unstructured_ingest/embed/octoai.py +14 -7
- unstructured_ingest/embed/openai.py +18 -5
- unstructured_ingest/embed/togetherai.py +19 -8
- unstructured_ingest/embed/vertexai.py +13 -6
- unstructured_ingest/embed/voyageai.py +19 -6
- unstructured_ingest/utils/html.py +143 -93
- unstructured_ingest/v2/interfaces/__init__.py +2 -1
- unstructured_ingest/v2/interfaces/process.py +3 -0
- unstructured_ingest/v2/interfaces/uploader.py +14 -1
- unstructured_ingest/v2/pipeline/pipeline.py +20 -6
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +15 -22
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +41 -3
- unstructured_ingest/v2/processes/embedder.py +3 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/METADATA +22 -22
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/RECORD +38 -36
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
|
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from datetime import date, datetime
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
7
8
|
|
|
8
9
|
from dateutil import parser
|
|
@@ -15,10 +16,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
16
|
AccessConfig,
|
|
16
17
|
ConnectionConfig,
|
|
17
18
|
FileData,
|
|
18
|
-
Uploader,
|
|
19
19
|
UploaderConfig,
|
|
20
20
|
UploadStager,
|
|
21
21
|
UploadStagerConfig,
|
|
22
|
+
VectorDBUploader,
|
|
22
23
|
)
|
|
23
24
|
from unstructured_ingest.v2.logger import logger
|
|
24
25
|
|
|
@@ -160,7 +161,9 @@ class WeaviateUploadStager(UploadStager):
|
|
|
160
161
|
|
|
161
162
|
|
|
162
163
|
class WeaviateUploaderConfig(UploaderConfig):
|
|
163
|
-
collection: str = Field(
|
|
164
|
+
collection: Optional[str] = Field(
|
|
165
|
+
description="The name of the collection this object belongs to", default=None
|
|
166
|
+
)
|
|
164
167
|
batch_size: Optional[int] = Field(default=None, description="Number of records per batch")
|
|
165
168
|
requests_per_minute: Optional[int] = Field(default=None, description="Rate limit for upload")
|
|
166
169
|
dynamic_batch: bool = Field(default=True, description="Whether to use dynamic batch")
|
|
@@ -205,17 +208,50 @@ class WeaviateUploaderConfig(UploaderConfig):
|
|
|
205
208
|
|
|
206
209
|
|
|
207
210
|
@dataclass
|
|
208
|
-
class WeaviateUploader(
|
|
211
|
+
class WeaviateUploader(VectorDBUploader, ABC):
|
|
209
212
|
upload_config: WeaviateUploaderConfig
|
|
210
213
|
connection_config: WeaviateConnectionConfig
|
|
211
214
|
|
|
215
|
+
def _collection_exists(self, collection_name: Optional[str] = None):
|
|
216
|
+
collection_name = collection_name or self.upload_config.collection
|
|
217
|
+
with self.connection_config.get_client() as weaviate_client:
|
|
218
|
+
return weaviate_client.collections.exists(name=collection_name)
|
|
219
|
+
|
|
212
220
|
def precheck(self) -> None:
|
|
213
221
|
try:
|
|
214
222
|
self.connection_config.get_client()
|
|
223
|
+
# only if collection name populated should we check that it exists
|
|
224
|
+
if self.upload_config.collection and not self._collection_exists():
|
|
225
|
+
raise DestinationConnectionError(
|
|
226
|
+
f"collection '{self.upload_config.collection}' does not exist"
|
|
227
|
+
)
|
|
215
228
|
except Exception as e:
|
|
216
229
|
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
217
230
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
218
231
|
|
|
232
|
+
def init(self, *kwargs: Any) -> None:
|
|
233
|
+
self.create_destination()
|
|
234
|
+
|
|
235
|
+
def create_destination(
|
|
236
|
+
self, destination_name: str = "elements", vector_length: Optional[int] = None, **kwargs: Any
|
|
237
|
+
) -> bool:
|
|
238
|
+
collection_name = self.upload_config.collection or destination_name
|
|
239
|
+
self.upload_config.collection = collection_name
|
|
240
|
+
connectors_dir = Path(__file__).parents[1]
|
|
241
|
+
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
242
|
+
with collection_config_file.open() as f:
|
|
243
|
+
collection_config = json.load(f)
|
|
244
|
+
collection_config["class"] = collection_name
|
|
245
|
+
if not self._collection_exists():
|
|
246
|
+
logger.info(
|
|
247
|
+
f"creating default weaviate collection '{collection_name}' with default configs"
|
|
248
|
+
)
|
|
249
|
+
with self.connection_config.get_client() as weaviate_client:
|
|
250
|
+
weaviate_client.collections.create_from_dict(config=collection_config)
|
|
251
|
+
return True
|
|
252
|
+
logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
|
|
253
|
+
return False
|
|
254
|
+
|
|
219
255
|
def check_for_errors(self, client: "WeaviateClient") -> None:
|
|
220
256
|
failed_uploads = client.batch.failed_objects
|
|
221
257
|
if failed_uploads:
|
|
@@ -253,6 +289,8 @@ class WeaviateUploader(Uploader, ABC):
|
|
|
253
289
|
f"writing {len(data)} objects to destination "
|
|
254
290
|
f"class {self.connection_config.access_config} "
|
|
255
291
|
)
|
|
292
|
+
if not self.upload_config.collection:
|
|
293
|
+
raise ValueError("No collection specified")
|
|
256
294
|
|
|
257
295
|
with self.connection_config.get_client() as weaviate_client:
|
|
258
296
|
self.delete_by_record_id(client=weaviate_client, file_data=file_data)
|
|
@@ -184,6 +184,9 @@ class EmbedderConfig(BaseModel):
|
|
|
184
184
|
class Embedder(BaseProcess, ABC):
|
|
185
185
|
config: EmbedderConfig
|
|
186
186
|
|
|
187
|
+
def init(self, *kwargs: Any) -> None:
|
|
188
|
+
self.config.get_embedder().initialize()
|
|
189
|
+
|
|
187
190
|
def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
|
|
188
191
|
# TODO update base embedder classes to support async
|
|
189
192
|
embedder = self.config.get_embedder()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,38 +22,38 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: pydantic>=2.7
|
|
26
|
-
Requires-Dist: click
|
|
27
|
-
Requires-Dist: tqdm
|
|
28
25
|
Requires-Dist: dataclasses-json
|
|
29
26
|
Requires-Dist: pandas
|
|
30
|
-
Requires-Dist: opentelemetry-sdk
|
|
31
27
|
Requires-Dist: python-dateutil
|
|
28
|
+
Requires-Dist: opentelemetry-sdk
|
|
29
|
+
Requires-Dist: click
|
|
30
|
+
Requires-Dist: pydantic>=2.7
|
|
31
|
+
Requires-Dist: tqdm
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
35
35
|
Requires-Dist: astrapy; extra == "astradb"
|
|
36
36
|
Provides-Extra: azure
|
|
37
|
-
Requires-Dist: adlfs; extra == "azure"
|
|
38
37
|
Requires-Dist: fsspec; extra == "azure"
|
|
38
|
+
Requires-Dist: adlfs; extra == "azure"
|
|
39
39
|
Provides-Extra: azure-ai-search
|
|
40
40
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
41
41
|
Provides-Extra: bedrock
|
|
42
|
-
Requires-Dist: aioboto3; extra == "bedrock"
|
|
43
42
|
Requires-Dist: boto3; extra == "bedrock"
|
|
43
|
+
Requires-Dist: aioboto3; extra == "bedrock"
|
|
44
44
|
Provides-Extra: biomed
|
|
45
45
|
Requires-Dist: bs4; extra == "biomed"
|
|
46
46
|
Requires-Dist: requests; extra == "biomed"
|
|
47
47
|
Provides-Extra: box
|
|
48
|
-
Requires-Dist: boxfs; extra == "box"
|
|
49
48
|
Requires-Dist: fsspec; extra == "box"
|
|
49
|
+
Requires-Dist: boxfs; extra == "box"
|
|
50
50
|
Provides-Extra: chroma
|
|
51
51
|
Requires-Dist: chromadb; extra == "chroma"
|
|
52
52
|
Provides-Extra: clarifai
|
|
53
53
|
Requires-Dist: clarifai; extra == "clarifai"
|
|
54
54
|
Provides-Extra: confluence
|
|
55
|
-
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
56
55
|
Requires-Dist: requests; extra == "confluence"
|
|
56
|
+
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
57
57
|
Provides-Extra: couchbase
|
|
58
58
|
Requires-Dist: couchbase; extra == "couchbase"
|
|
59
59
|
Provides-Extra: csv
|
|
@@ -63,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
63
63
|
Provides-Extra: databricks-volumes
|
|
64
64
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
65
65
|
Provides-Extra: delta-table
|
|
66
|
-
Requires-Dist: deltalake; extra == "delta-table"
|
|
67
66
|
Requires-Dist: boto3; extra == "delta-table"
|
|
67
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
68
68
|
Provides-Extra: discord
|
|
69
69
|
Requires-Dist: discord.py; extra == "discord"
|
|
70
70
|
Provides-Extra: doc
|
|
@@ -72,8 +72,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
72
72
|
Provides-Extra: docx
|
|
73
73
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
74
74
|
Provides-Extra: dropbox
|
|
75
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
76
75
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
76
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
77
77
|
Provides-Extra: duckdb
|
|
78
78
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
79
79
|
Provides-Extra: elasticsearch
|
|
@@ -92,12 +92,12 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
92
92
|
Provides-Extra: epub
|
|
93
93
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
94
94
|
Provides-Extra: gcs
|
|
95
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
96
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
97
95
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
96
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
97
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
98
98
|
Provides-Extra: github
|
|
99
|
-
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
100
99
|
Requires-Dist: requests; extra == "github"
|
|
100
|
+
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
101
101
|
Provides-Extra: gitlab
|
|
102
102
|
Requires-Dist: python-gitlab; extra == "gitlab"
|
|
103
103
|
Provides-Extra: google-drive
|
|
@@ -122,9 +122,9 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
122
122
|
Provides-Extra: msg
|
|
123
123
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
124
124
|
Provides-Extra: neo4j
|
|
125
|
+
Requires-Dist: neo4j; extra == "neo4j"
|
|
125
126
|
Requires-Dist: networkx; extra == "neo4j"
|
|
126
127
|
Requires-Dist: cymple; extra == "neo4j"
|
|
127
|
-
Requires-Dist: neo4j; extra == "neo4j"
|
|
128
128
|
Provides-Extra: notion
|
|
129
129
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
130
130
|
Requires-Dist: backoff; extra == "notion"
|
|
@@ -133,9 +133,9 @@ Requires-Dist: httpx; extra == "notion"
|
|
|
133
133
|
Provides-Extra: odt
|
|
134
134
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
+
Requires-Dist: msal; extra == "onedrive"
|
|
136
137
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
137
138
|
Requires-Dist: bs4; extra == "onedrive"
|
|
138
|
-
Requires-Dist: msal; extra == "onedrive"
|
|
139
139
|
Provides-Extra: openai
|
|
140
140
|
Requires-Dist: openai; extra == "openai"
|
|
141
141
|
Requires-Dist: tiktoken; extra == "openai"
|
|
@@ -144,8 +144,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
|
|
|
144
144
|
Provides-Extra: org
|
|
145
145
|
Requires-Dist: unstructured[org]; extra == "org"
|
|
146
146
|
Provides-Extra: outlook
|
|
147
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
148
147
|
Requires-Dist: msal; extra == "outlook"
|
|
148
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
149
149
|
Provides-Extra: pdf
|
|
150
150
|
Requires-Dist: unstructured[pdf]; extra == "pdf"
|
|
151
151
|
Provides-Extra: pinecone
|
|
@@ -174,11 +174,11 @@ Requires-Dist: s3fs; extra == "s3"
|
|
|
174
174
|
Provides-Extra: salesforce
|
|
175
175
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
176
176
|
Provides-Extra: sftp
|
|
177
|
-
Requires-Dist: paramiko; extra == "sftp"
|
|
178
177
|
Requires-Dist: fsspec; extra == "sftp"
|
|
178
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
179
179
|
Provides-Extra: sharepoint
|
|
180
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
181
180
|
Requires-Dist: msal; extra == "sharepoint"
|
|
181
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
182
182
|
Provides-Extra: singlestore
|
|
183
183
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
184
184
|
Provides-Extra: slack
|
|
@@ -191,13 +191,13 @@ Requires-Dist: together; extra == "togetherai"
|
|
|
191
191
|
Provides-Extra: tsv
|
|
192
192
|
Requires-Dist: unstructured[tsv]; extra == "tsv"
|
|
193
193
|
Provides-Extra: vastdb
|
|
194
|
+
Requires-Dist: vastdb; extra == "vastdb"
|
|
194
195
|
Requires-Dist: ibis; extra == "vastdb"
|
|
195
196
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
196
|
-
Requires-Dist: vastdb; extra == "vastdb"
|
|
197
197
|
Provides-Extra: vectara
|
|
198
|
-
Requires-Dist: aiofiles; extra == "vectara"
|
|
199
|
-
Requires-Dist: requests; extra == "vectara"
|
|
200
198
|
Requires-Dist: httpx; extra == "vectara"
|
|
199
|
+
Requires-Dist: requests; extra == "vectara"
|
|
200
|
+
Requires-Dist: aiofiles; extra == "vectara"
|
|
201
201
|
Provides-Extra: weaviate
|
|
202
202
|
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
203
203
|
Provides-Extra: wikipedia
|
|
@@ -51,29 +51,30 @@ test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_Jj
|
|
|
51
51
|
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
53
53
|
test/integration/connectors/weaviate/test_cloud.py,sha256=U1ZS6a7wTPX7h3XGvaJHaT-Uwg4IeGgzxx1YBywgVhM,1284
|
|
54
|
-
test/integration/connectors/weaviate/test_local.py,sha256=
|
|
54
|
+
test/integration/connectors/weaviate/test_local.py,sha256=gXMpnzVcrNQdptDjx0haPWBU-dm1MQTkalgxocI3-L8,5287
|
|
55
55
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
57
|
-
test/integration/embedders/test_azure_openai.py,sha256=
|
|
58
|
-
test/integration/embedders/test_bedrock.py,sha256=
|
|
59
|
-
test/integration/embedders/test_huggingface.py,sha256=
|
|
60
|
-
test/integration/embedders/test_mixedbread.py,sha256=
|
|
61
|
-
test/integration/embedders/test_octoai.py,sha256=
|
|
62
|
-
test/integration/embedders/test_openai.py,sha256=
|
|
63
|
-
test/integration/embedders/test_togetherai.py,sha256=
|
|
64
|
-
test/integration/embedders/test_vertexai.py,sha256=
|
|
65
|
-
test/integration/embedders/test_voyageai.py,sha256=
|
|
66
|
-
test/integration/embedders/utils.py,sha256=
|
|
57
|
+
test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
|
|
58
|
+
test/integration/embedders/test_bedrock.py,sha256=ZehreheLgY9Bqdjk-3MQOaou9IP-H3Pcz7WWiOWAxTU,3557
|
|
59
|
+
test/integration/embedders/test_huggingface.py,sha256=qFblyXounVNRaNkk3gbKoBqU5E2dNecgKU2Bz2LyOa8,989
|
|
60
|
+
test/integration/embedders/test_mixedbread.py,sha256=lLz_cooyC38VSo-FMHbhKpHvYs3QzA20NOIvM5oooaw,1998
|
|
61
|
+
test/integration/embedders/test_octoai.py,sha256=qs-bqZ7iGWO_BzUZvKJmOHBT3cmFSkEYbleWhj3snJc,2197
|
|
62
|
+
test/integration/embedders/test_openai.py,sha256=9XioXuvdnbh_3vRmRwpMsi1D5heCcY7KA4nHb5vOU_M,2127
|
|
63
|
+
test/integration/embedders/test_togetherai.py,sha256=hsg3c3SGJGd93unz4-VLYmFXxLA1vmrD5xK5Gj-g0R4,2205
|
|
64
|
+
test/integration/embedders/test_vertexai.py,sha256=4-E4plJXFf1b02RhOqOCBHR2GA4gTnc8K4AnHm6EgPU,1830
|
|
65
|
+
test/integration/embedders/test_voyageai.py,sha256=Gm3sVjhsym1ASIDfr-sZoCbpsNMaAk_l4E3-dtjRCQ4,1832
|
|
66
|
+
test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
|
|
67
67
|
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
68
|
test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
|
|
69
69
|
test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
70
|
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
71
|
+
test/unit/test_html.py,sha256=LKGi_QaH4U4gktrbd2NcURL-d-0Rm1UnG5Y6r9EvTG0,4489
|
|
71
72
|
test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
|
|
72
73
|
test/unit/test_utils.py,sha256=Q6mp9YZPah8z3-2lreyRbmAc7m2Y_w26_N9vocSInoA,5421
|
|
73
74
|
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
75
|
test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
|
|
75
76
|
test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
|
|
76
|
-
test/unit/embed/test_openai.py,sha256=
|
|
77
|
+
test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
|
|
77
78
|
test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
|
|
78
79
|
test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
|
|
79
80
|
test/unit/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -95,13 +96,13 @@ test/unit/v2/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN
|
|
|
95
96
|
test/unit/v2/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
|
|
96
97
|
test/unit/v2/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
|
|
97
98
|
test/unit/v2/embedders/test_vertexai.py,sha256=_4a0tw_GbyvgYJSrP1yw1KjEQJYGzqR5yNXBCSdK8yQ,1145
|
|
98
|
-
test/unit/v2/embedders/test_voyageai.py,sha256=
|
|
99
|
+
test/unit/v2/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
|
|
99
100
|
test/unit/v2/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
101
|
test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U-dS0ga6h04h7WSfg,2281
|
|
101
102
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
103
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
103
104
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
104
|
-
unstructured_ingest/__version__.py,sha256=
|
|
105
|
+
unstructured_ingest/__version__.py,sha256=C0tWanpqRzvQsOclLMfAsEjPaa-5I3hXoMIvdtnb1w4,42
|
|
105
106
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
106
107
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
107
108
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -271,14 +272,14 @@ unstructured_ingest/connector/notion/types/database_properties/verification.py,s
|
|
|
271
272
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
272
273
|
unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
|
|
273
274
|
unstructured_ingest/embed/bedrock.py,sha256=50G8PBEdW3ILwyWXAWl4w-gUA9I0AR7LuFq6NLz-sWI,7284
|
|
274
|
-
unstructured_ingest/embed/huggingface.py,sha256=
|
|
275
|
-
unstructured_ingest/embed/interfaces.py,sha256=
|
|
276
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256=
|
|
277
|
-
unstructured_ingest/embed/octoai.py,sha256=
|
|
278
|
-
unstructured_ingest/embed/openai.py,sha256=
|
|
279
|
-
unstructured_ingest/embed/togetherai.py,sha256=
|
|
280
|
-
unstructured_ingest/embed/vertexai.py,sha256=
|
|
281
|
-
unstructured_ingest/embed/voyageai.py,sha256=
|
|
275
|
+
unstructured_ingest/embed/huggingface.py,sha256=Avcc16st9Cp2xGScG6TeNEEd3T8YjjnESNN4OdIlnh0,2119
|
|
276
|
+
unstructured_ingest/embed/interfaces.py,sha256=7jsQ3rLOXy1hq__muf-EPcLnv17XzNQaD05AyGbZeNo,3739
|
|
277
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=OhF5cMxWMq8-0mt8_-Xe3ZkjGjf2u6QYzfzgHnOEYtU,6838
|
|
278
|
+
unstructured_ingest/embed/octoai.py,sha256=oLNlM02W1CNUYRG_j6qWyI7yE24vYGKYradNzeeP6mE,5062
|
|
279
|
+
unstructured_ingest/embed/openai.py,sha256=H1sURGuRvXBUSXJcAVzrLObV5wSCVM29tkaXJ-9ZR30,4727
|
|
280
|
+
unstructured_ingest/embed/togetherai.py,sha256=SUd16JEUPlR8aCrd4q_T3CHwMTRUi-1yenq_r1AWlak,4266
|
|
281
|
+
unstructured_ingest/embed/vertexai.py,sha256=CPptS7U5W1CgvxIN8CgVz5J1Ia4FctV6BsmpN9c92A0,4890
|
|
282
|
+
unstructured_ingest/embed/voyageai.py,sha256=lydMASUDcTuyfWBPS3uIqDJPQbjf95bEI5Kr4tytONs,5111
|
|
282
283
|
unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
|
|
283
284
|
unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
|
|
284
285
|
unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
|
|
@@ -363,7 +364,7 @@ unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSz
|
|
|
363
364
|
unstructured_ingest/utils/data_prep.py,sha256=X3d8Kos1zqX-HQAicF_8TB0BrstRtHrbMzu_1s7mj7M,7191
|
|
364
365
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
365
366
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
366
|
-
unstructured_ingest/utils/html.py,sha256=
|
|
367
|
+
unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
|
|
367
368
|
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
368
369
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_ZhW-FEcMC2sCNsSNwDOjo,1729
|
|
369
370
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
@@ -386,19 +387,19 @@ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdj
|
|
|
386
387
|
unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
387
388
|
unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
|
|
388
389
|
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
|
|
389
|
-
unstructured_ingest/v2/interfaces/__init__.py,sha256=
|
|
390
|
+
unstructured_ingest/v2/interfaces/__init__.py,sha256=Xp7-345QpM6MG7V7G4ZrVERjADAUBiPAY88PKaMRyqY,1005
|
|
390
391
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
391
392
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
392
393
|
unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
|
|
393
394
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
|
|
394
|
-
unstructured_ingest/v2/interfaces/process.py,sha256=
|
|
395
|
+
unstructured_ingest/v2/interfaces/process.py,sha256=6Ll0O9ATcdm36dx2_TOg9PfCEJrADgyd8OQK3TTNzZM,448
|
|
395
396
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
396
397
|
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=9EV9863ODDv0Y5liDT3xh2yiVuFiaVVyCcnwCy6nfkM,3172
|
|
397
|
-
unstructured_ingest/v2/interfaces/uploader.py,sha256=
|
|
398
|
+
unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-iZPCVsUaL0rljcME,2090
|
|
398
399
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
400
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
400
401
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
401
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
402
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=y6AkUBUL2r3t4OO0jWKomtN3v8U7EDtMPrJ8VYRo7VM,16344
|
|
402
403
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
403
404
|
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
|
|
404
405
|
unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
|
|
@@ -412,7 +413,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
|
|
|
412
413
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
413
414
|
unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
|
|
414
415
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
415
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
416
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=uiuCOSwwasHp4eqtewMvgnM86WVch7HDFiWqpGLahvo,7812
|
|
416
417
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
417
418
|
unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
|
|
418
419
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
@@ -421,7 +422,7 @@ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XE
|
|
|
421
422
|
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
|
|
422
423
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
423
424
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
424
|
-
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=
|
|
425
|
+
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=_zkiST0FTggEKNORalCcZZIRGZKnCM0LLcavgQZfDVE,11112
|
|
425
426
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
426
427
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
|
|
427
428
|
unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
|
|
@@ -441,6 +442,7 @@ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtl
|
|
|
441
442
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
442
443
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
443
444
|
unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
|
|
445
|
+
unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
444
446
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
|
|
445
447
|
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=Yj4fIxgGo9Qh1x_6-INdbrPGHCuZElu-QKNfSVtW7F4,7694
|
|
446
448
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
|
|
@@ -558,10 +560,10 @@ unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWa
|
|
|
558
560
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
559
561
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
560
562
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
561
|
-
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=
|
|
562
|
-
unstructured_ingest-0.4.
|
|
563
|
-
unstructured_ingest-0.4.
|
|
564
|
-
unstructured_ingest-0.4.
|
|
565
|
-
unstructured_ingest-0.4.
|
|
566
|
-
unstructured_ingest-0.4.
|
|
567
|
-
unstructured_ingest-0.4.
|
|
563
|
+
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
|
|
564
|
+
unstructured_ingest-0.4.3.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
565
|
+
unstructured_ingest-0.4.3.dist-info/METADATA,sha256=UXXbx1Vr9zdcvAfOdgabURlB8nR2I8Lo_aDTN1PNjwU,8051
|
|
566
|
+
unstructured_ingest-0.4.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
567
|
+
unstructured_ingest-0.4.3.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
568
|
+
unstructured_ingest-0.4.3.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
569
|
+
unstructured_ingest-0.4.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.4.2.dist-info → unstructured_ingest-0.4.3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|