unstructured-ingest 0.5.5__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/test_pinecone.py +34 -0
- test/integration/connectors/weaviate/test_local.py +3 -3
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/process.py +1 -1
- unstructured_ingest/v2/interfaces/uploader.py +2 -2
- unstructured_ingest/v2/pipeline/pipeline.py +26 -31
- unstructured_ingest/v2/processes/connectors/pinecone.py +93 -5
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +13 -2
- unstructured_ingest/v2/processes/embedder.py +1 -1
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/METADATA +17 -17
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/RECORD +15 -15
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/top_level.txt +0 -0
|
@@ -351,3 +351,37 @@ def test_pinecone_stager(
|
|
|
351
351
|
stager=stager,
|
|
352
352
|
tmp_dir=tmp_path,
|
|
353
353
|
)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@requires_env(API_KEY)
|
|
357
|
+
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
|
|
358
|
+
def test_pinecone_create_destination(pinecone_index):
|
|
359
|
+
uploader = PineconeUploader(
|
|
360
|
+
connection_config=PineconeConnectionConfig(
|
|
361
|
+
access_config=PineconeAccessConfig(api_key=get_api_key())
|
|
362
|
+
),
|
|
363
|
+
upload_config=PineconeUploaderConfig(),
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
random_id = str(uuid4()).split("-")[0]
|
|
367
|
+
|
|
368
|
+
index_name = f"test-create-destination-{random_id}"
|
|
369
|
+
|
|
370
|
+
assert not uploader.index_exists(index_name=index_name)
|
|
371
|
+
|
|
372
|
+
try:
|
|
373
|
+
uploader.create_destination(destination_name=index_name, vector_length=1536)
|
|
374
|
+
except Exception as e:
|
|
375
|
+
error_body = getattr(e, "body", None)
|
|
376
|
+
raise pytest.fail(f"failed to create destination: {e} {error_body}")
|
|
377
|
+
|
|
378
|
+
assert uploader.index_exists(index_name=index_name), "destination was not created successfully"
|
|
379
|
+
|
|
380
|
+
try:
|
|
381
|
+
pc = uploader.connection_config.get_client()
|
|
382
|
+
logger.info(f"deleting index for test create destination: {index_name}")
|
|
383
|
+
pc.delete_index(name=index_name)
|
|
384
|
+
except Exception as e:
|
|
385
|
+
raise pytest.fail(f"failed to cleanup / delete the destination: {e}")
|
|
386
|
+
|
|
387
|
+
assert not uploader.index_exists(index_name=index_name), "cleanup failed"
|
|
@@ -78,7 +78,6 @@ def run_uploader_and_validate(
|
|
|
78
78
|
validate_count(expected_count=expected_count)
|
|
79
79
|
|
|
80
80
|
|
|
81
|
-
@pytest.mark.asyncio
|
|
82
81
|
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
|
|
83
82
|
def test_weaviate_local_destination(upload_file: Path, collection: str, tmp_path: Path):
|
|
84
83
|
file_data = FileData(
|
|
@@ -142,11 +141,12 @@ def test_weaviate_local_create_destination(weaviate_instance):
|
|
|
142
141
|
upload_config=LocalWeaviateUploaderConfig(),
|
|
143
142
|
connection_config=LocalWeaviateConnectionConfig(),
|
|
144
143
|
)
|
|
145
|
-
collection_name = "system_created"
|
|
144
|
+
collection_name = "system_created-123"
|
|
145
|
+
formatted_collection_name = "System_created_123"
|
|
146
146
|
created = uploader.create_destination(destination_name=collection_name)
|
|
147
147
|
assert created
|
|
148
148
|
with uploader.connection_config.get_client() as weaviate_client:
|
|
149
|
-
assert weaviate_client.collections.exists(name=collection_name)
|
|
149
|
+
assert weaviate_client.collections.exists(name=formatted_collection_name)
|
|
150
150
|
|
|
151
151
|
created = uploader.create_destination(destination_name=collection_name)
|
|
152
152
|
assert not created
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.5" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.6" # pragma: no cover
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from abc import ABC
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, TypeVar
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
@@ -61,6 +61,6 @@ class Uploader(BaseProcess, BaseConnector, ABC):
|
|
|
61
61
|
@dataclass
|
|
62
62
|
class VectorDBUploader(Uploader, ABC):
|
|
63
63
|
def create_destination(
|
|
64
|
-
self, destination_name: str = "elements",
|
|
64
|
+
self, vector_length: int, destination_name: str = "elements", **kwargs: Any
|
|
65
65
|
) -> bool:
|
|
66
66
|
return False
|
|
@@ -126,14 +126,32 @@ class Pipeline:
|
|
|
126
126
|
for kk, vv in v.items():
|
|
127
127
|
logger.error(f"{k}: [{kk}] {vv}")
|
|
128
128
|
|
|
129
|
+
def _run_initialization(self):
|
|
130
|
+
failures = {}
|
|
131
|
+
init_kwargs = {}
|
|
132
|
+
for step in self._get_ordered_steps():
|
|
133
|
+
try:
|
|
134
|
+
step.process.init(**init_kwargs)
|
|
135
|
+
step.process.precheck()
|
|
136
|
+
# Make sure embedder dimensions available for downstream steps
|
|
137
|
+
if isinstance(step.process, Embedder):
|
|
138
|
+
embed_dimensions = step.process.config.get_embedder().dimension
|
|
139
|
+
init_kwargs["vector_length"] = embed_dimensions
|
|
140
|
+
|
|
141
|
+
except Exception as e:
|
|
142
|
+
failures[step.process.__class__.__name__] = f"[{type(e).__name__}] {e}"
|
|
143
|
+
if failures:
|
|
144
|
+
for k, v in failures.items():
|
|
145
|
+
logger.error(f"Step initialization failure: {k}: {v}")
|
|
146
|
+
raise PipelineError("Initialization failed")
|
|
147
|
+
|
|
129
148
|
def run(self):
|
|
130
149
|
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint, log_out=logger.info)
|
|
131
150
|
try:
|
|
132
151
|
with otel_handler.get_tracer().start_as_current_span(
|
|
133
152
|
"ingest process", record_exception=True
|
|
134
153
|
):
|
|
135
|
-
self._run_inits()
|
|
136
|
-
self._run_prechecks()
|
|
154
|
+
self._run_initialization()
|
|
137
155
|
self._run()
|
|
138
156
|
finally:
|
|
139
157
|
self.log_statuses()
|
|
@@ -154,43 +172,20 @@ class Pipeline:
|
|
|
154
172
|
final = [f for f in flat if f]
|
|
155
173
|
return final or None
|
|
156
174
|
|
|
157
|
-
def _get_all_steps(self) -> list[PipelineStep]:
|
|
158
|
-
steps = [self.indexer_step, self.downloader_step
|
|
175
|
+
def _get_ordered_steps(self) -> list[PipelineStep]:
|
|
176
|
+
steps = [self.indexer_step, self.downloader_step]
|
|
177
|
+
if self.uncompress_step:
|
|
178
|
+
steps.append(self.uncompress_step)
|
|
179
|
+
steps.append(self.partitioner_step)
|
|
159
180
|
if self.chunker_step:
|
|
160
181
|
steps.append(self.chunker_step)
|
|
161
182
|
if self.embedder_step:
|
|
162
183
|
steps.append(self.embedder_step)
|
|
163
|
-
if self.uncompress_step:
|
|
164
|
-
steps.append(self.uncompress_step)
|
|
165
184
|
if self.stager_step:
|
|
166
185
|
steps.append(self.stager_step)
|
|
186
|
+
steps.append(self.uploader_step)
|
|
167
187
|
return steps
|
|
168
188
|
|
|
169
|
-
def _run_inits(self):
|
|
170
|
-
failures = {}
|
|
171
|
-
|
|
172
|
-
for step in self._get_all_steps():
|
|
173
|
-
try:
|
|
174
|
-
step.process.init()
|
|
175
|
-
except Exception as e:
|
|
176
|
-
failures[step.process.__class__.__name__] = f"[{type(e).__name__}] {e}"
|
|
177
|
-
if failures:
|
|
178
|
-
for k, v in failures.items():
|
|
179
|
-
logger.error(f"Step init failure: {k}: {v}")
|
|
180
|
-
raise PipelineError("Init failed")
|
|
181
|
-
|
|
182
|
-
def _run_prechecks(self):
|
|
183
|
-
failures = {}
|
|
184
|
-
for step in self._get_all_steps():
|
|
185
|
-
try:
|
|
186
|
-
step.process.precheck()
|
|
187
|
-
except Exception as e:
|
|
188
|
-
failures[step.process.__class__.__name__] = f"[{type(e).__name__}] {e}"
|
|
189
|
-
if failures:
|
|
190
|
-
for k, v in failures.items():
|
|
191
|
-
logger.error(f"Step precheck failure: {k}: {v}")
|
|
192
|
-
raise PipelineError("Precheck failed")
|
|
193
|
-
|
|
194
189
|
def apply_filter(self, records: list[dict]) -> list[dict]:
|
|
195
190
|
if not self.filter_step:
|
|
196
191
|
return records
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import re
|
|
2
3
|
from dataclasses import dataclass, field
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
4
5
|
|
|
5
6
|
from pydantic import Field, Secret
|
|
6
7
|
|
|
@@ -13,10 +14,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
13
14
|
AccessConfig,
|
|
14
15
|
ConnectionConfig,
|
|
15
16
|
FileData,
|
|
16
|
-
Uploader,
|
|
17
17
|
UploaderConfig,
|
|
18
18
|
UploadStager,
|
|
19
19
|
UploadStagerConfig,
|
|
20
|
+
VectorDBUploader,
|
|
20
21
|
)
|
|
21
22
|
from unstructured_ingest.v2.logger import logger
|
|
22
23
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
@@ -41,7 +42,7 @@ class PineconeAccessConfig(AccessConfig):
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
class PineconeConnectionConfig(ConnectionConfig):
|
|
44
|
-
index_name: str = Field(description="Name of the index to connect to.")
|
|
45
|
+
index_name: Optional[str] = Field(description="Name of the index to connect to.", default=None)
|
|
45
46
|
access_config: Secret[PineconeAccessConfig] = Field(
|
|
46
47
|
default=PineconeAccessConfig(), validate_default=True
|
|
47
48
|
)
|
|
@@ -160,18 +161,101 @@ class PineconeUploadStager(UploadStager):
|
|
|
160
161
|
|
|
161
162
|
|
|
162
163
|
@dataclass
|
|
163
|
-
class PineconeUploader(Uploader):
|
|
164
|
+
class PineconeUploader(VectorDBUploader):
|
|
164
165
|
upload_config: PineconeUploaderConfig
|
|
165
166
|
connection_config: PineconeConnectionConfig
|
|
166
167
|
connector_type: str = CONNECTOR_TYPE
|
|
167
168
|
|
|
169
|
+
def init(self, **kwargs: Any) -> None:
|
|
170
|
+
self.create_destination(**kwargs)
|
|
171
|
+
|
|
172
|
+
def index_exists(self, index_name: Optional[str]) -> bool:
|
|
173
|
+
from pinecone.exceptions import NotFoundException
|
|
174
|
+
|
|
175
|
+
index_name = index_name or self.connection_config.index_name
|
|
176
|
+
pc = self.connection_config.get_client()
|
|
177
|
+
try:
|
|
178
|
+
pc.describe_index(index_name)
|
|
179
|
+
return True
|
|
180
|
+
except NotFoundException:
|
|
181
|
+
return False
|
|
182
|
+
except Exception as e:
|
|
183
|
+
logger.error(f"failed to check if pinecone index exists : {e}")
|
|
184
|
+
raise DestinationConnectionError(f"failed to check if pinecone index exists : {e}")
|
|
185
|
+
|
|
168
186
|
def precheck(self):
|
|
169
187
|
try:
|
|
170
|
-
|
|
188
|
+
# just a connection check here. not an actual index_exists check
|
|
189
|
+
self.index_exists("just-checking-our-connection")
|
|
190
|
+
|
|
191
|
+
if self.connection_config.index_name and not self.index_exists(
|
|
192
|
+
self.connection_config.index_name
|
|
193
|
+
):
|
|
194
|
+
raise DestinationConnectionError(
|
|
195
|
+
f"index {self.connection_config.index_name} does not exist"
|
|
196
|
+
)
|
|
171
197
|
except Exception as e:
|
|
172
198
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
173
199
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
174
200
|
|
|
201
|
+
def format_destination_name(self, destination_name: str) -> str:
|
|
202
|
+
# Pinecone naming requirements:
|
|
203
|
+
# can only contain lowercase letters, numbers, and hyphens
|
|
204
|
+
# must be 45 characters or less
|
|
205
|
+
formatted = re.sub(r"[^a-z0-9]", "-", destination_name.lower())
|
|
206
|
+
return formatted
|
|
207
|
+
|
|
208
|
+
def create_destination(
|
|
209
|
+
self,
|
|
210
|
+
vector_length: int,
|
|
211
|
+
destination_name: str = "elements",
|
|
212
|
+
destination_type: Literal["pod", "serverless"] = "serverless",
|
|
213
|
+
serverless_cloud: str = "aws",
|
|
214
|
+
serverless_region: str = "us-west-2",
|
|
215
|
+
pod_environment: str = "us-east1-gcp",
|
|
216
|
+
pod_type: str = "p1.x1",
|
|
217
|
+
pod_count: int = 1,
|
|
218
|
+
**kwargs: Any,
|
|
219
|
+
) -> bool:
|
|
220
|
+
from pinecone import PodSpec, ServerlessSpec
|
|
221
|
+
|
|
222
|
+
index_name = destination_name or self.connection_config.index_name
|
|
223
|
+
index_name = self.format_destination_name(index_name)
|
|
224
|
+
self.connection_config.index_name = index_name
|
|
225
|
+
|
|
226
|
+
if not self.index_exists(index_name):
|
|
227
|
+
|
|
228
|
+
logger.info(f"creating pinecone index {index_name}")
|
|
229
|
+
|
|
230
|
+
pc = self.connection_config.get_client()
|
|
231
|
+
|
|
232
|
+
if destination_type == "serverless":
|
|
233
|
+
pc.create_index(
|
|
234
|
+
name=destination_name,
|
|
235
|
+
dimension=vector_length,
|
|
236
|
+
spec=ServerlessSpec(cloud=serverless_cloud, region=serverless_region),
|
|
237
|
+
**kwargs,
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
return True
|
|
241
|
+
|
|
242
|
+
elif destination_type == "pod":
|
|
243
|
+
pc.create_index(
|
|
244
|
+
name=destination_name,
|
|
245
|
+
dimension=vector_length,
|
|
246
|
+
spec=PodSpec(environment=pod_environment, pod_type=pod_type, pods=pod_count),
|
|
247
|
+
**kwargs,
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
return True
|
|
251
|
+
|
|
252
|
+
else:
|
|
253
|
+
raise ValueError(f"unexpected destination type: {destination_type}")
|
|
254
|
+
|
|
255
|
+
else:
|
|
256
|
+
logger.debug(f"index {index_name} already exists, skipping creation")
|
|
257
|
+
return False
|
|
258
|
+
|
|
175
259
|
def pod_delete_by_record_id(self, file_data: FileData) -> None:
|
|
176
260
|
logger.debug(
|
|
177
261
|
f"deleting any content with metadata "
|
|
@@ -266,6 +350,10 @@ class PineconeUploader(Uploader):
|
|
|
266
350
|
)
|
|
267
351
|
# Determine if serverless or pod based index
|
|
268
352
|
pinecone_client = self.connection_config.get_client()
|
|
353
|
+
|
|
354
|
+
if not self.connection_config.index_name:
|
|
355
|
+
raise ValueError("No index name specified")
|
|
356
|
+
|
|
269
357
|
index_description = pinecone_client.describe_index(name=self.connection_config.index_name)
|
|
270
358
|
if "serverless" in index_description.get("spec"):
|
|
271
359
|
self.serverless_delete_by_record_id(file_data=file_data)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import re
|
|
2
3
|
from abc import ABC, abstractmethod
|
|
3
4
|
from contextlib import contextmanager
|
|
4
5
|
from dataclasses import dataclass, field
|
|
@@ -229,19 +230,29 @@ class WeaviateUploader(VectorDBUploader, ABC):
|
|
|
229
230
|
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
230
231
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
231
232
|
|
|
232
|
-
def init(self,
|
|
233
|
-
self.create_destination()
|
|
233
|
+
def init(self, **kwargs: Any) -> None:
|
|
234
|
+
self.create_destination(**kwargs)
|
|
235
|
+
|
|
236
|
+
def format_destination_name(self, destination_name: str) -> str:
|
|
237
|
+
# Weaviate naming requirements:
|
|
238
|
+
# must be alphanumeric and underscores only
|
|
239
|
+
formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
|
|
240
|
+
# must begin with capital letter
|
|
241
|
+
return formatted.capitalize()
|
|
234
242
|
|
|
235
243
|
def create_destination(
|
|
236
244
|
self, destination_name: str = "elements", vector_length: Optional[int] = None, **kwargs: Any
|
|
237
245
|
) -> bool:
|
|
246
|
+
destination_name = self.format_destination_name(destination_name)
|
|
238
247
|
collection_name = self.upload_config.collection or destination_name
|
|
239
248
|
self.upload_config.collection = collection_name
|
|
249
|
+
|
|
240
250
|
connectors_dir = Path(__file__).parents[1]
|
|
241
251
|
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
242
252
|
with collection_config_file.open() as f:
|
|
243
253
|
collection_config = json.load(f)
|
|
244
254
|
collection_config["class"] = collection_name
|
|
255
|
+
|
|
245
256
|
if not self._collection_exists():
|
|
246
257
|
logger.info(
|
|
247
258
|
f"creating default weaviate collection '{collection_name}' with default configs"
|
|
@@ -186,7 +186,7 @@ class EmbedderConfig(BaseModel):
|
|
|
186
186
|
class Embedder(BaseProcess, ABC):
|
|
187
187
|
config: EmbedderConfig
|
|
188
188
|
|
|
189
|
-
def init(self,
|
|
189
|
+
def init(self, **kwargs: Any) -> None:
|
|
190
190
|
self.config.get_embedder().initialize()
|
|
191
191
|
|
|
192
192
|
def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.5.5
|
|
3
|
+
Version: 0.5.6
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
+
Requires-Dist: dataclasses_json
|
|
26
|
+
Requires-Dist: click
|
|
27
|
+
Requires-Dist: python-dateutil
|
|
25
28
|
Requires-Dist: tqdm
|
|
26
29
|
Requires-Dist: pydantic>=2.7
|
|
27
|
-
Requires-Dist: click
|
|
28
30
|
Requires-Dist: opentelemetry-sdk
|
|
29
|
-
Requires-Dist: python-dateutil
|
|
30
31
|
Requires-Dist: pandas
|
|
31
|
-
Requires-Dist: dataclasses_json
|
|
32
32
|
Provides-Extra: remote
|
|
33
33
|
Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
|
|
34
34
|
Provides-Extra: csv
|
|
@@ -66,13 +66,13 @@ Requires-Dist: pyairtable; extra == "airtable"
|
|
|
66
66
|
Provides-Extra: astradb
|
|
67
67
|
Requires-Dist: astrapy; extra == "astradb"
|
|
68
68
|
Provides-Extra: azure
|
|
69
|
-
Requires-Dist: fsspec; extra == "azure"
|
|
70
69
|
Requires-Dist: adlfs; extra == "azure"
|
|
70
|
+
Requires-Dist: fsspec; extra == "azure"
|
|
71
71
|
Provides-Extra: azure-ai-search
|
|
72
72
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
73
73
|
Provides-Extra: biomed
|
|
74
|
-
Requires-Dist: requests; extra == "biomed"
|
|
75
74
|
Requires-Dist: bs4; extra == "biomed"
|
|
75
|
+
Requires-Dist: requests; extra == "biomed"
|
|
76
76
|
Provides-Extra: box
|
|
77
77
|
Requires-Dist: fsspec; extra == "box"
|
|
78
78
|
Requires-Dist: boxfs; extra == "box"
|
|
@@ -98,9 +98,9 @@ Requires-Dist: duckdb; extra == "duckdb"
|
|
|
98
98
|
Provides-Extra: elasticsearch
|
|
99
99
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
100
100
|
Provides-Extra: gcs
|
|
101
|
-
Requires-Dist: gcsfs; extra == "gcs"
|
|
102
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
103
101
|
Requires-Dist: bs4; extra == "gcs"
|
|
102
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
103
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
104
104
|
Provides-Extra: github
|
|
105
105
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
106
106
|
Requires-Dist: requests; extra == "github"
|
|
@@ -109,8 +109,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
|
|
|
109
109
|
Provides-Extra: google-drive
|
|
110
110
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
111
111
|
Provides-Extra: hubspot
|
|
112
|
-
Requires-Dist: urllib3; extra == "hubspot"
|
|
113
112
|
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
113
|
+
Requires-Dist: urllib3; extra == "hubspot"
|
|
114
114
|
Provides-Extra: jira
|
|
115
115
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
116
116
|
Provides-Extra: kafka
|
|
@@ -128,14 +128,14 @@ Requires-Dist: cymple; extra == "neo4j"
|
|
|
128
128
|
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
129
129
|
Requires-Dist: networkx; extra == "neo4j"
|
|
130
130
|
Provides-Extra: notion
|
|
131
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
132
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
133
131
|
Requires-Dist: backoff; extra == "notion"
|
|
134
132
|
Requires-Dist: httpx; extra == "notion"
|
|
133
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
134
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
136
137
|
Requires-Dist: msal; extra == "onedrive"
|
|
137
138
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
138
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
139
139
|
Provides-Extra: opensearch
|
|
140
140
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
141
141
|
Provides-Extra: outlook
|
|
@@ -165,8 +165,8 @@ Requires-Dist: fsspec; extra == "sftp"
|
|
|
165
165
|
Provides-Extra: slack
|
|
166
166
|
Requires-Dist: slack_sdk[optional]; extra == "slack"
|
|
167
167
|
Provides-Extra: snowflake
|
|
168
|
-
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
169
168
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
169
|
+
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
170
170
|
Provides-Extra: wikipedia
|
|
171
171
|
Requires-Dist: wikipedia; extra == "wikipedia"
|
|
172
172
|
Provides-Extra: weaviate
|
|
@@ -178,13 +178,13 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
178
178
|
Provides-Extra: singlestore
|
|
179
179
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
180
180
|
Provides-Extra: vectara
|
|
181
|
-
Requires-Dist: aiofiles; extra == "vectara"
|
|
182
|
-
Requires-Dist: requests; extra == "vectara"
|
|
183
181
|
Requires-Dist: httpx; extra == "vectara"
|
|
182
|
+
Requires-Dist: requests; extra == "vectara"
|
|
183
|
+
Requires-Dist: aiofiles; extra == "vectara"
|
|
184
184
|
Provides-Extra: vastdb
|
|
185
|
+
Requires-Dist: vastdb; extra == "vastdb"
|
|
185
186
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
186
187
|
Requires-Dist: ibis; extra == "vastdb"
|
|
187
|
-
Requires-Dist: vastdb; extra == "vastdb"
|
|
188
188
|
Provides-Extra: embed-huggingface
|
|
189
189
|
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
190
190
|
Provides-Extra: embed-octoai
|
|
@@ -200,8 +200,8 @@ Provides-Extra: openai
|
|
|
200
200
|
Requires-Dist: openai; extra == "openai"
|
|
201
201
|
Requires-Dist: tiktoken; extra == "openai"
|
|
202
202
|
Provides-Extra: bedrock
|
|
203
|
-
Requires-Dist: aioboto3; extra == "bedrock"
|
|
204
203
|
Requires-Dist: boto3; extra == "bedrock"
|
|
204
|
+
Requires-Dist: aioboto3; extra == "bedrock"
|
|
205
205
|
Provides-Extra: togetherai
|
|
206
206
|
Requires-Dist: together; extra == "togetherai"
|
|
207
207
|
Dynamic: author
|
|
@@ -17,7 +17,7 @@ test/integration/connectors/test_mongodb.py,sha256=0A6DvF-iTCSZzOefisd_i20j9li8u
|
|
|
17
17
|
test/integration/connectors/test_neo4j.py,sha256=r4TRYtTXeeOdcRcfa_gvslhSKvoIWrwN1FRJ5XRoH4k,8456
|
|
18
18
|
test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qkoV3srltFOSSKTA,5403
|
|
19
19
|
test/integration/connectors/test_onedrive.py,sha256=iwiDK0kWCfQbIEPnWUzzAA5PiCsHcmFZSxEcIZy_6cc,5229
|
|
20
|
-
test/integration/connectors/test_pinecone.py,sha256=
|
|
20
|
+
test/integration/connectors/test_pinecone.py,sha256=9FC0frer7gtDzk5A6OhGsV8S4ggYfa5ReEO9t7L3Am0,13649
|
|
21
21
|
test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
|
|
22
22
|
test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
|
|
23
23
|
test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
|
|
@@ -54,7 +54,7 @@ test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_Jj
|
|
|
54
54
|
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
55
|
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
56
56
|
test/integration/connectors/weaviate/test_cloud.py,sha256=U1ZS6a7wTPX7h3XGvaJHaT-Uwg4IeGgzxx1YBywgVhM,1284
|
|
57
|
-
test/integration/connectors/weaviate/test_local.py,sha256=
|
|
57
|
+
test/integration/connectors/weaviate/test_local.py,sha256=NMQh9kV_BoIrpXe5abGkUSJYsY2ipRSqyFS4EzH1o7s,5333
|
|
58
58
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
59
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
60
60
|
test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
|
|
@@ -107,7 +107,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
107
107
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
108
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
109
109
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
110
|
-
unstructured_ingest/__version__.py,sha256=
|
|
110
|
+
unstructured_ingest/__version__.py,sha256=8heXQJ79JSGfqiDjjQtqcfkCTWOYFwgErKEt_wwF3c4,42
|
|
111
111
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
112
112
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
113
113
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -397,14 +397,14 @@ unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIws
|
|
|
397
397
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
398
398
|
unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
|
|
399
399
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
|
|
400
|
-
unstructured_ingest/v2/interfaces/process.py,sha256=
|
|
400
|
+
unstructured_ingest/v2/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
|
|
401
401
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
402
402
|
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=9EV9863ODDv0Y5liDT3xh2yiVuFiaVVyCcnwCy6nfkM,3172
|
|
403
|
-
unstructured_ingest/v2/interfaces/uploader.py,sha256=
|
|
403
|
+
unstructured_ingest/v2/interfaces/uploader.py,sha256=diMkAD5HY8IYpeP1DoFeRD_SexAgOEl1nUcimNnyATc,2063
|
|
404
404
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
405
405
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
406
406
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
407
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
407
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=UeOk5SywJZIn3kCnHclQ2cP7JJIXb4NDjpwzsCP_cF0,16523
|
|
408
408
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
409
409
|
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
|
|
410
410
|
unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
|
|
@@ -418,7 +418,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
|
|
|
418
418
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
419
419
|
unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
|
|
420
420
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
421
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
421
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=4x-Rt5UCvwdgihDAr24hvTGDEd1CdKF9xJrf3aMU-ck,7926
|
|
422
422
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
423
423
|
unstructured_ingest/v2/processes/partitioner.py,sha256=ZC9mt85I3o_SLR4DvE7vPBGphMET994phFkTuT-L9B8,9998
|
|
424
424
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
@@ -440,7 +440,7 @@ unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNN
|
|
|
440
440
|
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=ijp5hjmDpLoIHL9UJzV4_4vVtQBlQ2R_vLatlUYivX4,17464
|
|
441
441
|
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
|
|
442
442
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
443
|
-
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=
|
|
443
|
+
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=93WmYO9OT8er9DSlh8odbJCtjcLsVMlqyXlYADgDEjc,14013
|
|
444
444
|
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
|
|
445
445
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
446
446
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=2T9Bm1H_ALwHhG_YP7vsuUUW-mUg61zcaae3aa9BnN4,4827
|
|
@@ -566,10 +566,10 @@ unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWa
|
|
|
566
566
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
567
567
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
568
568
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
569
|
-
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=
|
|
570
|
-
unstructured_ingest-0.5.
|
|
571
|
-
unstructured_ingest-0.5.
|
|
572
|
-
unstructured_ingest-0.5.
|
|
573
|
-
unstructured_ingest-0.5.
|
|
574
|
-
unstructured_ingest-0.5.
|
|
575
|
-
unstructured_ingest-0.5.
|
|
569
|
+
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yfABJKJGCvPuZ2XCNtDOuCtiscdEAmBCSPPNZnbTKDk,12821
|
|
570
|
+
unstructured_ingest-0.5.6.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
571
|
+
unstructured_ingest-0.5.6.dist-info/METADATA,sha256=ts8jHfqXkNXKcF9TL5UqQNHkynZuzjiobUomXaqiYgM,8316
|
|
572
|
+
unstructured_ingest-0.5.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
573
|
+
unstructured_ingest-0.5.6.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
574
|
+
unstructured_ingest-0.5.6.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
575
|
+
unstructured_ingest-0.5.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.5.5.dist-info → unstructured_ingest-0.5.6.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|