unstructured-ingest 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- test/integration/connectors/test_lancedb.py +46 -9
- test/integration/connectors/test_pinecone.py +60 -9
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +1 -2
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -5
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +12 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +31 -21
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +0 -3
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +0 -10
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/METADATA +15 -15
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/RECORD +15 -15
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/top_level.txt +0 -0
test/integration/connectors/test_lancedb.py CHANGED (+46 -9)

@@ -12,6 +12,7 @@ from lancedb import AsyncConnection
 from upath import UPath
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
     LanceDBAwsAccessConfig,
@@ -43,7 +44,6 @@ DATABASE_NAME = "database"
 TABLE_NAME = "elements"
 DIMENSION = 384
 NUMBER_EXPECTED_ROWS = 22
-NUMBER_EXPECTED_COLUMNS = 10
 S3_BUCKET = "s3://utic-ingest-test-fixtures/"
 GS_BUCKET = "gs://utic-test-ingest-fixtures-output/"
 AZURE_BUCKET = "az://utic-ingest-test-fixtures-output/"
@@ -54,9 +54,9 @@ REQUIRED_ENV_VARS = {
     "local": (),
 }
 
-
 SCHEMA = pa.schema(
     [
+        pa.field(RECORD_ID_LABEL, pa.string()),
         pa.field("vector", pa.list_(pa.float16(), DIMENSION)),
         pa.field("text", pa.string(), nullable=True),
         pa.field("type", pa.string(), nullable=True),
@@ -69,6 +69,7 @@ SCHEMA = pa.schema(
         pa.field("metadata-page_number", pa.int32(), nullable=True),
     ]
 )
+NUMBER_EXPECTED_COLUMNS = len(SCHEMA.names)
 
 
 @pytest_asyncio.fixture
@@ -116,7 +117,7 @@ async def test_lancedb_destination(
     file_data = FileData(
         source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
         connector_type=CONNECTOR_TYPE,
-        identifier="mock
+        identifier="mock-file-data",
     )
     stager = LanceDBUploadStager()
     uploader = _get_uploader(uri)
@@ -129,17 +130,52 @@ async def test_lancedb_destination(
 
     await uploader.run_async(path=staged_file_path, file_data=file_data)
 
-
-
+    # Test upload to empty table
+    with await connection.open_table(TABLE_NAME) as table:
+        table_df: pd.DataFrame = await table.to_pandas()
 
     assert len(table_df) == NUMBER_EXPECTED_ROWS
     assert len(table_df.columns) == NUMBER_EXPECTED_COLUMNS
 
+    assert table_df[RECORD_ID_LABEL][0] == file_data.identifier
     assert table_df["element_id"][0] == "2470d8dc42215b3d68413b55bf00fed2"
     assert table_df["type"][0] == "CompositeElement"
     assert table_df["metadata-filename"][0] == "DA-1p-with-duplicate-pages.pdf.json"
     assert table_df["metadata-text_as_html"][0] is None
 
+    # Test upload of the second file, rows should be appended
+    file_data.identifier = "mock-file-data-2"
+    staged_second_file_path = stager.run(
+        elements_filepath=upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=f"{upload_file.stem}-2{upload_file.suffix}",
+    )
+    await uploader.run_async(path=staged_second_file_path, file_data=file_data)
+    with await connection.open_table(TABLE_NAME) as table:
+        appended_table_df: pd.DataFrame = await table.to_pandas()
+    assert len(appended_table_df) == 2 * NUMBER_EXPECTED_ROWS
+
+    # Test re-upload of the first file, rows should be overwritten, not appended
+    await uploader.run_async(path=staged_file_path, file_data=file_data)
+    with await connection.open_table(TABLE_NAME) as table:
+        overwritten_table_df: pd.DataFrame = await table.to_pandas()
+    assert len(overwritten_table_df) == 2 * NUMBER_EXPECTED_ROWS
+
+
+class TestPrecheck:
+    @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+    @pytest.mark.parametrize("connection_with_uri", ["local", "s3", "gcs", "az"], indirect=True)
+    def test_succeeds(
+        self,
+        upload_file: Path,
+        connection_with_uri: tuple[AsyncConnection, str],
+        tmp_path: Path,
+    ) -> None:
+        _, uri = connection_with_uri
+        uploader = _get_uploader(uri)
+        uploader.precheck()
+
 
 def _get_uri(target: Literal["local", "s3", "gcs", "az"], local_base_path: Path) -> str:
     if target == "local":
@@ -158,11 +194,12 @@ def _get_uploader(
     uri: str,
 ) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBAwsUploader, LanceDBGSPUploader]:
     target = uri.split("://", maxsplit=1)[0] if uri.startswith(("s3", "az", "gs")) else "local"
+    upload_config = LanceDBUploaderConfig(table_name=TABLE_NAME)
     if target == "az":
         azure_connection_string = os.getenv("AZURE_DEST_CONNECTION_STR")
         access_config_kwargs = _parse_azure_connection_string(azure_connection_string)
         return LanceDBAzureUploader(
-            upload_config=
+            upload_config=upload_config,
             connection_config=LanceDBAzureConnectionConfig(
                 access_config=LanceDBAzureAccessConfig(**access_config_kwargs),
                 uri=uri,
@@ -171,7 +208,7 @@ def _get_uploader(
 
     elif target == "s3":
         return LanceDBAwsUploader(
-            upload_config=
+            upload_config=upload_config,
             connection_config=LanceDBAwsConnectionConfig(
                 access_config=LanceDBAwsAccessConfig(
                     aws_access_key_id=os.getenv("S3_INGEST_TEST_ACCESS_KEY"),
@@ -182,7 +219,7 @@ def _get_uploader(
         )
     elif target == "gs":
         return LanceDBGSPUploader(
-            upload_config=
+            upload_config=upload_config,
             connection_config=LanceDBGCSConnectionConfig(
                 access_config=LanceDBGCSAccessConfig(
                     google_service_account_key=os.getenv("GCP_INGEST_SERVICE_KEY")
@@ -192,7 +229,7 @@ def _get_uploader(
         )
     else:
         return LanceDBLocalUploader(
-            upload_config=
+            upload_config=upload_config,
             connection_config=LanceDBLocalConnectionConfig(
                 access_config=LanceDBLocalAccessConfig(),
                 uri=uri,
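
Note: the test no longer hard-codes NUMBER_EXPECTED_COLUMNS = 10; it derives the count from SCHEMA, so adding the record-id field cannot leave the assertion stale. A minimal standalone sketch of that pattern, with a two-field stand-in schema (the RECORD_ID_LABEL value below is an assumption; the real constant is imported from unstructured_ingest.v2.constants):

    import pyarrow as pa

    RECORD_ID_LABEL = "record_id"  # assumed value, for illustration only

    SCHEMA = pa.schema(
        [
            pa.field(RECORD_ID_LABEL, pa.string()),
            pa.field("text", pa.string(), nullable=True),
        ]
    )

    # Counting columns off the schema keeps the expected-columns assertion
    # in sync whenever fields are added or removed.
    NUMBER_EXPECTED_COLUMNS = len(SCHEMA.names)
    assert NUMBER_EXPECTED_COLUMNS == 2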
test/integration/connectors/test_pinecone.py CHANGED (+60 -9)

@@ -1,4 +1,5 @@
 import json
+import math
 import os
 import re
 import time
@@ -19,6 +20,7 @@ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connectors.pinecone import (
     CONNECTOR_TYPE,
+    MAX_QUERY_RESULTS,
     PineconeAccessConfig,
     PineconeConnectionConfig,
     PineconeUploader,
@@ -118,7 +120,10 @@ def validate_pinecone_index(
             f"retry attempt {i}: expected {expected_num_of_vectors} != vector count {vector_count}"
         )
         time.sleep(interval)
-    assert vector_count == expected_num_of_vectors
+    assert vector_count == expected_num_of_vectors, (
+        f"vector count from index ({vector_count}) doesn't "
+        f"match expected number: {expected_num_of_vectors}"
+    )
 
 
 @requires_env(API_KEY)
@@ -147,10 +152,7 @@ async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp
     uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config)
     uploader.precheck()
 
-
-        await uploader.run_async(path=new_upload_file, file_data=file_data)
-    else:
-        uploader.run(path=new_upload_file, file_data=file_data)
+    uploader.run(path=new_upload_file, file_data=file_data)
     with new_upload_file.open() as f:
         staged_content = json.load(f)
     expected_num_of_vectors = len(staged_content)
@@ -160,10 +162,59 @@ async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp
     )
 
     # Rerun uploader and make sure no duplicates exist
-
-
-
-
+    uploader.run(path=new_upload_file, file_data=file_data)
+    logger.info("validating second upload")
+    validate_pinecone_index(
+        index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
+    )
+
+
+@requires_env(API_KEY)
+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.skip(reason="TODO: get this to work")
+async def test_pinecone_destination_large_index(
+    pinecone_index: str, upload_file: Path, temp_dir: Path
+):
+    new_file = temp_dir / "large_file.json"
+    with upload_file.open() as f:
+        upload_content = json.load(f)
+
+    min_entries = math.ceil((MAX_QUERY_RESULTS * 2) / len(upload_content))
+    new_content = (upload_content * min_entries)[: (2 * MAX_QUERY_RESULTS)]
+    print(f"Creating large index content with {len(new_content)} records")
+    with new_file.open("w") as f:
+        json.dump(new_content, f)
+
+    expected_num_of_vectors = len(new_content)
+    file_data = FileData(
+        source_identifiers=SourceIdentifiers(fullpath=new_file.name, filename=new_file.name),
+        connector_type=CONNECTOR_TYPE,
+        identifier="pinecone_mock_id",
+    )
+    connection_config = PineconeConnectionConfig(
+        index_name=pinecone_index,
+        access_config=PineconeAccessConfig(api_key=get_api_key()),
+    )
+    stager_config = PineconeUploadStagerConfig()
+    stager = PineconeUploadStager(upload_stager_config=stager_config)
+    new_upload_file = stager.run(
+        elements_filepath=new_file,
+        output_dir=temp_dir,
+        output_filename=new_file.name,
+        file_data=file_data,
+    )
+
+    upload_config = PineconeUploaderConfig()
+    uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config)
+    uploader.precheck()
+
+    uploader.run(path=new_upload_file, file_data=file_data)
+    validate_pinecone_index(
+        index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
+    )
+    # Rerun uploader and make sure no duplicates exist
+    uploader.run(path=new_upload_file, file_data=file_data)
     logger.info("validating second upload")
     validate_pinecone_index(
         index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
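
Note: the new (currently skipped) large-index test sizes its payload so the delete path is forced to paginate: the fixture elements are repeated until exactly 2 * MAX_QUERY_RESULTS records exist. The sizing arithmetic in isolation, assuming a 22-element fixture for illustration:

    import math

    MAX_QUERY_RESULTS = 10000  # mirrors the constant added to the pinecone connector
    fixture = list(range(22))  # stand-in for the JSON elements in upload_file

    # Repeat the fixture enough times to cover two full query pages...
    min_entries = math.ceil((MAX_QUERY_RESULTS * 2) / len(fixture))
    # ...then trim so the expected vector count is exact.
    new_content = (fixture * min_entries)[: 2 * MAX_QUERY_RESULTS]
    assert len(new_content) == 2 * MAX_QUERY_RESULTS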
unstructured_ingest/__version__.py CHANGED (+1 -1)

@@ -1 +1 @@
-__version__ = "0.3.4" # pragma: no cover
+__version__ = "0.3.6" # pragma: no cover
unstructured_ingest/v2/processes/connectors/azure_ai_search.py CHANGED (+1 -2)

@@ -233,8 +233,7 @@ class AzureAISearchUploader(Uploader):
             raise WriteError(
                 ", ".join(
                     [
-                        f"{error.key}: "
-                        f"[{error.status_code}] {error.error_message}"
+                        f"{error.key}: " f"[{error.status_code}] {error.error_message}"
                        for error in errors
                     ],
                 ),
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py CHANGED (+1 -5)

@@ -142,8 +142,6 @@ class ElasticsearchIndexer(Indexer):
     def precheck(self) -> None:
         try:
             with self.connection_config.get_client() as client:
-                if not client.ping():
-                    raise SourceConnectionError("cluster not detected")
                 indices = client.indices.get_alias(index="*")
                 if self.index_config.index_name not in indices:
                     raise SourceConnectionError(
@@ -393,11 +391,9 @@ class ElasticsearchUploader(Uploader):
     def precheck(self) -> None:
         try:
             with self.connection_config.get_client() as client:
-                if not client.ping():
-                    raise DestinationConnectionError("cluster not detected")
                 indices = client.indices.get_alias(index="*")
                 if self.upload_config.index_name not in indices:
-                    raise SourceConnectionError(
+                    raise DestinationConnectionError(
                         "index {} not found: {}".format(
                             self.upload_config.index_name, ", ".join(indices.keys())
                         )
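
Note: both prechecks drop the explicit client.ping() guard; indices.get_alias(index="*") already raises a transport error on its own when the cluster is unreachable, and the uploader now reports a missing index as a destination-side failure. A condensed sketch of the resulting precheck shape, paraphrased from the diff (the standalone function signature is an assumption):

    from unstructured_ingest.error import DestinationConnectionError

    def precheck_index(connection_config, index_name: str) -> None:
        # get_client() yields the connector's context-managed Elasticsearch client.
        with connection_config.get_client() as client:
            # If the cluster is down, this call raises by itself, so a separate
            # ping() round trip adds nothing.
            indices = client.indices.get_alias(index="*")
            if index_name not in indices:
                raise DestinationConnectionError(
                    "index {} not found: {}".format(index_name, ", ".join(indices.keys()))
                )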
unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py CHANGED (+12 -2)

@@ -15,6 +15,7 @@ from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.logger import logger
 from unstructured_ingest.utils.data_prep import flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces.connector import ConnectionConfig
 from unstructured_ingest.v2.interfaces.file_data import FileData
 from unstructured_ingest.v2.interfaces.upload_stager import UploadStager, UploadStagerConfig
@@ -84,7 +85,7 @@ class LanceDBUploadStager(UploadStager):
 
         df = pd.DataFrame(
             [
-                self._conform_element_contents(element_contents)
+                self._conform_element_contents(element_contents, file_data)
                 for element_contents in elements_contents
             ]
         )
@@ -94,9 +95,10 @@ class LanceDBUploadStager(UploadStager):
 
         return output_path
 
-    def _conform_element_contents(self, element: dict) -> dict:
+    def _conform_element_contents(self, element: dict, file_data: FileData) -> dict:
         return {
             "vector": element.pop("embeddings", None),
+            RECORD_ID_LABEL: file_data.identifier,
             **flatten_dict(element, separator="-"),
         }
 
@@ -134,6 +136,14 @@ class LanceDBUploader(Uploader):
         async with self.get_table() as table:
             schema = await table.schema()
             df = self._fit_to_schema(df, schema)
+            if RECORD_ID_LABEL not in schema.names:
+                logger.warning(
+                    f"Designated table doesn't contain {RECORD_ID_LABEL} column of type"
+                    " string which is required to support overwriting updates on subsequent"
+                    " uploads of the same record. New rows will be appended instead."
+                )
+            else:
+                await table.delete(f'{RECORD_ID_LABEL} = "{file_data.identifier}"')
             await table.add(data=df)
 
     def _fit_to_schema(self, df: pd.DataFrame, schema) -> pd.DataFrame:
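
Note: the uploader now emulates an upsert keyed on the record id: before appending a staged batch it deletes any rows whose RECORD_ID_LABEL column matches the incoming file's identifier, and falls back to a plain append (with a warning) when the table lacks that column. A condensed sketch of the pattern, using only the async table calls that appear in the diff (function name and arguments are illustrative):

    async def upsert_batch(table, record_id_label: str, record_id: str, df) -> None:
        schema = await table.schema()
        if record_id_label in schema.names:
            # Drop rows left over from a previous upload of the same record...
            await table.delete(f'{record_id_label} = "{record_id}"')
        # ...then append the fresh batch, so re-running an upload overwrites
        # instead of duplicating.
        await table.add(data=df)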
unstructured_ingest/v2/processes/connectors/pinecone.py CHANGED (+31 -21)

@@ -31,6 +31,7 @@ CONNECTOR_TYPE = "pinecone"
 MAX_PAYLOAD_SIZE = 2 * 1024 * 1024  # 2MB
 MAX_POOL_THREADS = 100
 MAX_METADATA_BYTES = 40960  # 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
+MAX_QUERY_RESULTS = 10000
 
 
 class PineconeAccessConfig(AccessConfig):
@@ -84,7 +85,7 @@ ALLOWED_FIELDS = (
 
 class PineconeUploadStagerConfig(UploadStagerConfig):
     metadata_fields: list[str] = Field(
-        default=
+        default=list(ALLOWED_FIELDS),
         description=(
             "which metadata from the source element to map to the payload metadata being sent to "
             "Pinecone."
@@ -137,7 +138,6 @@ class PineconeUploadStager(UploadStager):
             flatten_lists=True,
             remove_none=True,
         )
-        metadata[RECORD_ID_LABEL] = file_data.identifier
         metadata_size_bytes = len(json.dumps(metadata).encode())
         if metadata_size_bytes > MAX_METADATA_BYTES:
             logger.info(
@@ -146,6 +146,8 @@ class PineconeUploadStager(UploadStager):
             )
             metadata = {}
 
+        metadata[RECORD_ID_LABEL] = file_data.identifier
+
         return {
             "id": str(uuid.uuid4()),
             "values": embeddings,
@@ -213,6 +215,18 @@ class PineconeUploader(Uploader):
                 f"from pinecone index: {resp}"
             )
 
+    def delete_by_query(self, index: "PineconeIndex", query_params: dict) -> None:
+        while True:
+            query_results = index.query(**query_params)
+            matches = query_results.get("matches", [])
+            if not matches:
+                break
+            ids = [match["id"] for match in matches]
+            delete_params = {"ids": ids}
+            if namespace := self.upload_config.namespace:
+                delete_params["namespace"] = namespace
+            index.delete(**delete_params)
+
     def serverless_delete_by_record_id(self, file_data: FileData) -> None:
         logger.debug(
             f"deleting any content with metadata "
@@ -221,29 +235,25 @@ class PineconeUploader(Uploader):
         )
         index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
         index_stats = index.describe_index_stats()
+        dimension = index_stats["dimension"]
         total_vectors = index_stats["total_vector_count"]
         if total_vectors == 0:
             return
-
-
-
-
-
-
-
-            query_params["namespace"] = namespace
-        while True:
-            query_results = index.query(**query_params)
-            matches = query_results.get("matches", [])
-            if not matches:
-                break
-            ids = [match["id"] for match in matches]
-            delete_params = {"ids": ids}
+        while total_vectors > 0:
+            top_k = min(total_vectors, MAX_QUERY_RESULTS)
+            query_params = {
+                "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}},
+                "vector": [0] * dimension,
+                "top_k": top_k,
+            }
             if namespace := self.upload_config.namespace:
-
-
-
-
+                query_params["namespace"] = namespace
+            self.delete_by_query(index=index, query_params=query_params)
+            index_stats = index.describe_index_stats()
+            total_vectors = index_stats["total_vector_count"]
+
+        logger.info(
+            f"deleted {total_vectors} records with metadata "
             f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone index"
         )
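
Note: two behavioral fixes are bundled here. The stager now writes RECORD_ID_LABEL into the metadata after the oversize check, so the record id survives even when metadata larger than MAX_METADATA_BYTES is dropped. The serverless delete now factors its query/delete loop into delete_by_query and drives it with top_k capped at MAX_QUERY_RESULTS, re-reading index stats between rounds so arbitrarily many matching vectors get removed. A condensed sketch of that control flow, using only index methods that appear in the diff (function name and arguments are illustrative):

    MAX_QUERY_RESULTS = 10000  # mirrors the connector's new constant

    def delete_record(index, record_id_key, record_id, namespace=None):
        stats = index.describe_index_stats()
        dimension = stats["dimension"]
        total_vectors = stats["total_vector_count"]
        while total_vectors > 0:
            query_params = {
                "filter": {record_id_key: {"$eq": record_id}},
                "vector": [0] * dimension,  # dummy vector; only the metadata filter matters
                "top_k": min(total_vectors, MAX_QUERY_RESULTS),
            }
            if namespace:
                query_params["namespace"] = namespace
            matches = index.query(**query_params).get("matches", [])
            if not matches:
                break  # nothing left matching this record id
            delete_params = {"ids": [m["id"] for m in matches]}
            if namespace:
                delete_params["namespace"] = namespace
            index.delete(**delete_params)
            total_vectors = index.describe_index_stats()["total_vector_count"]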
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py CHANGED (+0 -3)

@@ -10,8 +10,6 @@ from .embedded import CONNECTOR_TYPE as EMBEDDED_WEAVIATE_CONNECTOR_TYPE
 from .embedded import weaviate_embedded_destination_entry
 from .local import CONNECTOR_TYPE as LOCAL_WEAVIATE_CONNECTOR_TYPE
 from .local import weaviate_local_destination_entry
-from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
-from .weaviate import weaviate_destination_entry
 
 add_destination_entry(
     destination_type=LOCAL_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_local_destination_entry
@@ -22,4 +20,3 @@ add_destination_entry(
 add_destination_entry(
     destination_type=EMBEDDED_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_embedded_destination_entry
 )
-add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry)
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py CHANGED (+0 -10)

@@ -22,7 +22,6 @@ from unstructured_ingest.v2.interfaces import (
     UploadStagerConfig,
 )
 from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
 
 if TYPE_CHECKING:
     from weaviate.classes.init import Timeout
@@ -288,12 +287,3 @@ class WeaviateUploader(Uploader, ABC):
                 vector=vector,
             )
         self.check_for_errors(client=weaviate_client)
-
-
-weaviate_destination_entry = DestinationRegistryEntry(
-    connection_config=WeaviateConnectionConfig,
-    uploader=WeaviateUploader,
-    uploader_config=WeaviateUploaderConfig,
-    upload_stager=WeaviateUploadStager,
-    upload_stager_config=WeaviateUploadStagerConfig,
-)
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/METADATA RENAMED (+15 -15)

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.3.4
+Version: 0.3.6
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,12 +22,12 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: opentelemetry-sdk
-Requires-Dist: pandas
 Requires-Dist: python-dateutil
+Requires-Dist: tqdm
 Requires-Dist: pydantic>=2.7
 Requires-Dist: dataclasses-json
-Requires-Dist:
+Requires-Dist: opentelemetry-sdk
+Requires-Dist: pandas
 Requires-Dist: click
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
@@ -51,8 +51,8 @@ Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: csv
@@ -78,8 +78,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -98,8 +98,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
 Provides-Extra: kafka
@@ -117,26 +117,26 @@ Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
 Provides-Extra: notion
-Requires-Dist:
+Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
 Requires-Dist: httpx; extra == "notion"
-Requires-Dist:
+Requires-Dist: htmlBuilder; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
 Requires-Dist: bs4; extra == "onedrive"
-Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
 Provides-Extra: outlook
-Requires-Dist: msal; extra == "outlook"
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+Requires-Dist: msal; extra == "outlook"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
 Provides-Extra: pinecone
@@ -163,18 +163,18 @@ Requires-Dist: s3fs; extra == "s3"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: fsspec; extra == "sftp"
 Requires-Dist: paramiko; extra == "sftp"
+Requires-Dist: fsspec; extra == "sftp"
 Provides-Extra: sharepoint
-Requires-Dist: msal; extra == "sharepoint"
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: msal; extra == "sharepoint"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: slack
 Requires-Dist: slack-sdk[optional]; extra == "slack"
 Provides-Extra: snowflake
-Requires-Dist: psycopg2-binary; extra == "snowflake"
 Requires-Dist: snowflake-connector-python; extra == "snowflake"
+Requires-Dist: psycopg2-binary; extra == "snowflake"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
 Provides-Extra: tsv
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/RECORD RENAMED (+15 -15)

@@ -10,11 +10,11 @@ test/integration/connectors/test_azure_ai_search.py,sha256=dae4GifRiKue5YpsxworD
 test/integration/connectors/test_confluence.py,sha256=xcPmZ_vi_pkCt-tUPn10P49FH9i_9YUbrAPO6fYk5rU,3521
 test/integration/connectors/test_delta_table.py,sha256=GSzWIkbEUzOrRPt2F1uO0dabcp7kTFDj75BhhI2y-WU,6856
 test/integration/connectors/test_kafka.py,sha256=j7jsNWZumNBv9v-5Bpx8geUUXpxxad5EuA4CMRsl4R8,7104
-test/integration/connectors/test_lancedb.py,sha256=
+test/integration/connectors/test_lancedb.py,sha256=U2HfIrf6iJ7lYMn-vz0j-LesVyDY-jc9QrQhlJVhG9Q,9183
 test/integration/connectors/test_milvus.py,sha256=p4UujDr_tsRaQDmhDmDZp38t8oSFm7hrTqiq6NNuhGo,5933
 test/integration/connectors/test_mongodb.py,sha256=YeS_DUnVYN02F76j87W8RhXGHnJMzQYb3n-L1-oWGXI,12254
 test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP4NxOFm1AL8EPGLA,3554
-test/integration/connectors/test_pinecone.py,sha256=
+test/integration/connectors/test_pinecone.py,sha256=i-v5WkAI9M6SUZI7ch9qdILlRHopAdptpkSY12-BaTk,9483
 test/integration/connectors/test_qdrant.py,sha256=ASvO-BNyhv8m8or28KljrJy27Da0uaTNeoR5w_QsvFg,5121
 test/integration/connectors/test_s3.py,sha256=YHEYMqWTKTfR7wlL4VoxtgMs1YiYKyhLIBdG-anaQGo,6896
 test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,7 +83,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=J7Aic1p5b4KF_ydqV36h8cvEIhTtU-IJ72bMV9mQs8w,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -398,7 +398,7 @@ unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2j
 unstructured_ingest/v2/processes/connectors/__init__.py,sha256=8M3aYYNbOkS2SYG2B_HLHMgX4V69-Oz1VqpQcRQMiVg,5167
 unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
 unstructured_ingest/v2/processes/connectors/astradb.py,sha256=QTUQ-cv_iZi9eaXRRHQNKhtgFn-Pi20AXdSVaDFg9DM,15498
-unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=
+unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=97HxxVvqf-80Bxb-AaBhFhMvoRl7cUjn4n-39vCAVG0,11962
 unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
 unstructured_ingest/v2/processes/connectors/confluence.py,sha256=qQApDcmPBGg4tHXwSOj4JPkAbrO9GQ4NRlaETjhp25U,7003
 unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=LbUJLt6fqaNYSmy9vUiovG-UOALMcvh8OD-gZAaf-f4,12333
@@ -411,7 +411,7 @@ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=3sV0Yv2vYMLyxszKCqA
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=XLuprTCY0D9tAh_qn81MjJrDN9YaNqMlKe7BJl3eTZc,14998
 unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=heZMtOIrCySi552ldIk8iH0pSRXZ0W2LeD-CcNOwCFQ,15979
 unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
-unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=
+unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=-J6QPJv_jmjln8cTUsfEEAyd_hi_fmD-uwB6C84rA4w,11930
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
 unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
 unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
@@ -423,7 +423,7 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P
 unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=Wk7s2_u5G0BOV5slvGc8IlUf7ivznY9PrgPqe6nlJKM,2897
 unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
-unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=
+unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=sI58uypWr1mpSl4bxr46nIfypGZ4aqryCT83qqCVnSM,18921
 unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
 unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
 unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
@@ -443,7 +443,7 @@ unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur
 unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
 unstructured_ingest/v2/processes/connectors/lancedb/cloud.py,sha256=BFy0gW2OZ_qaZJM97m-tNsFaJPi9zOKrrd2y4thcNP0,1341
 unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
-unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=
+unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=7FODnesYu8cFx1PeQJZxXij-8Dei4Kk3Bs0oxoUGBtI,5745
 unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
 unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
 unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
@@ -456,14 +456,14 @@ unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1
 unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=jl524VudwmFK63emCT7DmZan_EWJAMiGir5_zoO9FuY,5697
 unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=LFzGeAUagLknK07DsXg2oSG7ZAgR6VqT9wfI_tYlHUg,14782
 unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
-unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=
+unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
 unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
-unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=
-unstructured_ingest-0.3.4.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.3.4.dist-info/METADATA,sha256=
-unstructured_ingest-0.3.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-unstructured_ingest-0.3.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.3.4.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
-unstructured_ingest-0.3.4.dist-info/RECORD,,
+unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=dBDC_M8GVKupl7i9UMRCZyRIUv6gTkq8bJE_SILydAc,11291
+unstructured_ingest-0.3.6.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.3.6.dist-info/METADATA,sha256=JmWEiv5oO6crJ6dRAOcBrCiJI12tOonA_arMTa5HoJY,7393
+unstructured_ingest-0.3.6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+unstructured_ingest-0.3.6.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.3.6.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.3.6.dist-info/RECORD,,
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/LICENSE.md RENAMED (file without changes)
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/WHEEL RENAMED (file without changes)
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/entry_points.txt RENAMED (file without changes)
{unstructured_ingest-0.3.4.dist-info → unstructured_ingest-0.3.6.dist-info}/top_level.txt RENAMED (file without changes)