unstructured-ingest 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/sql/test_databricks_delta_tables.py +10 -10
- test/integration/connectors/utils/validation/equality.py +2 -1
- test/unit/v2/connectors/databricks/__init__.py +0 -0
- test/unit/v2/connectors/databricks/test_volumes_table.py +44 -0
- test/unit/v2/connectors/sql/test_sql.py +4 -2
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/data_prep.py +11 -3
- unstructured_ingest/utils/html.py +109 -0
- unstructured_ingest/utils/ndjson.py +52 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +3 -13
- unstructured_ingest/v2/pipeline/steps/chunk.py +3 -4
- unstructured_ingest/v2/pipeline/steps/embed.py +3 -4
- unstructured_ingest/v2/pipeline/steps/partition.py +3 -4
- unstructured_ingest/v2/processes/connectors/confluence.py +95 -25
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +14 -11
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +2 -2
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +7 -7
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +9 -9
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +41 -9
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +7 -7
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +8 -8
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +4 -0
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +15 -15
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +2 -1
- unstructured_ingest/v2/processes/connectors/sql/sql.py +14 -7
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +2 -1
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py +270 -0
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/METADATA +23 -20
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/RECORD +35 -30
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from pydantic import Field, Secret
|
|
8
|
+
|
|
9
|
+
from unstructured_ingest.error import DestinationConnectionError
|
|
10
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
11
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
+
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
13
|
+
from unstructured_ingest.v2.interfaces import (
|
|
14
|
+
FileData,
|
|
15
|
+
)
|
|
16
|
+
from unstructured_ingest.v2.logger import logger
|
|
17
|
+
from unstructured_ingest.v2.processes.connector_registry import (
|
|
18
|
+
DestinationRegistryEntry,
|
|
19
|
+
SourceRegistryEntry,
|
|
20
|
+
)
|
|
21
|
+
from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
22
|
+
_COLUMNS,
|
|
23
|
+
SQLAccessConfig,
|
|
24
|
+
SqlBatchFileData,
|
|
25
|
+
SQLConnectionConfig,
|
|
26
|
+
SQLDownloader,
|
|
27
|
+
SQLDownloaderConfig,
|
|
28
|
+
SQLIndexer,
|
|
29
|
+
SQLIndexerConfig,
|
|
30
|
+
SQLUploader,
|
|
31
|
+
SQLUploaderConfig,
|
|
32
|
+
SQLUploadStager,
|
|
33
|
+
SQLUploadStagerConfig,
|
|
34
|
+
)
|
|
35
|
+
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
36
|
+
|
|
37
|
+
if TYPE_CHECKING:
|
|
38
|
+
from vastdb import connect as VastdbConnect
|
|
39
|
+
from vastdb import transaction as VastdbTransaction
|
|
40
|
+
from vastdb.table import Table as VastdbTable
|
|
41
|
+
|
|
42
|
+
# Connector type identifier; used as the `connector_type` value of the VastDB
# connection config, indexer, downloader and uploader defined in this module.
CONNECTOR_TYPE = "vastdb"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class VastdbAccessConfig(SQLAccessConfig):
    # Sensitive connection settings. VastdbConnectionConfig.get_connection passes
    # these to vastdb.connect as `endpoint`, `access` and `secret` respectively.
    endpoint: Optional[str] = Field(
        default=None,
        description="DB endpoint",
    )
    access_key_id: Optional[str] = Field(
        default=None,
        description="access key id",
    )
    access_key_secret: Optional[str] = Field(
        default=None,
        description="access key secret",
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class VastdbConnectionConfig(SQLConnectionConfig):
    # Connection settings for one VastDB bucket/schema pair. Credentials live in
    # `access_config`, wrapped in a pydantic `Secret`.
    access_config: Secret[VastdbAccessConfig] = Field(
        default=VastdbAccessConfig(), validate_default=True
    )
    vastdb_bucket: str
    vastdb_schema: str
    connector_type: str = Field(default=CONNECTOR_TYPE, init=False)

    @requires_dependencies(["vastdb"], extras="vastdb")
    @contextmanager
    def get_connection(self) -> "VastdbConnect":
        """Yield the object returned by ``vastdb.connect`` for the configured credentials."""
        from vastdb import connect

        credentials = self.access_config.get_secret_value()
        yield connect(
            endpoint=credentials.endpoint,
            access=credentials.access_key_id,
            secret=credentials.access_key_secret,
        )

    @contextmanager
    def get_cursor(self) -> "VastdbTransaction":
        """Yield a transaction opened on a new connection.

        The transaction context is exited when the caller's ``with`` block ends.
        """
        with self.get_connection() as session, session.transaction() as tx:
            yield tx

    @contextmanager
    def get_table(self, table_name: str) -> "VastdbTable":
        """Yield ``table_name`` resolved through the configured bucket and schema.

        The table is looked up inside a transaction from ``get_cursor``, so it
        should only be used within the caller's ``with`` block.
        """
        with self.get_cursor() as tx:
            yield tx.bucket(self.vastdb_bucket).schema(self.vastdb_schema).table(table_name)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class VastdbIndexerConfig(SQLIndexerConfig):
    # No VastDB-specific options are added; all fields come from SQLIndexerConfig.
    pass
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class VastdbIndexer(SQLIndexer):
    """Indexer that reads record ids from a VastDB table."""

    connection_config: VastdbConnectionConfig
    index_config: VastdbIndexerConfig
    connector_type: str = CONNECTOR_TYPE

    def _get_doc_ids(self) -> list[str]:
        """Return every value found in the configured id column, sorted ascending."""
        id_column = self.index_config.id_column
        with self.connection_config.get_table(self.index_config.table_name) as table:
            reader = table.select(columns=[id_column])
            # Build a PyArrow Table from the RecordBatchReader
            results = reader.read_all()
            return sorted(row[id_column] for row in results.to_pylist())

    def precheck(self) -> None:
        """Validate that the configured table can be opened and selected from.

        Raises:
            DestinationConnectionError: if opening the table or issuing the
                select fails for any reason; the original exception is attached
                as the cause.
        """
        try:
            with self.connection_config.get_table(self.index_config.table_name) as table:
                table.select()
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            # NOTE(review): this is the source-side indexer yet it raises the
            # destination error type; kept unchanged so existing handlers still match.
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class VastdbDownloaderConfig(SQLDownloaderConfig):
    # No VastDB-specific options are added; all fields come from SQLDownloaderConfig.
    pass
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class VastdbDownloader(SQLDownloader):
    """Downloader that fetches a batch of rows from a VastDB table by id."""

    connection_config: VastdbConnectionConfig
    download_config: VastdbDownloaderConfig
    connector_type: str = CONNECTOR_TYPE

    @requires_dependencies(["ibis"], extras="vastdb")
    def query_db(self, file_data: SqlBatchFileData) -> tuple[list[tuple], list[str]]:
        """Select the rows whose id column matches the identifiers in ``file_data``.

        Returns:
            A pair ``(rows, column_names)``: the selected rows as tuples and the
            column names of the result table.
        """
        from ibis import _  # imports the Ibis deferred expression

        batch_metadata = file_data.additional_metadata
        table_name = batch_metadata.table_name
        id_column = batch_metadata.id_column
        ids = tuple([item.identifier for item in file_data.batch_items])

        with self.connection_config.get_table(table_name) as table:
            select_kwargs: dict[str, Any] = {"predicate": _[id_column].isin(ids)}
            if self.download_config.fields:
                # Vastdb requires the id column to be included in the fields;
                # dict.fromkeys removes duplicates while keeping order.
                requested = self.download_config.fields + [id_column]
                select_kwargs["columns"] = list(dict.fromkeys(requested))

            results = table.select(**select_kwargs).read_all()
            rows = [tuple(record) for record in results.to_pandas().to_numpy()]
            return rows, results.column_names
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class VastdbUploadStagerConfig(SQLUploadStagerConfig):
    # Applied by VastdbUploadStager.conform_dataframe via DataFrame.rename.
    rename_columns_map: Optional[dict] = Field(
        default=None,
        description="Map of column names to rename, ex: {'old_name': 'new_name'}",
    )
    # Extra column names that VastdbUploadStager.conform_dict keeps in addition
    # to the standard `_COLUMNS` set.
    additional_columns: Optional[list[str]] = Field(
        default_factory=list,
        description="Additional columns to include in the upload",
    )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# NOTE(review): unlike the indexer/downloader/uploader in this module, this
# subclass is not decorated with @dataclass — confirm the annotation-only
# override of `upload_stager_config` is intended.
class VastdbUploadStager(SQLUploadStager):
    """Upload stager that flattens element dicts into VastDB table rows."""

    upload_stager_config: VastdbUploadStagerConfig

    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
        """Flatten one element into a row restricted to the approved columns.

        The nested ``metadata``, ``data_source`` and ``coordinates`` mappings are
        merged into the top level (in that order), an ``id`` is computed with
        ``get_enhanced_element_id``, unapproved keys are dropped, and the record
        id of ``file_data`` is stored under ``RECORD_ID_LABEL``.

        ``element_dict`` and its nested ``metadata`` mapping are left unmodified.
        """
        data = element_dict.copy()
        # `element_dict.copy()` is shallow, so copy `metadata` before popping from
        # it; otherwise the caller's nested dict would lose its keys.
        metadata: dict[str, Any] = dict(data.pop("metadata", None) or {})
        data_source = metadata.pop("data_source", None) or {}
        coordinates = metadata.pop("coordinates", None) or {}

        data.update(metadata)
        data.update(data_source)
        data.update(coordinates)

        data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)

        # remove extraneous, not supported columns
        # but also allow for additional columns
        # (`additional_columns` is Optional, so treat None as "no extras")
        extra_columns = self.upload_stager_config.additional_columns or []
        approved_columns = set(_COLUMNS).union(extra_columns)
        element = {k: v for k, v in data.items() if k in approved_columns}
        element[RECORD_ID_LABEL] = file_data.identifier
        return element

    def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run the parent conformance step, then apply ``rename_columns_map`` if set."""
        df = super().conform_dataframe(df=df)
        if self.upload_stager_config.rename_columns_map:
            df.rename(columns=self.upload_stager_config.rename_columns_map, inplace=True)
        return df
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class VastdbUploaderConfig(SQLUploaderConfig):
    # No VastDB-specific options are added; all fields come from SQLUploaderConfig.
    pass
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@dataclass
class VastdbUploader(SQLUploader):
    """Uploader that writes staged dataframes into a VastDB table."""

    upload_config: VastdbUploaderConfig = field(default_factory=VastdbUploaderConfig)
    connection_config: VastdbConnectionConfig
    connector_type: str = CONNECTOR_TYPE

    def precheck(self) -> None:
        """Validate that the destination table can be opened and selected from.

        Raises:
            DestinationConnectionError: if opening the table or issuing the
                select fails for any reason; the original exception is attached
                as the cause.
        """
        try:
            with self.connection_config.get_table(self.upload_config.table_name) as table:
                table.select()
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e

    @requires_dependencies(["pyarrow"], extras="vastdb")
    def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
        """Replace any rows previously written for ``file_data`` and insert ``df``.

        Existing rows are deleted first when ``can_delete()`` allows it. NaN values
        are replaced with ``None`` in place on ``df``, the frame is passed through
        ``_fit_to_schema``, and the result is inserted in chunks of
        ``upload_config.batch_size`` rows.
        """
        import pyarrow as pa

        if self.can_delete():
            self.delete_by_record_id(file_data=file_data)
        else:
            logger.warning(
                f"table doesn't contain expected "
                f"record id column "
                f"{self.upload_config.record_id_key}, skipping delete"
            )
        df.replace({np.nan: None}, inplace=True)
        df = self._fit_to_schema(df=df)

        logger.info(
            f"writing a total of {len(df)} elements via"
            f" document batches to destination"
            f" table named {self.upload_config.table_name}"
            f" with batch size {self.upload_config.batch_size}"
        )

        for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
            # Each batch is inserted through its own get_table() context.
            with self.connection_config.get_table(self.upload_config.table_name) as table:
                pa_table = pa.Table.from_pandas(rows)
                table.insert(pa_table)

    def get_table_columns(self) -> list[str]:
        """Return the destination table's column names, cached on ``self._columns``."""
        if self._columns is None:
            with self.connection_config.get_table(self.upload_config.table_name) as table:
                self._columns = table.columns().names
        return self._columns

    @requires_dependencies(["ibis"], extras="vastdb")
    def delete_by_record_id(self, file_data: FileData) -> None:
        """Delete every row whose record-id column equals ``file_data.identifier``."""
        from ibis import _  # imports the Ibis deferred expression

        logger.debug(
            f"deleting any content with data "
            f"{self.upload_config.record_id_key}={file_data.identifier} "
            f"from table {self.upload_config.table_name}"
        )
        predicate = _[self.upload_config.record_id_key].isin([file_data.identifier])
        with self.connection_config.get_table(self.upload_config.table_name) as table:
            # Get the internal row id
            rows_to_delete = table.select(
                columns=[], predicate=predicate, internal_row_id=True
            ).read_all()
            table.delete(rows_to_delete)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Source-side registration: bundles the VastDB connection config with the
# indexer and downloader classes (and their configs) defined in this module.
vastdb_source_entry = SourceRegistryEntry(
    connection_config=VastdbConnectionConfig,
    indexer=VastdbIndexer,
    indexer_config=VastdbIndexerConfig,
    downloader=VastdbDownloader,
    downloader_config=VastdbDownloaderConfig,
)
|
|
263
|
+
|
|
264
|
+
# Destination-side registration: bundles the VastDB connection config with the
# upload stager and uploader classes (and their configs) defined in this module.
vastdb_destination_entry = DestinationRegistryEntry(
    connection_config=VastdbConnectionConfig,
    upload_stager=VastdbUploadStager,
    upload_stager_config=VastdbUploadStagerConfig,
    uploader=VastdbUploader,
    uploader_config=VastdbUploaderConfig,
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,32 +22,31 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: click
|
|
26
25
|
Requires-Dist: pydantic>=2.7
|
|
26
|
+
Requires-Dist: click
|
|
27
|
+
Requires-Dist: tqdm
|
|
28
|
+
Requires-Dist: dataclasses-json
|
|
27
29
|
Requires-Dist: pandas
|
|
28
|
-
Requires-Dist: ndjson
|
|
29
30
|
Requires-Dist: opentelemetry-sdk
|
|
30
31
|
Requires-Dist: python-dateutil
|
|
31
|
-
Requires-Dist: tqdm
|
|
32
|
-
Requires-Dist: dataclasses-json
|
|
33
32
|
Provides-Extra: airtable
|
|
34
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
35
34
|
Provides-Extra: astradb
|
|
36
35
|
Requires-Dist: astrapy; extra == "astradb"
|
|
37
36
|
Provides-Extra: azure
|
|
38
|
-
Requires-Dist: fsspec; extra == "azure"
|
|
39
37
|
Requires-Dist: adlfs; extra == "azure"
|
|
38
|
+
Requires-Dist: fsspec; extra == "azure"
|
|
40
39
|
Provides-Extra: azure-ai-search
|
|
41
40
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
42
41
|
Provides-Extra: bedrock
|
|
43
|
-
Requires-Dist: boto3; extra == "bedrock"
|
|
44
42
|
Requires-Dist: aioboto3; extra == "bedrock"
|
|
43
|
+
Requires-Dist: boto3; extra == "bedrock"
|
|
45
44
|
Provides-Extra: biomed
|
|
46
45
|
Requires-Dist: bs4; extra == "biomed"
|
|
47
46
|
Requires-Dist: requests; extra == "biomed"
|
|
48
47
|
Provides-Extra: box
|
|
49
|
-
Requires-Dist: fsspec; extra == "box"
|
|
50
48
|
Requires-Dist: boxfs; extra == "box"
|
|
49
|
+
Requires-Dist: fsspec; extra == "box"
|
|
51
50
|
Provides-Extra: chroma
|
|
52
51
|
Requires-Dist: chromadb; extra == "chroma"
|
|
53
52
|
Provides-Extra: clarifai
|
|
@@ -64,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
64
63
|
Provides-Extra: databricks-volumes
|
|
65
64
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
66
65
|
Provides-Extra: delta-table
|
|
67
|
-
Requires-Dist: boto3; extra == "delta-table"
|
|
68
66
|
Requires-Dist: deltalake; extra == "delta-table"
|
|
67
|
+
Requires-Dist: boto3; extra == "delta-table"
|
|
69
68
|
Provides-Extra: discord
|
|
70
69
|
Requires-Dist: discord.py; extra == "discord"
|
|
71
70
|
Provides-Extra: doc
|
|
@@ -73,8 +72,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
73
72
|
Provides-Extra: docx
|
|
74
73
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
75
74
|
Provides-Extra: dropbox
|
|
76
|
-
Requires-Dist: fsspec; extra == "dropbox"
|
|
77
75
|
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
76
|
+
Requires-Dist: fsspec; extra == "dropbox"
|
|
78
77
|
Provides-Extra: duckdb
|
|
79
78
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
80
79
|
Provides-Extra: elasticsearch
|
|
@@ -84,8 +83,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
|
84
83
|
Provides-Extra: embed-mixedbreadai
|
|
85
84
|
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
86
85
|
Provides-Extra: embed-octoai
|
|
87
|
-
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
88
86
|
Requires-Dist: openai; extra == "embed-octoai"
|
|
87
|
+
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
89
88
|
Provides-Extra: embed-vertexai
|
|
90
89
|
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
91
90
|
Provides-Extra: embed-voyageai
|
|
@@ -93,8 +92,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
93
92
|
Provides-Extra: epub
|
|
94
93
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
95
94
|
Provides-Extra: gcs
|
|
96
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
97
95
|
Requires-Dist: bs4; extra == "gcs"
|
|
96
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
98
97
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
99
98
|
Provides-Extra: github
|
|
100
99
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
@@ -104,8 +103,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
|
|
|
104
103
|
Provides-Extra: google-drive
|
|
105
104
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
106
105
|
Provides-Extra: hubspot
|
|
107
|
-
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
108
106
|
Requires-Dist: urllib3; extra == "hubspot"
|
|
107
|
+
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
109
108
|
Provides-Extra: jira
|
|
110
109
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
111
110
|
Provides-Extra: kafka
|
|
@@ -123,23 +122,23 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
123
122
|
Provides-Extra: msg
|
|
124
123
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
125
124
|
Provides-Extra: neo4j
|
|
125
|
+
Requires-Dist: networkx; extra == "neo4j"
|
|
126
126
|
Requires-Dist: cymple; extra == "neo4j"
|
|
127
127
|
Requires-Dist: neo4j; extra == "neo4j"
|
|
128
|
-
Requires-Dist: networkx; extra == "neo4j"
|
|
129
128
|
Provides-Extra: notion
|
|
130
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
131
129
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
132
130
|
Requires-Dist: backoff; extra == "notion"
|
|
131
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
133
132
|
Requires-Dist: httpx; extra == "notion"
|
|
134
133
|
Provides-Extra: odt
|
|
135
134
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
136
135
|
Provides-Extra: onedrive
|
|
137
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
138
136
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
137
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
139
138
|
Requires-Dist: msal; extra == "onedrive"
|
|
140
139
|
Provides-Extra: openai
|
|
141
|
-
Requires-Dist: tiktoken; extra == "openai"
|
|
142
140
|
Requires-Dist: openai; extra == "openai"
|
|
141
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
143
142
|
Provides-Extra: opensearch
|
|
144
143
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
145
144
|
Provides-Extra: org
|
|
@@ -175,8 +174,8 @@ Requires-Dist: s3fs; extra == "s3"
|
|
|
175
174
|
Provides-Extra: salesforce
|
|
176
175
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
177
176
|
Provides-Extra: sftp
|
|
178
|
-
Requires-Dist: fsspec; extra == "sftp"
|
|
179
177
|
Requires-Dist: paramiko; extra == "sftp"
|
|
178
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
180
179
|
Provides-Extra: sharepoint
|
|
181
180
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
182
181
|
Requires-Dist: msal; extra == "sharepoint"
|
|
@@ -191,10 +190,14 @@ Provides-Extra: togetherai
|
|
|
191
190
|
Requires-Dist: together; extra == "togetherai"
|
|
192
191
|
Provides-Extra: tsv
|
|
193
192
|
Requires-Dist: unstructured[tsv]; extra == "tsv"
|
|
193
|
+
Provides-Extra: vastdb
|
|
194
|
+
Requires-Dist: ibis; extra == "vastdb"
|
|
195
|
+
Requires-Dist: pyarrow; extra == "vastdb"
|
|
196
|
+
Requires-Dist: vastdb; extra == "vastdb"
|
|
194
197
|
Provides-Extra: vectara
|
|
195
|
-
Requires-Dist: httpx; extra == "vectara"
|
|
196
|
-
Requires-Dist: requests; extra == "vectara"
|
|
197
198
|
Requires-Dist: aiofiles; extra == "vectara"
|
|
199
|
+
Requires-Dist: requests; extra == "vectara"
|
|
200
|
+
Requires-Dist: httpx; extra == "vectara"
|
|
198
201
|
Provides-Extra: weaviate
|
|
199
202
|
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
200
203
|
Provides-Extra: wikipedia
|
|
@@ -34,7 +34,7 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
|
|
|
34
34
|
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=TsSEPsyaTUoEvFBadinrdM0b5C4FoUtEwCv24OUbpO8,12072
|
|
35
35
|
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=7b7z0GqoBsBqA3IK35N6axmwEMjzJ1l3Fg2WT2c7uqs,11450
|
|
36
36
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
-
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=
|
|
37
|
+
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=aC8B7peVYR8L2QaR18arqR6ffA197IsVkM2quCOVNSo,5046
|
|
38
38
|
test/integration/connectors/sql/test_postgres.py,sha256=bGDyzLRpgrXO7nl0U8nF2zSNr6ykUG-w8T4daIqUCG4,6970
|
|
39
39
|
test/integration/connectors/sql/test_singlestore.py,sha256=XeU2s4Kt_3tGyaDYYKTgYjdOyb8j2dnz4TgSMwFUjWs,6153
|
|
40
40
|
test/integration/connectors/sql/test_snowflake.py,sha256=LEwsRDoC6-rRiwYsqeo5B9Eo6RYygLLGAUsrtrgI9pM,7494
|
|
@@ -45,7 +45,7 @@ test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6
|
|
|
45
45
|
test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
|
|
46
46
|
test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
test/integration/connectors/utils/validation/destination.py,sha256=ZvMSvqz9in35xaoUJGx9rG8oWCU3FYlfLLQ6sfdI0pw,2649
|
|
48
|
-
test/integration/connectors/utils/validation/equality.py,sha256=
|
|
48
|
+
test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
|
|
49
49
|
test/integration/connectors/utils/validation/source.py,sha256=VALU5ms_JBu_eFkp2WQ7oZtJKozJ8MZSJ7h7ZA3Fz_Q,12296
|
|
50
50
|
test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
|
|
51
51
|
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -83,8 +83,10 @@ test/unit/v2/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
83
83
|
test/unit/v2/chunkers/test_chunkers.py,sha256=HSr3_lsoMw1nkDhkjO0-NOTEomRdR9oxCrSXvcMFecE,1772
|
|
84
84
|
test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
85
|
test/unit/v2/connectors/test_confluence.py,sha256=bXrn_kRb4IQdqkk4rc-P2gJAtPba7n7pNplQgfbqZDY,1047
|
|
86
|
+
test/unit/v2/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
+
test/unit/v2/connectors/databricks/test_volumes_table.py,sha256=-R_EJHqv1BseGRK9VRAZhF-2EXA64LAlhycoyIu556U,1078
|
|
86
88
|
test/unit/v2/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
-
test/unit/v2/connectors/sql/test_sql.py,sha256=
|
|
89
|
+
test/unit/v2/connectors/sql/test_sql.py,sha256=51-AKUBxw6ThO68bjenLopUUuxM88YZb2rMUV8L6YwY,2464
|
|
88
90
|
test/unit/v2/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
91
|
test/unit/v2/embedders/test_bedrock.py,sha256=sW-Vv-u3Yiw8rHPOfE5x_reywXlnozxO49rIMx6_xjo,1071
|
|
90
92
|
test/unit/v2/embedders/test_huggingface.py,sha256=mkVPym7TZkRJchwHedujgFXWdL9sVMi1W90jpmZ_vxg,1543
|
|
@@ -99,7 +101,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
99
101
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
102
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
101
103
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
102
|
-
unstructured_ingest/__version__.py,sha256=
|
|
104
|
+
unstructured_ingest/__version__.py,sha256=Y85nIpRVpjjjl2MW3ZwhLs55JjhABkZJeXfKDAbsRxM,42
|
|
103
105
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
104
106
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
105
107
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -358,9 +360,11 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
|
|
|
358
360
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
359
361
|
unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
|
|
360
362
|
unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
|
|
361
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
363
|
+
unstructured_ingest/utils/data_prep.py,sha256=X3d8Kos1zqX-HQAicF_8TB0BrstRtHrbMzu_1s7mj7M,7191
|
|
362
364
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
363
365
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
366
|
+
unstructured_ingest/utils/html.py,sha256=gORKKCkva71JBbOilYtAn_MLLCqV8VKmSjSbpwEOlno,4257
|
|
367
|
+
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
364
368
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_ZhW-FEcMC2sCNsSNwDOjo,1729
|
|
365
369
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
366
370
|
unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
@@ -389,19 +393,19 @@ unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJi
|
|
|
389
393
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
|
|
390
394
|
unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
|
|
391
395
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
392
|
-
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=
|
|
396
|
+
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=9EV9863ODDv0Y5liDT3xh2yiVuFiaVVyCcnwCy6nfkM,3172
|
|
393
397
|
unstructured_ingest/v2/interfaces/uploader.py,sha256=T2oHbN-d4Px1w1oATKKYZA10aUssqytEpiaqBM92r0Q,1600
|
|
394
398
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
395
399
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
396
400
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
397
401
|
unstructured_ingest/v2/pipeline/pipeline.py,sha256=7Yg8_xwlSX6lA-oPGlTcn6KXZ9kc51zsoJxME5TiUlw,15956
|
|
398
402
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
|
-
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=
|
|
403
|
+
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
|
|
400
404
|
unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
|
|
401
|
-
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=
|
|
405
|
+
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=iL6X0G5AvKnlfI-3XRWudlb0-6rD_PqyzA3MFmmcn6M,3199
|
|
402
406
|
unstructured_ingest/v2/pipeline/steps/filter.py,sha256=pju7knTSbB2ll1jC9DPePRDnHlOlvEcU1-sjk6xYGGc,1211
|
|
403
407
|
unstructured_ingest/v2/pipeline/steps/index.py,sha256=uIiGZeI9pFxkwS91IldXE37UUwAopsinfUgGNL7WJaw,3555
|
|
404
|
-
unstructured_ingest/v2/pipeline/steps/partition.py,sha256=
|
|
408
|
+
unstructured_ingest/v2/pipeline/steps/partition.py,sha256=IJQWaOTcyFlH2bz8WbmynE5Zkd5D8ELOKTnSCnt9Wcc,3282
|
|
405
409
|
unstructured_ingest/v2/pipeline/steps/stage.py,sha256=VR8SLUJdVva61aieVKyxUHzupTCQbQeaMA0CKu4Fx7o,2347
|
|
406
410
|
unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=p2nPFGbcpivPAZO5jDogTfn0iaL5bCFsgBNMejxVbzE,1768
|
|
407
411
|
unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_vjQt5hgsy_jRCxPzxo4,2010
|
|
@@ -417,7 +421,7 @@ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XE
|
|
|
417
421
|
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
|
|
418
422
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
419
423
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
420
|
-
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=
|
|
424
|
+
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=OdoMK5ZD2HOncquj9c_Xct7bFa6kSGW3qZwfiN1LqtQ,11399
|
|
421
425
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
422
426
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
|
|
423
427
|
unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
|
|
@@ -443,22 +447,22 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2
|
|
|
443
447
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
|
|
444
448
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
|
|
445
449
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
|
|
446
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=
|
|
450
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=2KNLwDZJDhsMAUGCzktEIn4Lvb0nxLWabBOPJbgyoEE,5010
|
|
447
451
|
unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
|
|
448
|
-
unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=
|
|
452
|
+
unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=0YBdOpTX5mbRLhP00lRHSMpl2-LfuRpqB1XPMJMxn04,2647
|
|
449
453
|
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
|
|
450
454
|
unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
|
|
451
455
|
unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
|
|
452
456
|
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
|
|
453
457
|
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
|
|
454
458
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
455
|
-
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=
|
|
456
|
-
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=
|
|
457
|
-
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=
|
|
458
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
459
|
-
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=
|
|
460
|
-
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=
|
|
461
|
-
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=
|
|
459
|
+
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=fwbHYoRrN0ZRuLdLb5X-Z7nr11rMSY8VhWMhfR3ljQo,6933
|
|
460
|
+
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=VXxEfgJbW8DCOrqLW7mQkSeWqH-HczidTNIE28SgERY,5658
|
|
461
|
+
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=GflyMNCKxYRj6hgO1btyrZ4hx3lXOwbWjHViRw1LIWw,5707
|
|
462
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=0Z--cPh17W_j4jQkSe2BeeD_j0Tt147Z01gqqF58Z9A,14421
|
|
463
|
+
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=uOfm2tLc0r5U3CNkfauuwhGOhP7RJpjyBpHWMDXCk7c,6954
|
|
464
|
+
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=LcfIU-QgW5oVMF4jMUVm7HSgVcSrQamY6mgXdQuiSjc,6400
|
|
465
|
+
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=3cVwVH3fT_JEYzIbl48-NDXdbo7XWX4C4_eqTvgWIro,6150
|
|
462
466
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
463
467
|
unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=mQJ9Ex-QCfhz-BB5YWTfbPf7xGLd1i7FpjRr0ukbhNw,754
|
|
464
468
|
unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=GdAeQ8Uz-6v1C5byBHtjfevVfbzW3obScBFFLRTb0ps,3441
|
|
@@ -542,21 +546,22 @@ unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-
|
|
|
542
546
|
unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
|
|
543
547
|
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
|
|
544
548
|
unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
|
|
545
|
-
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=
|
|
546
|
-
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=
|
|
549
|
+
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=NSEZwJDHh_9kFc31LnG14iRtYF3meK2UfUlQfYnwYEQ,2059
|
|
550
|
+
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=SRNplobVKd8fSeauYbLzBNlMb3HRHhinFS281B8aYtY,8854
|
|
547
551
|
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
|
|
548
|
-
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256
|
|
552
|
+
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=OPBDQ2c_5KjWHEFfqXxf3pQ2tWC-N4MtslMulMgP1Wc,5503
|
|
549
553
|
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=QE-WBqrPVjCgcxR5EdVD9iTHBjgDSSSQgWYvq5N61qU,7746
|
|
550
|
-
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
|
|
551
|
-
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=
|
|
554
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=O2XBu_E2WqNia9OUTdhTWkYo0xhoMMm6ZuanTz-0V9s,16192
|
|
555
|
+
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=PRjN_S7UQv0k4ZpSyclW1AJrsrugyxbR-GoOrHvBpks,5200
|
|
556
|
+
unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=4DckpVAXpmMTcoKrWiJbnFQQlcrwMA-GMaDsAYchTUs,9992
|
|
552
557
|
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
|
|
553
558
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
554
559
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
555
560
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
556
561
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
|
|
557
|
-
unstructured_ingest-0.4.
|
|
558
|
-
unstructured_ingest-0.4.
|
|
559
|
-
unstructured_ingest-0.4.
|
|
560
|
-
unstructured_ingest-0.4.
|
|
561
|
-
unstructured_ingest-0.4.
|
|
562
|
-
unstructured_ingest-0.4.
|
|
562
|
+
unstructured_ingest-0.4.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
563
|
+
unstructured_ingest-0.4.2.dist-info/METADATA,sha256=-3ILUK1wZ1fDgJcT22FO9ZhM_NKKHNBCLvgWBgzvVOY,8051
|
|
564
|
+
unstructured_ingest-0.4.2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
565
|
+
unstructured_ingest-0.4.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
566
|
+
unstructured_ingest-0.4.2.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
567
|
+
unstructured_ingest-0.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.4.0.dist-info → unstructured_ingest-0.4.2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|