unstructured-ingest 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/sql/test_databricks_delta_tables.py +10 -10
- test/unit/v2/connectors/databricks/__init__.py +0 -0
- test/unit/v2/connectors/databricks/test_volumes_table.py +44 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/data_prep.py +1 -1
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +14 -11
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +15 -15
- unstructured_ingest/v2/processes/connectors/sql/sql.py +4 -1
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/METADATA +14 -14
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/RECORD +14 -12
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -17,11 +17,11 @@ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
|
17
17
|
from unstructured_ingest.v2.logger import logger
|
|
18
18
|
from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
|
|
19
19
|
CONNECTOR_TYPE,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
DatabricksDeltaTablesAccessConfig,
|
|
21
|
+
DatabricksDeltaTablesConnectionConfig,
|
|
22
|
+
DatabricksDeltaTablesUploader,
|
|
23
|
+
DatabricksDeltaTablesUploaderConfig,
|
|
24
|
+
DatabricksDeltaTablesUploadStager,
|
|
25
25
|
)
|
|
26
26
|
|
|
27
27
|
CATALOG = "utic-dev-tech-fixtures"
|
|
@@ -112,7 +112,7 @@ async def test_databricks_delta_tables_destination(
|
|
|
112
112
|
connector_type=CONNECTOR_TYPE,
|
|
113
113
|
source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
|
|
114
114
|
)
|
|
115
|
-
stager =
|
|
115
|
+
stager = DatabricksDeltaTablesUploadStager()
|
|
116
116
|
staged_path = stager.run(
|
|
117
117
|
elements_filepath=upload_file,
|
|
118
118
|
file_data=mock_file_data,
|
|
@@ -122,15 +122,15 @@ async def test_databricks_delta_tables_destination(
|
|
|
122
122
|
|
|
123
123
|
assert staged_path.suffix == upload_file.suffix
|
|
124
124
|
|
|
125
|
-
uploader =
|
|
126
|
-
connection_config=
|
|
127
|
-
access_config=
|
|
125
|
+
uploader = DatabricksDeltaTablesUploader(
|
|
126
|
+
connection_config=DatabricksDeltaTablesConnectionConfig(
|
|
127
|
+
access_config=DatabricksDeltaTablesAccessConfig(
|
|
128
128
|
token=env_data.access_token.get_secret_value()
|
|
129
129
|
),
|
|
130
130
|
http_path=env_data.http_path,
|
|
131
131
|
server_hostname=env_data.server_hostname,
|
|
132
132
|
),
|
|
133
|
-
upload_config=
|
|
133
|
+
upload_config=DatabricksDeltaTablesUploaderConfig(
|
|
134
134
|
catalog=CATALOG, database="default", table_name=destination_table
|
|
135
135
|
),
|
|
136
136
|
)
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from pytest_mock import MockerFixture
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.v2.processes.connectors.databricks.volumes_table import (
|
|
7
|
+
DatabricksVolumeDeltaTableStager,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
|
|
12
|
+
def stager():
|
|
13
|
+
return DatabricksVolumeDeltaTableStager()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.mark.parametrize(
|
|
17
|
+
("output_path", "called_output_path"),
|
|
18
|
+
[
|
|
19
|
+
(
|
|
20
|
+
Path("/fake/path/output"),
|
|
21
|
+
Path("/fake/path/output.json"),
|
|
22
|
+
),
|
|
23
|
+
(
|
|
24
|
+
Path("/fake/path/output.ndjson"),
|
|
25
|
+
Path("/fake/path/output.json"),
|
|
26
|
+
),
|
|
27
|
+
],
|
|
28
|
+
)
|
|
29
|
+
def test_write_output(
|
|
30
|
+
mocker: MockerFixture,
|
|
31
|
+
stager: DatabricksVolumeDeltaTableStager,
|
|
32
|
+
output_path: Path,
|
|
33
|
+
called_output_path: Path,
|
|
34
|
+
):
|
|
35
|
+
data = [{"key1": "value1", "key2": "value2"}]
|
|
36
|
+
|
|
37
|
+
mock_get_data = mocker.patch(
|
|
38
|
+
"unstructured_ingest.v2.processes.connectors.databricks.volumes_table.write_data",
|
|
39
|
+
return_value=None,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
stager.write_output(output_path, data)
|
|
43
|
+
|
|
44
|
+
mock_get_data.assert_called_once_with(path=called_output_path, data=data, indent=None)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.4.
|
|
1
|
+
__version__ = "0.4.2" # pragma: no cover
|
|
@@ -153,7 +153,7 @@ def get_data_by_suffix(path: Path) -> list[dict]:
|
|
|
153
153
|
raise ValueError(f"Unsupported file type: {path}")
|
|
154
154
|
|
|
155
155
|
|
|
156
|
-
def write_data(path: Path, data: list[dict], indent: int = 2) -> None:
|
|
156
|
+
def write_data(path: Path, data: list[dict], indent: Optional[int] = 2) -> None:
|
|
157
157
|
with path.open("w") as f:
|
|
158
158
|
if path.suffix == ".json":
|
|
159
159
|
json.dump(data, f, indent=indent, ensure_ascii=False)
|
|
@@ -3,10 +3,11 @@ import os
|
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Any, Generator
|
|
6
|
+
from typing import Any, Generator
|
|
7
7
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.utils.data_prep import write_data
|
|
10
11
|
from unstructured_ingest.v2.interfaces import FileData, Uploader, UploaderConfig
|
|
11
12
|
from unstructured_ingest.v2.logger import logger
|
|
12
13
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
@@ -14,9 +15,9 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
14
15
|
)
|
|
15
16
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes import DatabricksPathMixin
|
|
16
17
|
from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
DatabricksDeltaTablesConnectionConfig,
|
|
19
|
+
DatabricksDeltaTablesUploadStager,
|
|
20
|
+
DatabricksDeltaTablesUploadStagerConfig,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
CONNECTOR_TYPE = "databricks_volume_delta_tables"
|
|
@@ -28,17 +29,16 @@ class DatabricksVolumeDeltaTableUploaderConfig(UploaderConfig, DatabricksPathMix
|
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
@dataclass
|
|
31
|
-
class DatabricksVolumeDeltaTableStager(
|
|
32
|
-
def write_output(self, output_path: Path, data: list[dict]
|
|
32
|
+
class DatabricksVolumeDeltaTableStager(DatabricksDeltaTablesUploadStager):
|
|
33
|
+
def write_output(self, output_path: Path, data: list[dict]) -> None:
|
|
33
34
|
# To avoid new line issues when migrating from volumes into delta tables, omit indenting
|
|
34
35
|
# and always write it as a json file
|
|
35
|
-
|
|
36
|
-
json.dump(data, f)
|
|
36
|
+
write_data(path=output_path.with_suffix(".json"), data=data, indent=None)
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
@dataclass
|
|
40
40
|
class DatabricksVolumeDeltaTableUploader(Uploader):
|
|
41
|
-
connection_config:
|
|
41
|
+
connection_config: DatabricksDeltaTablesConnectionConfig
|
|
42
42
|
upload_config: DatabricksVolumeDeltaTableUploaderConfig
|
|
43
43
|
connector_type: str = CONNECTOR_TYPE
|
|
44
44
|
|
|
@@ -78,7 +78,10 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
|
|
|
78
78
|
@contextmanager
|
|
79
79
|
def get_cursor(self, **connect_kwargs) -> Generator[Any, None, None]:
|
|
80
80
|
with self.connection_config.get_cursor(**connect_kwargs) as cursor:
|
|
81
|
+
logger.debug(f"executing: USE CATALOG: '{self.upload_config.catalog}'")
|
|
81
82
|
cursor.execute(f"USE CATALOG '{self.upload_config.catalog}'")
|
|
83
|
+
logger.debug(f"executing: USE DATABASE: {self.upload_config.database}")
|
|
84
|
+
cursor.execute(f"USE DATABASE {self.upload_config.database}")
|
|
82
85
|
yield cursor
|
|
83
86
|
|
|
84
87
|
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
@@ -98,9 +101,9 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
|
|
|
98
101
|
|
|
99
102
|
|
|
100
103
|
databricks_volumes_delta_tables_destination_entry = DestinationRegistryEntry(
|
|
101
|
-
connection_config=
|
|
104
|
+
connection_config=DatabricksDeltaTablesConnectionConfig,
|
|
102
105
|
uploader=DatabricksVolumeDeltaTableUploader,
|
|
103
106
|
uploader_config=DatabricksVolumeDeltaTableUploaderConfig,
|
|
104
107
|
upload_stager=DatabricksVolumeDeltaTableStager,
|
|
105
|
-
upload_stager_config=
|
|
108
|
+
upload_stager_config=DatabricksDeltaTablesUploadStagerConfig,
|
|
106
109
|
)
|
|
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
|
|
|
31
31
|
CONNECTOR_TYPE = "databricks_delta_tables"
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
class
|
|
34
|
+
class DatabricksDeltaTablesAccessConfig(SQLAccessConfig):
|
|
35
35
|
token: Optional[str] = Field(default=None, description="Databricks Personal Access Token")
|
|
36
36
|
client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
|
|
37
37
|
client_secret: Optional[str] = Field(
|
|
@@ -39,8 +39,8 @@ class DatabrickDeltaTablesAccessConfig(SQLAccessConfig):
|
|
|
39
39
|
)
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
43
|
-
access_config: Secret[
|
|
42
|
+
class DatabricksDeltaTablesConnectionConfig(SQLConnectionConfig):
|
|
43
|
+
access_config: Secret[DatabricksDeltaTablesAccessConfig]
|
|
44
44
|
server_hostname: str = Field(description="server hostname connection config value")
|
|
45
45
|
http_path: str = Field(description="http path connection config value")
|
|
46
46
|
user_agent: str = "unstructuredio_oss"
|
|
@@ -102,24 +102,24 @@ class DatabrickDeltaTablesConnectionConfig(SQLConnectionConfig):
|
|
|
102
102
|
yield cursor
|
|
103
103
|
|
|
104
104
|
|
|
105
|
-
class
|
|
105
|
+
class DatabricksDeltaTablesUploadStagerConfig(SQLUploadStagerConfig):
|
|
106
106
|
pass
|
|
107
107
|
|
|
108
108
|
|
|
109
|
-
class
|
|
110
|
-
upload_stager_config:
|
|
109
|
+
class DatabricksDeltaTablesUploadStager(SQLUploadStager):
|
|
110
|
+
upload_stager_config: DatabricksDeltaTablesUploadStagerConfig
|
|
111
111
|
|
|
112
112
|
|
|
113
|
-
class
|
|
113
|
+
class DatabricksDeltaTablesUploaderConfig(SQLUploaderConfig):
|
|
114
114
|
catalog: str = Field(description="Name of the catalog in the Databricks Unity Catalog service")
|
|
115
115
|
database: str = Field(description="Database name", default="default")
|
|
116
116
|
table_name: str = Field(description="Table name")
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
@dataclass
|
|
120
|
-
class
|
|
121
|
-
upload_config:
|
|
122
|
-
connection_config:
|
|
120
|
+
class DatabricksDeltaTablesUploader(SQLUploader):
|
|
121
|
+
upload_config: DatabricksDeltaTablesUploaderConfig
|
|
122
|
+
connection_config: DatabricksDeltaTablesConnectionConfig
|
|
123
123
|
connector_type: str = CONNECTOR_TYPE
|
|
124
124
|
|
|
125
125
|
@contextmanager
|
|
@@ -205,9 +205,9 @@ class DatabrickDeltaTablesUploader(SQLUploader):
|
|
|
205
205
|
|
|
206
206
|
|
|
207
207
|
databricks_delta_tables_destination_entry = DestinationRegistryEntry(
|
|
208
|
-
connection_config=
|
|
209
|
-
uploader=
|
|
210
|
-
uploader_config=
|
|
211
|
-
upload_stager=
|
|
212
|
-
upload_stager_config=
|
|
208
|
+
connection_config=DatabricksDeltaTablesConnectionConfig,
|
|
209
|
+
uploader=DatabricksDeltaTablesUploader,
|
|
210
|
+
uploader_config=DatabricksDeltaTablesUploaderConfig,
|
|
211
|
+
upload_stager=DatabricksDeltaTablesUploadStager,
|
|
212
|
+
upload_stager_config=DatabricksDeltaTablesUploadStagerConfig,
|
|
213
213
|
)
|
|
@@ -292,6 +292,9 @@ class SQLUploadStager(UploadStager):
|
|
|
292
292
|
df[column] = df[column].apply(str)
|
|
293
293
|
return df
|
|
294
294
|
|
|
295
|
+
def write_output(self, output_path: Path, data: list[dict]) -> None:
|
|
296
|
+
write_data(path=output_path, data=data)
|
|
297
|
+
|
|
295
298
|
def run(
|
|
296
299
|
self,
|
|
297
300
|
elements_filepath: Path,
|
|
@@ -314,7 +317,7 @@ class SQLUploadStager(UploadStager):
|
|
|
314
317
|
output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"
|
|
315
318
|
output_path = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
|
|
316
319
|
|
|
317
|
-
|
|
320
|
+
self.write_output(output_path=output_path, data=df.to_dict(orient="records"))
|
|
318
321
|
return output_path
|
|
319
322
|
|
|
320
323
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: tqdm
|
|
26
25
|
Requires-Dist: pydantic>=2.7
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
26
|
+
Requires-Dist: click
|
|
27
|
+
Requires-Dist: tqdm
|
|
29
28
|
Requires-Dist: dataclasses-json
|
|
29
|
+
Requires-Dist: pandas
|
|
30
30
|
Requires-Dist: opentelemetry-sdk
|
|
31
|
-
Requires-Dist:
|
|
31
|
+
Requires-Dist: python-dateutil
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
@@ -92,9 +92,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
92
92
|
Provides-Extra: epub
|
|
93
93
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
94
94
|
Provides-Extra: gcs
|
|
95
|
-
Requires-Dist: gcsfs; extra == "gcs"
|
|
96
95
|
Requires-Dist: bs4; extra == "gcs"
|
|
97
96
|
Requires-Dist: fsspec; extra == "gcs"
|
|
97
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
98
98
|
Provides-Extra: github
|
|
99
99
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
100
100
|
Requires-Dist: requests; extra == "github"
|
|
@@ -122,19 +122,19 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
122
122
|
Provides-Extra: msg
|
|
123
123
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
124
124
|
Provides-Extra: neo4j
|
|
125
|
-
Requires-Dist: neo4j; extra == "neo4j"
|
|
126
125
|
Requires-Dist: networkx; extra == "neo4j"
|
|
127
126
|
Requires-Dist: cymple; extra == "neo4j"
|
|
127
|
+
Requires-Dist: neo4j; extra == "neo4j"
|
|
128
128
|
Provides-Extra: notion
|
|
129
|
-
Requires-Dist: httpx; extra == "notion"
|
|
130
|
-
Requires-Dist: backoff; extra == "notion"
|
|
131
129
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
130
|
+
Requires-Dist: backoff; extra == "notion"
|
|
132
131
|
Requires-Dist: notion-client; extra == "notion"
|
|
132
|
+
Requires-Dist: httpx; extra == "notion"
|
|
133
133
|
Provides-Extra: odt
|
|
134
134
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
137
136
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
137
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
138
138
|
Requires-Dist: msal; extra == "onedrive"
|
|
139
139
|
Provides-Extra: openai
|
|
140
140
|
Requires-Dist: openai; extra == "openai"
|
|
@@ -184,20 +184,20 @@ Requires-Dist: singlestoredb; extra == "singlestore"
|
|
|
184
184
|
Provides-Extra: slack
|
|
185
185
|
Requires-Dist: slack-sdk[optional]; extra == "slack"
|
|
186
186
|
Provides-Extra: snowflake
|
|
187
|
-
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
188
187
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
188
|
+
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
189
189
|
Provides-Extra: togetherai
|
|
190
190
|
Requires-Dist: together; extra == "togetherai"
|
|
191
191
|
Provides-Extra: tsv
|
|
192
192
|
Requires-Dist: unstructured[tsv]; extra == "tsv"
|
|
193
193
|
Provides-Extra: vastdb
|
|
194
|
+
Requires-Dist: ibis; extra == "vastdb"
|
|
194
195
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
195
196
|
Requires-Dist: vastdb; extra == "vastdb"
|
|
196
|
-
Requires-Dist: ibis; extra == "vastdb"
|
|
197
197
|
Provides-Extra: vectara
|
|
198
|
-
Requires-Dist: httpx; extra == "vectara"
|
|
199
|
-
Requires-Dist: requests; extra == "vectara"
|
|
200
198
|
Requires-Dist: aiofiles; extra == "vectara"
|
|
199
|
+
Requires-Dist: requests; extra == "vectara"
|
|
200
|
+
Requires-Dist: httpx; extra == "vectara"
|
|
201
201
|
Provides-Extra: weaviate
|
|
202
202
|
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
203
203
|
Provides-Extra: wikipedia
|
|
@@ -34,7 +34,7 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
|
|
|
34
34
|
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=TsSEPsyaTUoEvFBadinrdM0b5C4FoUtEwCv24OUbpO8,12072
|
|
35
35
|
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=7b7z0GqoBsBqA3IK35N6axmwEMjzJ1l3Fg2WT2c7uqs,11450
|
|
36
36
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
-
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=
|
|
37
|
+
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=aC8B7peVYR8L2QaR18arqR6ffA197IsVkM2quCOVNSo,5046
|
|
38
38
|
test/integration/connectors/sql/test_postgres.py,sha256=bGDyzLRpgrXO7nl0U8nF2zSNr6ykUG-w8T4daIqUCG4,6970
|
|
39
39
|
test/integration/connectors/sql/test_singlestore.py,sha256=XeU2s4Kt_3tGyaDYYKTgYjdOyb8j2dnz4TgSMwFUjWs,6153
|
|
40
40
|
test/integration/connectors/sql/test_snowflake.py,sha256=LEwsRDoC6-rRiwYsqeo5B9Eo6RYygLLGAUsrtrgI9pM,7494
|
|
@@ -83,6 +83,8 @@ test/unit/v2/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
83
83
|
test/unit/v2/chunkers/test_chunkers.py,sha256=HSr3_lsoMw1nkDhkjO0-NOTEomRdR9oxCrSXvcMFecE,1772
|
|
84
84
|
test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
85
|
test/unit/v2/connectors/test_confluence.py,sha256=bXrn_kRb4IQdqkk4rc-P2gJAtPba7n7pNplQgfbqZDY,1047
|
|
86
|
+
test/unit/v2/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
+
test/unit/v2/connectors/databricks/test_volumes_table.py,sha256=-R_EJHqv1BseGRK9VRAZhF-2EXA64LAlhycoyIu556U,1078
|
|
86
88
|
test/unit/v2/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
89
|
test/unit/v2/connectors/sql/test_sql.py,sha256=51-AKUBxw6ThO68bjenLopUUuxM88YZb2rMUV8L6YwY,2464
|
|
88
90
|
test/unit/v2/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -99,7 +101,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
99
101
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
102
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
101
103
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
102
|
-
unstructured_ingest/__version__.py,sha256=
|
|
104
|
+
unstructured_ingest/__version__.py,sha256=Y85nIpRVpjjjl2MW3ZwhLs55JjhABkZJeXfKDAbsRxM,42
|
|
103
105
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
104
106
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
105
107
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -358,7 +360,7 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
|
|
|
358
360
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
359
361
|
unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
|
|
360
362
|
unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
|
|
361
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
363
|
+
unstructured_ingest/utils/data_prep.py,sha256=X3d8Kos1zqX-HQAicF_8TB0BrstRtHrbMzu_1s7mj7M,7191
|
|
362
364
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
363
365
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
364
366
|
unstructured_ingest/utils/html.py,sha256=gORKKCkva71JBbOilYtAn_MLLCqV8VKmSjSbpwEOlno,4257
|
|
@@ -445,7 +447,7 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2
|
|
|
445
447
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
|
|
446
448
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
|
|
447
449
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
|
|
448
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=
|
|
450
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=2KNLwDZJDhsMAUGCzktEIn4Lvb0nxLWabBOPJbgyoEE,5010
|
|
449
451
|
unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
|
|
450
452
|
unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=0YBdOpTX5mbRLhP00lRHSMpl2-LfuRpqB1XPMJMxn04,2647
|
|
451
453
|
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
|
|
@@ -545,11 +547,11 @@ unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ
|
|
|
545
547
|
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
|
|
546
548
|
unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
|
|
547
549
|
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=NSEZwJDHh_9kFc31LnG14iRtYF3meK2UfUlQfYnwYEQ,2059
|
|
548
|
-
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=
|
|
550
|
+
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=SRNplobVKd8fSeauYbLzBNlMb3HRHhinFS281B8aYtY,8854
|
|
549
551
|
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
|
|
550
552
|
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=OPBDQ2c_5KjWHEFfqXxf3pQ2tWC-N4MtslMulMgP1Wc,5503
|
|
551
553
|
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=QE-WBqrPVjCgcxR5EdVD9iTHBjgDSSSQgWYvq5N61qU,7746
|
|
552
|
-
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
|
|
554
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=O2XBu_E2WqNia9OUTdhTWkYo0xhoMMm6ZuanTz-0V9s,16192
|
|
553
555
|
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=PRjN_S7UQv0k4ZpSyclW1AJrsrugyxbR-GoOrHvBpks,5200
|
|
554
556
|
unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=4DckpVAXpmMTcoKrWiJbnFQQlcrwMA-GMaDsAYchTUs,9992
|
|
555
557
|
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
|
|
@@ -557,9 +559,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
|
|
|
557
559
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
558
560
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
559
561
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
|
|
560
|
-
unstructured_ingest-0.4.
|
|
561
|
-
unstructured_ingest-0.4.
|
|
562
|
-
unstructured_ingest-0.4.
|
|
563
|
-
unstructured_ingest-0.4.
|
|
564
|
-
unstructured_ingest-0.4.
|
|
565
|
-
unstructured_ingest-0.4.
|
|
562
|
+
unstructured_ingest-0.4.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
563
|
+
unstructured_ingest-0.4.2.dist-info/METADATA,sha256=-3ILUK1wZ1fDgJcT22FO9ZhM_NKKHNBCLvgWBgzvVOY,8051
|
|
564
|
+
unstructured_ingest-0.4.2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
565
|
+
unstructured_ingest-0.4.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
566
|
+
unstructured_ingest-0.4.2.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
567
|
+
unstructured_ingest-0.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.4.1.dist-info → unstructured_ingest-0.4.2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|