unstructured-ingest 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
- test/integration/connectors/sql/test_postgres.py +6 -2
- test/integration/connectors/sql/test_singlestore.py +6 -2
- test/integration/connectors/sql/test_snowflake.py +6 -2
- test/integration/connectors/sql/test_sqlite.py +6 -2
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/file_data.py +11 -1
- unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +2 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -1
- unstructured_ingest/v2/processes/connectors/mongodb.py +3 -4
- unstructured_ingest/v2/processes/connectors/sql/sql.py +5 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/METADATA +15 -15
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/RECORD +24 -24
- /test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
-
import tempfile
|
|
4
3
|
import uuid
|
|
5
4
|
from contextlib import contextmanager
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from pathlib import Path
|
|
7
|
+
from unittest import mock
|
|
8
8
|
|
|
9
9
|
import pytest
|
|
10
10
|
from databricks.sdk import WorkspaceClient
|
|
@@ -31,11 +31,15 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes_native impor
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@dataclass
|
|
34
|
-
class EnvData:
|
|
34
|
+
class BaseEnvData:
|
|
35
35
|
host: str
|
|
36
|
+
catalog: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
|
|
40
|
+
class BasicAuthEnvData(BaseEnvData):
|
|
36
41
|
client_id: str
|
|
37
42
|
client_secret: str
|
|
38
|
-
catalog: str
|
|
39
43
|
|
|
40
44
|
def get_connection_config(self) -> DatabricksNativeVolumesConnectionConfig:
|
|
41
45
|
return DatabricksNativeVolumesConnectionConfig(
|
|
@@ -47,8 +51,21 @@ class EnvData:
|
|
|
47
51
|
)
|
|
48
52
|
|
|
49
53
|
|
|
50
|
-
|
|
51
|
-
|
|
54
|
+
@dataclass
|
|
55
|
+
class PATEnvData(BaseEnvData):
|
|
56
|
+
token: str
|
|
57
|
+
|
|
58
|
+
def get_connection_config(self) -> DatabricksNativeVolumesConnectionConfig:
|
|
59
|
+
return DatabricksNativeVolumesConnectionConfig(
|
|
60
|
+
host=self.host,
|
|
61
|
+
access_config=DatabricksNativeVolumesAccessConfig(
|
|
62
|
+
token=self.token,
|
|
63
|
+
),
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_basic_auth_env_data() -> BasicAuthEnvData:
|
|
68
|
+
return BasicAuthEnvData(
|
|
52
69
|
host=os.environ["DATABRICKS_HOST"],
|
|
53
70
|
client_id=os.environ["DATABRICKS_CLIENT_ID"],
|
|
54
71
|
client_secret=os.environ["DATABRICKS_CLIENT_SECRET"],
|
|
@@ -56,23 +73,30 @@ def get_env_data() -> EnvData:
|
|
|
56
73
|
)
|
|
57
74
|
|
|
58
75
|
|
|
76
|
+
def get_pat_env_data() -> PATEnvData:
|
|
77
|
+
return PATEnvData(
|
|
78
|
+
host=os.environ["DATABRICKS_HOST"],
|
|
79
|
+
catalog=os.environ["DATABRICKS_CATALOG"],
|
|
80
|
+
token=os.environ["DATABRICKS_PAT"],
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
|
|
59
84
|
@pytest.mark.asyncio
|
|
60
85
|
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
|
|
61
86
|
@requires_env(
|
|
62
87
|
"DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
|
|
63
88
|
)
|
|
64
|
-
async def test_volumes_native_source():
|
|
65
|
-
env_data = get_env_data()
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tempdir_path)
|
|
89
|
+
async def test_volumes_native_source(tmp_path: Path):
|
|
90
|
+
env_data = get_basic_auth_env_data()
|
|
91
|
+
with mock.patch.dict(os.environ, clear=True):
|
|
92
|
+
indexer_config = DatabricksNativeVolumesIndexerConfig(
|
|
93
|
+
recursive=True,
|
|
94
|
+
volume="test-platform",
|
|
95
|
+
volume_path="databricks-volumes-test-input",
|
|
96
|
+
catalog=env_data.catalog,
|
|
97
|
+
)
|
|
98
|
+
connection_config = env_data.get_connection_config()
|
|
99
|
+
download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tmp_path)
|
|
76
100
|
indexer = DatabricksNativeVolumesIndexer(
|
|
77
101
|
connection_config=connection_config, index_config=indexer_config
|
|
78
102
|
)
|
|
@@ -89,12 +113,44 @@ async def test_volumes_native_source():
|
|
|
89
113
|
)
|
|
90
114
|
|
|
91
115
|
|
|
116
|
+
@pytest.mark.asyncio
|
|
117
|
+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
|
|
118
|
+
@requires_env("DATABRICKS_HOST", "DATABRICKS_PAT", "DATABRICKS_CATALOG")
|
|
119
|
+
async def test_volumes_native_source_pat(tmp_path: Path):
|
|
120
|
+
env_data = get_pat_env_data()
|
|
121
|
+
with mock.patch.dict(os.environ, clear=True):
|
|
122
|
+
indexer_config = DatabricksNativeVolumesIndexerConfig(
|
|
123
|
+
recursive=True,
|
|
124
|
+
volume="test-platform",
|
|
125
|
+
volume_path="databricks-volumes-test-input",
|
|
126
|
+
catalog=env_data.catalog,
|
|
127
|
+
)
|
|
128
|
+
connection_config = env_data.get_connection_config()
|
|
129
|
+
download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tmp_path)
|
|
130
|
+
indexer = DatabricksNativeVolumesIndexer(
|
|
131
|
+
connection_config=connection_config, index_config=indexer_config
|
|
132
|
+
)
|
|
133
|
+
downloader = DatabricksNativeVolumesDownloader(
|
|
134
|
+
connection_config=connection_config, download_config=download_config
|
|
135
|
+
)
|
|
136
|
+
await source_connector_validation(
|
|
137
|
+
indexer=indexer,
|
|
138
|
+
downloader=downloader,
|
|
139
|
+
configs=SourceValidationConfigs(
|
|
140
|
+
test_id="databricks_volumes_native_pat",
|
|
141
|
+
expected_num_files=1,
|
|
142
|
+
),
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
|
|
92
146
|
def _get_volume_path(catalog: str, volume: str, volume_path: str):
|
|
93
147
|
return f"/Volumes/{catalog}/default/{volume}/{volume_path}"
|
|
94
148
|
|
|
95
149
|
|
|
96
150
|
@contextmanager
|
|
97
|
-
def databricks_destination_context(env_data: EnvData, volume: str, volume_path) -> WorkspaceClient:
|
|
151
|
+
def databricks_destination_context(
|
|
152
|
+
env_data: BasicAuthEnvData, volume: str, volume_path
|
|
153
|
+
) -> WorkspaceClient:
|
|
98
154
|
client = WorkspaceClient(
|
|
99
155
|
host=env_data.host, client_id=env_data.client_id, client_secret=env_data.client_secret
|
|
100
156
|
)
|
|
@@ -137,7 +193,7 @@ def validate_upload(client: WorkspaceClient, catalog: str, volume: str, volume_p
|
|
|
137
193
|
"DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
|
|
138
194
|
)
|
|
139
195
|
async def test_volumes_native_destination(upload_file: Path):
|
|
140
|
-
env_data = get_env_data()
|
|
196
|
+
env_data = get_basic_auth_env_data()
|
|
141
197
|
volume_path = f"databricks-volumes-test-output-{uuid.uuid4()}"
|
|
142
198
|
file_data = FileData(
|
|
143
199
|
source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
|
|
@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
15
15
|
SourceValidationConfigs,
|
|
16
16
|
source_connector_validation,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
18
|
+
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
19
19
|
from unstructured_ingest.v2.processes.connectors.sql.postgres import (
|
|
20
20
|
CONNECTOR_TYPE,
|
|
21
21
|
PostgresAccessConfig,
|
|
@@ -119,7 +119,11 @@ def validate_destination(
|
|
|
119
119
|
async def test_postgres_destination(upload_file: Path, temp_dir: Path):
|
|
120
120
|
# the postgres destination connector doesn't leverage the file data but is required as an input,
|
|
121
121
|
# mocking it with arbitrary values to meet the base requirements:
|
|
122
|
-
mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
|
|
122
|
+
mock_file_data = FileData(
|
|
123
|
+
identifier="mock file data",
|
|
124
|
+
connector_type=CONNECTOR_TYPE,
|
|
125
|
+
source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
|
|
126
|
+
)
|
|
123
127
|
with docker_compose_context(
|
|
124
128
|
docker_compose_path=env_setup_path / "sql" / "postgres" / "destination"
|
|
125
129
|
):
|
|
@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
15
15
|
SourceValidationConfigs,
|
|
16
16
|
source_connector_validation,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
18
|
+
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
19
19
|
from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
|
|
20
20
|
CONNECTOR_TYPE,
|
|
21
21
|
SingleStoreAccessConfig,
|
|
@@ -103,7 +103,11 @@ def validate_destination(
|
|
|
103
103
|
@pytest.mark.asyncio
|
|
104
104
|
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
|
|
105
105
|
async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
|
|
106
|
-
mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
|
|
106
|
+
mock_file_data = FileData(
|
|
107
|
+
identifier="mock file data",
|
|
108
|
+
connector_type=CONNECTOR_TYPE,
|
|
109
|
+
source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
|
|
110
|
+
)
|
|
107
111
|
with docker_compose_context(
|
|
108
112
|
docker_compose_path=env_setup_path / "sql" / "singlestore" / "destination"
|
|
109
113
|
):
|
|
@@ -17,7 +17,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
17
17
|
source_connector_validation,
|
|
18
18
|
)
|
|
19
19
|
from test.integration.utils import requires_env
|
|
20
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
20
|
+
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
21
21
|
from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
|
|
22
22
|
CONNECTOR_TYPE,
|
|
23
23
|
SnowflakeAccessConfig,
|
|
@@ -170,7 +170,11 @@ async def test_snowflake_destination(
|
|
|
170
170
|
):
|
|
171
171
|
# the postgres destination connector doesn't leverage the file data but is required as an input,
|
|
172
172
|
# mocking it with arbitrary values to meet the base requirements:
|
|
173
|
-
mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
|
|
173
|
+
mock_file_data = FileData(
|
|
174
|
+
identifier="mock file data",
|
|
175
|
+
connector_type=CONNECTOR_TYPE,
|
|
176
|
+
source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
|
|
177
|
+
)
|
|
174
178
|
init_db_destination()
|
|
175
179
|
stager = SnowflakeUploadStager()
|
|
176
180
|
staged_path = stager.run(
|
|
@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
15
15
|
SourceValidationConfigs,
|
|
16
16
|
source_connector_validation,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
18
|
+
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
19
19
|
from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
|
|
20
20
|
CONNECTOR_TYPE,
|
|
21
21
|
SQLiteConnectionConfig,
|
|
@@ -116,7 +116,11 @@ async def test_sqlite_destination(
|
|
|
116
116
|
):
|
|
117
117
|
# the sqlite destination connector doesn't leverage the file data but is required as an input,
|
|
118
118
|
# mocking it with arbitrary values to meet the base requirements:
|
|
119
|
-
mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
|
|
119
|
+
mock_file_data = FileData(
|
|
120
|
+
identifier="mock file data",
|
|
121
|
+
connector_type=CONNECTOR_TYPE,
|
|
122
|
+
source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
|
|
123
|
+
)
|
|
120
124
|
stager = SQLiteUploadStager()
|
|
121
125
|
staged_path = stager.run(
|
|
122
126
|
elements_filepath=upload_file,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.10" # pragma: no cover
|
|
1
|
+
__version__ = "0.3.11" # pragma: no cover
|
|
@@ -36,7 +36,7 @@ class FileDataSourceMetadata(BaseModel):
|
|
|
36
36
|
class FileData(BaseModel):
|
|
37
37
|
identifier: str
|
|
38
38
|
connector_type: str
|
|
39
|
-
source_identifiers: Optional[SourceIdentifiers] = None
|
|
39
|
+
source_identifiers: SourceIdentifiers
|
|
40
40
|
metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
|
|
41
41
|
additional_metadata: dict[str, Any] = Field(default_factory=dict)
|
|
42
42
|
reprocess: bool = False
|
|
@@ -73,6 +73,7 @@ class BatchItem(BaseModel):
|
|
|
73
73
|
class BatchFileData(FileData):
|
|
74
74
|
identifier: str = Field(init=False)
|
|
75
75
|
batch_items: list[BatchItem]
|
|
76
|
+
source_identifiers: Optional[SourceIdentifiers] = None
|
|
76
77
|
|
|
77
78
|
@field_validator("batch_items")
|
|
78
79
|
@classmethod
|
|
@@ -104,3 +105,12 @@ def file_data_from_file(path: str) -> FileData:
|
|
|
104
105
|
logger.debug(f"{path} not valid for batch file data")
|
|
105
106
|
|
|
106
107
|
return FileData.from_file(path=path)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def file_data_from_dict(data: dict) -> FileData:
|
|
111
|
+
try:
|
|
112
|
+
return BatchFileData.model_validate(data)
|
|
113
|
+
except ValidationError:
|
|
114
|
+
logger.debug(f"{data} not valid for batch file data")
|
|
115
|
+
|
|
116
|
+
return FileData.model_validate(data)
|
|
@@ -30,6 +30,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
30
30
|
FileDataSourceMetadata,
|
|
31
31
|
Indexer,
|
|
32
32
|
IndexerConfig,
|
|
33
|
+
SourceIdentifiers,
|
|
33
34
|
Uploader,
|
|
34
35
|
UploaderConfig,
|
|
35
36
|
UploadStager,
|
|
@@ -267,6 +268,7 @@ class AstraDBDownloader(Downloader):
|
|
|
267
268
|
raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
|
|
268
269
|
|
|
269
270
|
# modify input file_data for download_response
|
|
271
|
+
file_data.source_identifiers = SourceIdentifiers(filename=filename, fullpath=filename)
|
|
270
272
|
cast_file_data = FileData.cast(file_data=file_data)
|
|
271
273
|
cast_file_data.identifier = filename
|
|
272
274
|
cast_file_data.metadata.date_processed = str(time())
|
|
@@ -27,6 +27,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
27
27
|
FileDataSourceMetadata,
|
|
28
28
|
Indexer,
|
|
29
29
|
IndexerConfig,
|
|
30
|
+
SourceIdentifiers,
|
|
30
31
|
Uploader,
|
|
31
32
|
UploaderConfig,
|
|
32
33
|
UploadStager,
|
|
@@ -261,6 +262,7 @@ class CouchbaseDownloader(Downloader):
|
|
|
261
262
|
exc_info=True,
|
|
262
263
|
)
|
|
263
264
|
raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
|
|
265
|
+
file_data.source_identifiers = SourceIdentifiers(filename=filename, fullpath=filename)
|
|
264
266
|
cast_file_data = FileData.cast(file_data=file_data)
|
|
265
267
|
cast_file_data.identifier = filename_id
|
|
266
268
|
cast_file_data.metadata.date_processed = str(time.time())
|
|
@@ -14,6 +14,7 @@ from unstructured_ingest.error import (
|
|
|
14
14
|
)
|
|
15
15
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
16
16
|
from unstructured_ingest.v2.interfaces import (
|
|
17
|
+
AccessConfig,
|
|
17
18
|
ConnectionConfig,
|
|
18
19
|
Downloader,
|
|
19
20
|
DownloaderConfig,
|
|
@@ -52,6 +53,10 @@ class DatabricksPathMixin(BaseModel):
|
|
|
52
53
|
return path
|
|
53
54
|
|
|
54
55
|
|
|
56
|
+
class DatabricksVolumesAccessConfig(AccessConfig):
|
|
57
|
+
token: Optional[str] = Field(default=None, description="Databricks Personal Access Token")
|
|
58
|
+
|
|
59
|
+
|
|
55
60
|
class DatabricksVolumesConnectionConfig(ConnectionConfig, ABC):
|
|
56
61
|
host: Optional[str] = Field(
|
|
57
62
|
default=None,
|
|
@@ -3,12 +3,12 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces import AccessConfig
|
|
7
6
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
7
|
DestinationRegistryEntry,
|
|
9
8
|
SourceRegistryEntry,
|
|
10
9
|
)
|
|
11
10
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
11
|
+
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
14
14
|
DatabricksVolumesDownloaderConfig,
|
|
@@ -21,7 +21,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
21
21
|
CONNECTOR_TYPE = "databricks_volumes_aws"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class DatabricksAWSVolumesAccessConfig(AccessConfig):
|
|
24
|
+
class DatabricksAWSVolumesAccessConfig(DatabricksVolumesAccessConfig):
|
|
25
25
|
account_id: Optional[str] = Field(
|
|
26
26
|
default=None,
|
|
27
27
|
description="The Databricks account ID for the Databricks " "accounts endpoint",
|
|
@@ -3,12 +3,12 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces import AccessConfig
|
|
7
6
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
7
|
DestinationRegistryEntry,
|
|
9
8
|
SourceRegistryEntry,
|
|
10
9
|
)
|
|
11
10
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
11
|
+
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
14
14
|
DatabricksVolumesDownloaderConfig,
|
|
@@ -21,7 +21,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
21
21
|
CONNECTOR_TYPE = "databricks_volumes_azure"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class DatabricksAzureVolumesAccessConfig(AccessConfig):
|
|
24
|
+
class DatabricksAzureVolumesAccessConfig(DatabricksVolumesAccessConfig):
|
|
25
25
|
account_id: Optional[str] = Field(
|
|
26
26
|
default=None,
|
|
27
27
|
description="The Databricks account ID for the Databricks " "accounts endpoint.",
|
|
@@ -3,12 +3,12 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces import AccessConfig
|
|
7
6
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
7
|
DestinationRegistryEntry,
|
|
9
8
|
SourceRegistryEntry,
|
|
10
9
|
)
|
|
11
10
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
11
|
+
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
14
14
|
DatabricksVolumesDownloaderConfig,
|
|
@@ -21,7 +21,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
21
21
|
CONNECTOR_TYPE = "databricks_volumes_gcp"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class DatabricksGoogleVolumesAccessConfig(AccessConfig):
|
|
24
|
+
class DatabricksGoogleVolumesAccessConfig(DatabricksVolumesAccessConfig):
|
|
25
25
|
account_id: Optional[str] = Field(
|
|
26
26
|
default=None,
|
|
27
27
|
description="The Databricks account ID for the Databricks " "accounts endpoint.",
|
|
@@ -3,12 +3,12 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces import AccessConfig
|
|
7
6
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
7
|
DestinationRegistryEntry,
|
|
9
8
|
SourceRegistryEntry,
|
|
10
9
|
)
|
|
11
10
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
11
|
+
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
14
14
|
DatabricksVolumesDownloaderConfig,
|
|
@@ -21,7 +21,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
21
21
|
CONNECTOR_TYPE = "databricks_volumes"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class DatabricksNativeVolumesAccessConfig(AccessConfig):
|
|
24
|
+
class DatabricksNativeVolumesAccessConfig(DatabricksVolumesAccessConfig):
|
|
25
25
|
client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
|
|
26
26
|
client_secret: Optional[str] = Field(
|
|
27
27
|
default=None, description="Client Secret of the OAuth app."
|
|
@@ -255,6 +255,7 @@ class ElasticsearchDownloader(Downloader):
|
|
|
255
255
|
exc_info=True,
|
|
256
256
|
)
|
|
257
257
|
raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
|
|
258
|
+
file_data.source_identifiers = SourceIdentifiers(filename=filename, fullpath=filename)
|
|
258
259
|
cast_file_data = FileData.cast(file_data=file_data)
|
|
259
260
|
cast_file_data.identifier = filename_id
|
|
260
261
|
cast_file_data.metadata.date_processed = str(time())
|
|
@@ -264,7 +265,6 @@ class ElasticsearchDownloader(Downloader):
|
|
|
264
265
|
"index_name": index_name,
|
|
265
266
|
"document_id": record_id,
|
|
266
267
|
}
|
|
267
|
-
cast_file_data.source_identifiers = SourceIdentifiers(filename=filename, fullpath=filename)
|
|
268
268
|
return super().generate_download_response(
|
|
269
269
|
file_data=cast_file_data,
|
|
270
270
|
download_path=download_path,
|
|
@@ -198,14 +198,13 @@ class MongoDBDownloader(Downloader):
|
|
|
198
198
|
concatenated_values = "\n".join(str(value) for value in flattened_dict.values())
|
|
199
199
|
|
|
200
200
|
# Create a FileData object for each document with source_identifiers
|
|
201
|
-
cast_file_data = FileData.cast(file_data=file_data)
|
|
202
|
-
cast_file_data.identifier = str(doc_id)
|
|
203
201
|
filename = f"{doc_id}.txt"
|
|
204
|
-
|
|
202
|
+
file_data.source_identifiers = SourceIdentifiers(
|
|
205
203
|
filename=filename,
|
|
206
204
|
fullpath=filename,
|
|
207
|
-
rel_path=filename,
|
|
208
205
|
)
|
|
206
|
+
cast_file_data = FileData.cast(file_data=file_data)
|
|
207
|
+
cast_file_data.identifier = str(doc_id)
|
|
209
208
|
|
|
210
209
|
# Determine the download path
|
|
211
210
|
download_path = self.get_download_path(file_data=cast_file_data)
|
|
@@ -28,6 +28,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
28
28
|
FileDataSourceMetadata,
|
|
29
29
|
Indexer,
|
|
30
30
|
IndexerConfig,
|
|
31
|
+
SourceIdentifiers,
|
|
31
32
|
Uploader,
|
|
32
33
|
UploaderConfig,
|
|
33
34
|
UploadStager,
|
|
@@ -218,6 +219,10 @@ class SQLDownloader(Downloader, ABC):
|
|
|
218
219
|
)
|
|
219
220
|
download_path.parent.mkdir(parents=True, exist_ok=True)
|
|
220
221
|
result.to_csv(download_path, index=False)
|
|
222
|
+
file_data.source_identifiers = SourceIdentifiers(
|
|
223
|
+
filename=filename,
|
|
224
|
+
fullpath=filename,
|
|
225
|
+
)
|
|
221
226
|
cast_file_data = FileData.cast(file_data=file_data)
|
|
222
227
|
cast_file_data.identifier = filename_id
|
|
223
228
|
return super().generate_download_response(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.3.10
|
|
3
|
+
Version: 0.3.11
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,14 +22,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
+
Requires-Dist: ndjson
|
|
26
|
+
Requires-Dist: pandas
|
|
27
|
+
Requires-Dist: dataclasses-json
|
|
28
|
+
Requires-Dist: pydantic>=2.7
|
|
25
29
|
Requires-Dist: opentelemetry-sdk
|
|
26
30
|
Requires-Dist: python-dateutil
|
|
27
31
|
Requires-Dist: click
|
|
28
|
-
Requires-Dist: ndjson
|
|
29
|
-
Requires-Dist: pydantic>=2.7
|
|
30
|
-
Requires-Dist: pandas
|
|
31
32
|
Requires-Dist: tqdm
|
|
32
|
-
Requires-Dist: dataclasses-json
|
|
33
33
|
Provides-Extra: airtable
|
|
34
34
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
35
35
|
Provides-Extra: astradb
|
|
@@ -42,8 +42,8 @@ Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
|
42
42
|
Provides-Extra: bedrock
|
|
43
43
|
Requires-Dist: boto3; extra == "bedrock"
|
|
44
44
|
Provides-Extra: biomed
|
|
45
|
-
Requires-Dist: bs4; extra == "biomed"
|
|
46
45
|
Requires-Dist: requests; extra == "biomed"
|
|
46
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
47
47
|
Provides-Extra: box
|
|
48
48
|
Requires-Dist: fsspec; extra == "box"
|
|
49
49
|
Requires-Dist: boxfs; extra == "box"
|
|
@@ -61,8 +61,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
|
|
|
61
61
|
Provides-Extra: databricks-volumes
|
|
62
62
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
63
63
|
Provides-Extra: delta-table
|
|
64
|
-
Requires-Dist: deltalake; extra == "delta-table"
|
|
65
64
|
Requires-Dist: boto3; extra == "delta-table"
|
|
65
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
66
66
|
Provides-Extra: discord
|
|
67
67
|
Requires-Dist: discord-py; extra == "discord"
|
|
68
68
|
Provides-Extra: doc
|
|
@@ -70,8 +70,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
70
70
|
Provides-Extra: docx
|
|
71
71
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
72
72
|
Provides-Extra: dropbox
|
|
73
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
74
73
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
74
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
75
75
|
Provides-Extra: duckdb
|
|
76
76
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
77
77
|
Provides-Extra: elasticsearch
|
|
@@ -90,19 +90,19 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
90
90
|
Provides-Extra: epub
|
|
91
91
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
92
92
|
Provides-Extra: gcs
|
|
93
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
94
93
|
Requires-Dist: fsspec; extra == "gcs"
|
|
95
94
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
95
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
96
96
|
Provides-Extra: github
|
|
97
|
-
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
98
97
|
Requires-Dist: requests; extra == "github"
|
|
98
|
+
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
99
99
|
Provides-Extra: gitlab
|
|
100
100
|
Requires-Dist: python-gitlab; extra == "gitlab"
|
|
101
101
|
Provides-Extra: google-drive
|
|
102
102
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
103
103
|
Provides-Extra: hubspot
|
|
104
|
-
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
105
104
|
Requires-Dist: urllib3; extra == "hubspot"
|
|
105
|
+
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
106
106
|
Provides-Extra: jira
|
|
107
107
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
108
108
|
Provides-Extra: kafka
|
|
@@ -120,20 +120,20 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
120
120
|
Provides-Extra: msg
|
|
121
121
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
122
122
|
Provides-Extra: neo4j
|
|
123
|
-
Requires-Dist: cymple; extra == "neo4j"
|
|
124
123
|
Requires-Dist: neo4j; extra == "neo4j"
|
|
124
|
+
Requires-Dist: cymple; extra == "neo4j"
|
|
125
125
|
Requires-Dist: networkx; extra == "neo4j"
|
|
126
126
|
Provides-Extra: notion
|
|
127
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
128
127
|
Requires-Dist: backoff; extra == "notion"
|
|
129
128
|
Requires-Dist: notion-client; extra == "notion"
|
|
130
129
|
Requires-Dist: httpx; extra == "notion"
|
|
130
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
131
131
|
Provides-Extra: odt
|
|
132
132
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
133
133
|
Provides-Extra: onedrive
|
|
134
134
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
135
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
136
135
|
Requires-Dist: msal; extra == "onedrive"
|
|
136
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
137
137
|
Provides-Extra: openai
|
|
138
138
|
Requires-Dist: openai; extra == "openai"
|
|
139
139
|
Requires-Dist: tiktoken; extra == "openai"
|
|
@@ -165,8 +165,8 @@ Requires-Dist: unstructured[rst]; extra == "rst"
|
|
|
165
165
|
Provides-Extra: rtf
|
|
166
166
|
Requires-Dist: unstructured[rtf]; extra == "rtf"
|
|
167
167
|
Provides-Extra: s3
|
|
168
|
-
Requires-Dist: s3fs; extra == "s3"
|
|
169
168
|
Requires-Dist: fsspec; extra == "s3"
|
|
169
|
+
Requires-Dist: s3fs; extra == "s3"
|
|
170
170
|
Provides-Extra: salesforce
|
|
171
171
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
172
172
|
Provides-Extra: sftp
|
|
@@ -19,8 +19,8 @@ test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP
|
|
|
19
19
|
test/integration/connectors/test_pinecone.py,sha256=suPFi40d6rHXurQQLIpCzW5XRTdgzlP-f-KLPhGCUHo,10208
|
|
20
20
|
test/integration/connectors/test_qdrant.py,sha256=hyuqSJDaylkQVxWh7byD8jo8bwPuBxSa8MWRD3sBu-Y,7906
|
|
21
21
|
test/integration/connectors/test_s3.py,sha256=PJaAwFRF2lXMQlkbv9JHpngPc6706ML7zowOlXT3TcY,7033
|
|
22
|
-
test/integration/connectors/
|
|
23
|
-
test/integration/connectors/
|
|
22
|
+
test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
+
test/integration/connectors/databricks/test_volumes_native.py,sha256=ig60-nCdLF0GsgJowG9eRaG28iuoYHtuf12HdK6OE1I,7764
|
|
24
24
|
test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
|
|
26
26
|
test/integration/connectors/duckdb/test_duckdb.py,sha256=tZfHJYNILVqwT20XD-aJUFZ67TnJvHLpfAxNvNiE51o,2891
|
|
@@ -30,10 +30,10 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
|
|
|
30
30
|
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=Lm8topVssTsqcI8H2Tzohuxb9j-CFHv9orM6WfAqCZw,11933
|
|
31
31
|
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=fWpZrhzRiVpm9AOlZvgZRCjyXSYvWG7-8j06x-HR3PY,11311
|
|
32
32
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
-
test/integration/connectors/sql/test_postgres.py,sha256=
|
|
34
|
-
test/integration/connectors/sql/test_singlestore.py,sha256=
|
|
35
|
-
test/integration/connectors/sql/test_snowflake.py,sha256
|
|
36
|
-
test/integration/connectors/sql/test_sqlite.py,sha256=
|
|
33
|
+
test/integration/connectors/sql/test_postgres.py,sha256=DXyHMZBQgrV2HyVflkoBpT1mewSnvw3ugoHtGR5o8OM,6876
|
|
34
|
+
test/integration/connectors/sql/test_singlestore.py,sha256=pzCPo8IW3c9VH-f3UdJS5MjPjkHarJPSepAxV0ZVajo,6059
|
|
35
|
+
test/integration/connectors/sql/test_snowflake.py,sha256=MiTzepeeJlv147CyzCGyd16MRk5QeUw4g4L3TTi5gVY,7400
|
|
36
|
+
test/integration/connectors/sql/test_sqlite.py,sha256=rSkjv3KpslAvt_8LQecJUT0lOLtuZSvhtlW2deJovLI,5862
|
|
37
37
|
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
|
|
39
39
|
test/integration/connectors/utils/docker.py,sha256=8uOTJ3AVG1dxK4OiLvOLfRxL_TsYQX2KKCID9TZ7-Ac,4995
|
|
@@ -94,7 +94,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
94
94
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
95
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
96
96
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
97
|
-
unstructured_ingest/__version__.py,sha256=
|
|
97
|
+
unstructured_ingest/__version__.py,sha256=CJalz6YpEm8DAhzCP5dryU5ddzKQaSQOzTObKxfOVHs,43
|
|
98
98
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
99
99
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
100
100
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -379,7 +379,7 @@ unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1K
|
|
|
379
379
|
unstructured_ingest/v2/interfaces/__init__.py,sha256=9VO09XuTvyOcFF8ZDKN169fNb_uA5TAYzPsiPHOyxhQ,963
|
|
380
380
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
381
381
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
382
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=
|
|
382
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
|
|
383
383
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
|
|
384
384
|
unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
|
|
385
385
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
@@ -408,18 +408,18 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcD
|
|
|
408
408
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
409
409
|
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=CTWLEmaKLTjbqeUQGI0fxJobsqDOc1d2ZKJoXh98Lww,5432
|
|
410
410
|
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
|
|
411
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
411
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
|
|
412
412
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
413
413
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=G1DQHhhFQCS2RLF0cVvoUH9QO8KkVjIyNZ9nKh__aHw,7220
|
|
414
414
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=-Y1OU_ZXhZQNj5NH3EN01CP8QKKZJaJ9xkXoAlSgnIk,7604
|
|
415
|
-
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=
|
|
415
|
+
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
416
416
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
|
|
417
417
|
unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
|
|
418
418
|
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=5k7pdAzJGXSdyPCzW9vu2OaAjGVTo2JevDyGaXM1Hvk,13370
|
|
419
419
|
unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
|
|
420
420
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
|
|
421
421
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=I57hyH5nz_p7utmUOkvt_6vCPxNIVQMoukplUgIyYi8,8503
|
|
422
|
-
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=
|
|
422
|
+
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
423
423
|
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=lRtWe6xWYogT-y_r_o7HWvlFMf_OIPGQq_Z-5v7IOq0,14163
|
|
424
424
|
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=H8qk53YJXAPrPyISze0dybZdDFv5B7dVO3fIr10dVU8,15982
|
|
425
425
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
@@ -429,17 +429,17 @@ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtl
|
|
|
429
429
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
430
430
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
431
431
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
|
|
432
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=
|
|
433
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=
|
|
434
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=
|
|
435
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=
|
|
436
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=
|
|
432
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=kI_ThB5e-DS8-GiQP5TQ8cP3fiGRm-V2AuNlGoSjH6I,6613
|
|
433
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
|
|
434
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
|
|
435
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
|
|
437
437
|
unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
|
|
438
438
|
unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=FVblIddorGCh9D9GZ8zLVUm8n39PJA5JLoJeWd-tSy8,2610
|
|
439
439
|
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
|
|
440
440
|
unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
|
|
441
441
|
unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
|
|
442
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=
|
|
442
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
|
|
443
443
|
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
|
|
444
444
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
445
445
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=NWsouhaxeyxrS_WlZJ70X2YIdioFH5LSaRLhnCPYAH0,6034
|
|
@@ -470,16 +470,16 @@ unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=E16CXRBw8fZKT
|
|
|
470
470
|
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
|
|
471
471
|
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=-2E9dsdNhjAiuzeSBytBbAhljOhvQ8kN8wvlUESvLo8,5465
|
|
472
472
|
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=TApGi5G0W0TktJFmo4QWDR3X3R-MUQTKbIxjAX_M8ZI,7402
|
|
473
|
-
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
|
|
473
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=HC-qnhdpuyScKoGh50pPjkQLGSac_mOAnuB2FZwSVl0,15265
|
|
474
474
|
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=Q5RAqn5Ccw-pbeKZLkiMn5IVw6EemCMukXzLlS7pDhc,5162
|
|
475
475
|
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
|
|
476
476
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
477
477
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
478
478
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
479
479
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
|
|
480
|
-
unstructured_ingest-0.3.
|
|
481
|
-
unstructured_ingest-0.3.
|
|
482
|
-
unstructured_ingest-0.3.
|
|
483
|
-
unstructured_ingest-0.3.
|
|
484
|
-
unstructured_ingest-0.3.
|
|
485
|
-
unstructured_ingest-0.3.
|
|
480
|
+
unstructured_ingest-0.3.11.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
481
|
+
unstructured_ingest-0.3.11.dist-info/METADATA,sha256=4-py_Sf-ahdzXF0l1evY4aI_s5Vz4oc6Gtenhegc6Vo,7623
|
|
482
|
+
unstructured_ingest-0.3.11.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
483
|
+
unstructured_ingest-0.3.11.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
484
|
+
unstructured_ingest-0.3.11.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
485
|
+
unstructured_ingest-0.3.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.11.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|