unstructured-ingest 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest has been flagged as potentially problematic by the registry scanner.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +10 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +8 -3
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +93 -59
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/METADATA +95 -95
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/RECORD +10 -11
- test/unit/v2/connectors/databricks/__init__.py +0 -0
- test/unit/v2/connectors/databricks/test_volumes_table.py +0 -44
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/top_level.txt +0 -0
unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "0.6.1"  # pragma: no cover
+__version__ = "0.6.2"  # pragma: no cover
unstructured_ingest/v2/processes/connectors/databricks/volumes.py
@@ -5,7 +5,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional
 from uuid import NAMESPACE_DNS, uuid5

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, Secret

 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import (
@@ -61,12 +61,14 @@ class DatabricksVolumesAccessConfig(AccessConfig):


 class DatabricksVolumesConnectionConfig(ConnectionConfig, ABC):
+    access_config: Secret[DatabricksVolumesAccessConfig]
     host: Optional[str] = Field(
         default=None,
         description="The Databricks host URL for either the "
         "Databricks workspace endpoint or the "
         "Databricks accounts endpoint.",
     )
+    user_agent: str = "unstructuredio_oss"

     def wrap_error(self, e: Exception) -> Exception:
         from databricks.sdk.errors.base import DatabricksError
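The `Secret[...]` wrapper newly applied to `access_config` is pydantic's generic secret type: it masks the wrapped value in reprs and logs and forces an explicit unwrap. A minimal sketch of that behavior, using made-up demo classes rather than the connector's own:

```python
from pydantic import BaseModel, Secret


class DemoAccessConfig(BaseModel):
    token: str


class DemoConnectionConfig(BaseModel):
    access_config: Secret[DemoAccessConfig]


cfg = DemoConnectionConfig(access_config=DemoAccessConfig(token="dapi-example"))
print(cfg)                                         # the wrapped credentials are masked in the repr
print(cfg.access_config.get_secret_value().token)  # explicit unwrap, as get_client() below does
```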
@@ -94,11 +96,14 @@ class DatabricksVolumesConnectionConfig(ConnectionConfig, ABC):
     @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes")
     def get_client(self) -> "WorkspaceClient":
         from databricks.sdk import WorkspaceClient
+        from databricks.sdk.core import Config

-        return WorkspaceClient(
+        config = Config(
             host=self.host,
             **self.access_config.get_secret_value().model_dump(),
-        )
+        ).with_user_agent_extra("PyDatabricksSdk", self.user_agent)
+
+        return WorkspaceClient(config=config)


 class DatabricksVolumesIndexerConfig(IndexerConfig, DatabricksPathMixin):
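For reference, the new client construction is roughly equivalent to the standalone sketch below; host and token are placeholders, since the real values come from the connection and access configs:

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.core import Config

# Build the SDK config, then tag the user agent so requests from this connector are attributable.
config = Config(
    host="https://example.cloud.databricks.com",  # placeholder host
    token="dapi-example-token",                   # placeholder credential
).with_user_agent_extra("PyDatabricksSdk", "unstructuredio_oss")

client = WorkspaceClient(config=config)
```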
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py
@@ -1,14 +1,20 @@
+import json
 import os
-import tempfile
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional

 from pydantic import Field

-from unstructured_ingest.utils.data_prep import write_data
-from unstructured_ingest.v2.…
+from unstructured_ingest.utils.data_prep import get_json_data, write_data
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
+from unstructured_ingest.v2.interfaces import (
+    Uploader,
+    UploaderConfig,
+    UploadStager,
+    UploadStagerConfig,
+)
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
@@ -16,28 +22,50 @@ from unstructured_ingest.v2.processes.connector_registry import (
 from unstructured_ingest.v2.processes.connectors.databricks.volumes import DatabricksPathMixin
 from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
     DatabricksDeltaTablesConnectionConfig,
-    DatabricksDeltaTablesUploadStager,
     DatabricksDeltaTablesUploadStagerConfig,
 )
 from unstructured_ingest.v2.types.file_data import FileData
+from unstructured_ingest.v2.utils import get_enhanced_element_id

 CONNECTOR_TYPE = "databricks_volume_delta_tables"

 if TYPE_CHECKING:
-    …
+    pass


 class DatabricksVolumeDeltaTableUploaderConfig(UploaderConfig, DatabricksPathMixin):
     database: str = Field(description="Database name", default="default")
-    table_name: str = Field(description="Table name")
+    table_name: Optional[str] = Field(description="Table name", default=None)
+
+
+class DatabricksVolumeDeltaTableStagerConfig(UploadStagerConfig):
+    pass


 @dataclass
-class DatabricksVolumeDeltaTableStager(
-    …
+class DatabricksVolumeDeltaTableStager(UploadStager):
+    upload_stager_config: DatabricksVolumeDeltaTableStagerConfig = field(
+        default_factory=DatabricksVolumeDeltaTableStagerConfig
+    )
+
+    def run(
+        self,
+        elements_filepath: Path,
+        output_dir: Path,
+        output_filename: str,
+        file_data: FileData,
+        **kwargs: Any,
+    ) -> Path:
         # To avoid new line issues when migrating from volumes into delta tables, omit indenting
         # and always write it as a json file
+        output_dir.mkdir(exist_ok=True, parents=True)
+        output_path = output_dir / output_filename
         final_output_path = output_path.with_suffix(".json")
+        data = get_json_data(path=elements_filepath)
+        for element in data:
+            element["id"] = get_enhanced_element_id(element_dict=element, file_data=file_data)
+            element[RECORD_ID_LABEL] = file_data.identifier
+            element["metadata"] = json.dumps(element.get("metadata", {}))
         write_data(path=final_output_path, data=data, indent=None)
         return final_output_path

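The reworked stager normalizes each element before upload: it assigns a deterministic id, stamps the parent record's identifier under RECORD_ID_LABEL, and flattens metadata to a JSON string so it maps onto a single table column. A rough illustration of that per-element transform; the label value and id derivation below are stand-ins, not the library's actual constants:

```python
import json

RECORD_ID_LABEL = "record_id"  # assumed value; the real constant lives in unstructured_ingest.v2.constants

element = {"text": "Hello world", "metadata": {"filename": "report.pdf", "page_number": 1}}
file_identifier = "3f2a9c"  # identifier of the source document this element came from

element["id"] = f"{file_identifier}-0"                  # stand-in for get_enhanced_element_id(...)
element[RECORD_ID_LABEL] = file_identifier              # lets delete_previous_content target stale rows later
element["metadata"] = json.dumps(element["metadata"])   # metadata becomes a JSON string column

print(element)
```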
@@ -49,6 +77,29 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
     connector_type: str = CONNECTOR_TYPE
     _columns: Optional[dict[str, str]] = None

+    def init(self, **kwargs: Any) -> None:
+        self.create_destination(**kwargs)
+
+    def create_destination(
+        self, destination_name: str = "unstructuredautocreated", **kwargs: Any
+    ) -> bool:
+        table_name = self.upload_config.table_name or destination_name
+        self.upload_config.table_name = table_name
+        connectors_dir = Path(__file__).parents[1]
+        collection_config_file = connectors_dir / "assets" / "databricks_delta_table_schema.sql"
+        with self.get_cursor() as cursor:
+            cursor.execute("SHOW TABLES")
+            table_names = [r[1] for r in cursor.fetchall()]
+            if table_name in table_names:
+                return False
+            with collection_config_file.open() as schema_file:
+                data_lines = schema_file.readlines()
+            data_lines[0] = data_lines[0].replace("elements", table_name)
+            destination_schema = "".join([line.strip() for line in data_lines])
+            logger.info(f"creating table {table_name} for user")
+            cursor.execute(destination_schema)
+        return True
+
     def precheck(self) -> None:
         with self.connection_config.get_cursor() as cursor:
             cursor.execute("SHOW CATALOGS")
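create_destination() reads the bundled databricks_delta_table_schema.sql, swaps the default "elements" table name on its first line for the configured one, and executes the flattened statement. The SQL asset itself is not shown in this diff, so the template below is only a made-up stand-in to illustrate the substitution step:

```python
# Hypothetical schema template; the real asset ships 10 lines that are not visible in this diff.
schema_lines = [
    "CREATE TABLE IF NOT EXISTS elements (\n",
    "    id STRING,\n",
    "    record_id STRING,\n",
    "    text STRING,\n",
    "    metadata VARIANT\n",  # guess: PARSE_JSON(...) in the uploader suggests a VARIANT column
    ")\n",
]

table_name = "unstructuredautocreated"
schema_lines[0] = schema_lines[0].replace("elements", table_name)
statement = "".join(line.strip() for line in schema_lines)
print(statement)  # one-line CREATE TABLE statement, ready for cursor.execute()
```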
@@ -68,14 +119,6 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
                         self.upload_config.database, ", ".join(databases)
                     )
                 )
-            cursor.execute(f"SHOW TABLES IN {self.upload_config.database}")
-            table_names = [r[1] for r in cursor.fetchall()]
-            if self.upload_config.table_name not in table_names:
-                raise ValueError(
-                    "Table {} not found in {}".format(
-                        self.upload_config.table_name, ", ".join(table_names)
-                    )
-                )

     def get_output_path(self, file_data: FileData, suffix: str = ".json") -> str:
         filename = Path(file_data.source_identifiers.filename)
@@ -98,51 +141,42 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
             self._columns = {desc[0]: desc[1] for desc in cursor.description}
         return self._columns

-    def …
-        …
-                f"{…
-            )
-        if missing_columns and add_missing_columns:
-            logger.info(
-                "Following null filled columns will be added to match the table's schema:"
-                f" {', '.join(missing_columns)} "
+    def can_delete(self) -> bool:
+        existing_columns = self.get_table_columns()
+        return RECORD_ID_LABEL in existing_columns
+
+    def delete_previous_content(self, file_data: FileData) -> None:
+        logger.debug(
+            f"deleting any content with metadata "
+            f"{RECORD_ID_LABEL}={file_data.identifier} "
+            f"from delta table: {self.upload_config.table_name}"
+        )
+        with self.get_cursor() as cursor:
+            cursor.execute(
+                f"DELETE FROM {self.upload_config.table_name} WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'"  # noqa: E501
             )
-            …
-        if add_missing_columns:
-            for column in missing_columns:
-                df[column] = pd.Series()
-        return df
+            results = cursor.fetchall()
+            deleted_rows = results[0][0]
+            logger.debug(f"deleted {deleted_rows} rows from table {self.upload_config.table_name}")

     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        …
+        if self.can_delete():
+            self.delete_previous_content(file_data=file_data)
+        with self.get_cursor(staging_allowed_local_path=path.parent.as_posix()) as cursor:
+            catalog_path = self.get_output_path(file_data=file_data)
+            logger.debug(f"uploading {path.as_posix()} to {catalog_path}")
+            cursor.execute(f"PUT '{path.as_posix()}' INTO '{catalog_path}' OVERWRITE")
+            logger.debug(
+                f"migrating content from {catalog_path} to "
+                f"table {self.upload_config.table_name}"
+            )
+            data = get_json_data(path=path)
+            columns = data[0].keys()
+            select_columns = ["PARSE_JSON(metadata)" if c == "metadata" else c for c in columns]
+            column_str = ", ".join(columns)
+            select_column_str = ", ".join(select_columns)
+            sql_statment = f"INSERT INTO `{self.upload_config.table_name}` ({column_str}) SELECT {select_column_str} FROM json.`{catalog_path}`"  # noqa: E501
+            cursor.execute(sql_statment)


 databricks_volumes_delta_tables_destination_entry = DestinationRegistryEntry(
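The new upload path stages the JSON file into the volume with PUT and then moves it into the Delta table with a single INSERT ... SELECT that re-parses the serialized metadata column. A sketch of the statement being built; table name, columns, and volume path here are placeholders:

```python
columns = ["id", "record_id", "text", "metadata"]         # taken from the keys of the first staged element
table_name = "unstructuredautocreated"                    # placeholder table
catalog_path = "/Volumes/catalog/schema/volume/out.json"  # placeholder volume path

select_columns = ["PARSE_JSON(metadata)" if c == "metadata" else c for c in columns]
sql = (
    f"INSERT INTO `{table_name}` ({', '.join(columns)}) "
    f"SELECT {', '.join(select_columns)} FROM json.`{catalog_path}`"
)
print(sql)
# INSERT INTO `unstructuredautocreated` (id, record_id, text, metadata)
#   SELECT id, record_id, text, PARSE_JSON(metadata) FROM json.`/Volumes/catalog/schema/volume/out.json`
```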
{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.6.1
+Version: 0.6.2
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -23,347 +23,347 @@ Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: click
-Requires-Dist: tqdm
-Requires-Dist: opentelemetry-sdk
 Requires-Dist: python-dateutil
 Requires-Dist: dataclasses_json
 Requires-Dist: pydantic>=2.7
-Requires-Dist: numpy
+Requires-Dist: tqdm
+Requires-Dist: opentelemetry-sdk
 Requires-Dist: pandas
+Requires-Dist: numpy
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
-Requires-Dist: numpy; extra == "remote"
 Requires-Dist: pandas; extra == "remote"
+Requires-Dist: numpy; extra == "remote"
 Provides-Extra: csv
 Requires-Dist: unstructured[tsv]; extra == "csv"
-Requires-Dist: numpy; extra == "csv"
 Requires-Dist: pandas; extra == "csv"
+Requires-Dist: numpy; extra == "csv"
 Provides-Extra: doc
 Requires-Dist: unstructured[docx]; extra == "doc"
-Requires-Dist: numpy; extra == "doc"
 Requires-Dist: pandas; extra == "doc"
+Requires-Dist: numpy; extra == "doc"
 Provides-Extra: docx
 Requires-Dist: unstructured[docx]; extra == "docx"
-Requires-Dist: numpy; extra == "docx"
 Requires-Dist: pandas; extra == "docx"
+Requires-Dist: numpy; extra == "docx"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
-Requires-Dist: numpy; extra == "epub"
 Requires-Dist: pandas; extra == "epub"
+Requires-Dist: numpy; extra == "epub"
 Provides-Extra: md
 Requires-Dist: unstructured[md]; extra == "md"
-Requires-Dist: numpy; extra == "md"
 Requires-Dist: pandas; extra == "md"
+Requires-Dist: numpy; extra == "md"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
-Requires-Dist: numpy; extra == "msg"
 Requires-Dist: pandas; extra == "msg"
+Requires-Dist: numpy; extra == "msg"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
-Requires-Dist: numpy; extra == "odt"
 Requires-Dist: pandas; extra == "odt"
+Requires-Dist: numpy; extra == "odt"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
-Requires-Dist: numpy; extra == "org"
 Requires-Dist: pandas; extra == "org"
+Requires-Dist: numpy; extra == "org"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
-Requires-Dist: numpy; extra == "pdf"
 Requires-Dist: pandas; extra == "pdf"
+Requires-Dist: numpy; extra == "pdf"
 Provides-Extra: ppt
 Requires-Dist: unstructured[pptx]; extra == "ppt"
-Requires-Dist: numpy; extra == "ppt"
 Requires-Dist: pandas; extra == "ppt"
+Requires-Dist: numpy; extra == "ppt"
 Provides-Extra: pptx
 Requires-Dist: unstructured[pptx]; extra == "pptx"
-Requires-Dist: numpy; extra == "pptx"
 Requires-Dist: pandas; extra == "pptx"
+Requires-Dist: numpy; extra == "pptx"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
-Requires-Dist: numpy; extra == "rtf"
 Requires-Dist: pandas; extra == "rtf"
+Requires-Dist: numpy; extra == "rtf"
 Provides-Extra: rst
 Requires-Dist: unstructured[rst]; extra == "rst"
-Requires-Dist: numpy; extra == "rst"
 Requires-Dist: pandas; extra == "rst"
+Requires-Dist: numpy; extra == "rst"
 Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
-Requires-Dist: numpy; extra == "tsv"
 Requires-Dist: pandas; extra == "tsv"
+Requires-Dist: numpy; extra == "tsv"
 Provides-Extra: xlsx
 Requires-Dist: unstructured[xlsx]; extra == "xlsx"
-Requires-Dist: numpy; extra == "xlsx"
 Requires-Dist: pandas; extra == "xlsx"
+Requires-Dist: numpy; extra == "xlsx"
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
-Requires-Dist: numpy; extra == "airtable"
 Requires-Dist: pandas; extra == "airtable"
+Requires-Dist: numpy; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
-Requires-Dist: numpy; extra == "astradb"
 Requires-Dist: pandas; extra == "astradb"
+Requires-Dist: numpy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: fsspec; extra == "azure"
-Requires-Dist: numpy; extra == "azure"
+Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: pandas; extra == "azure"
+Requires-Dist: numpy; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
-Requires-Dist: numpy; extra == "azure-ai-search"
 Requires-Dist: pandas; extra == "azure-ai-search"
+Requires-Dist: numpy; extra == "azure-ai-search"
 Provides-Extra: biomed
 Requires-Dist: requests; extra == "biomed"
 Requires-Dist: bs4; extra == "biomed"
-Requires-Dist: numpy; extra == "biomed"
 Requires-Dist: pandas; extra == "biomed"
+Requires-Dist: numpy; extra == "biomed"
 Provides-Extra: box
 Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
-Requires-Dist: numpy; extra == "box"
 Requires-Dist: pandas; extra == "box"
+Requires-Dist: numpy; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
-Requires-Dist: numpy; extra == "chroma"
 Requires-Dist: pandas; extra == "chroma"
+Requires-Dist: numpy; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
-Requires-Dist: numpy; extra == "clarifai"
 Requires-Dist: pandas; extra == "clarifai"
+Requires-Dist: numpy; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
-Requires-Dist: numpy; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
 Requires-Dist: pandas; extra == "confluence"
+Requires-Dist: numpy; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
-Requires-Dist: numpy; extra == "couchbase"
 Requires-Dist: pandas; extra == "couchbase"
+Requires-Dist: numpy; extra == "couchbase"
 Provides-Extra: delta-table
 Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
-Requires-Dist: numpy; extra == "delta-table"
 Requires-Dist: pandas; extra == "delta-table"
+Requires-Dist: numpy; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
-Requires-Dist: numpy; extra == "discord"
 Requires-Dist: pandas; extra == "discord"
+Requires-Dist: numpy; extra == "discord"
 Provides-Extra: dropbox
-Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Requires-Dist: fsspec; extra == "dropbox"
-Requires-Dist: numpy; extra == "dropbox"
+Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Requires-Dist: pandas; extra == "dropbox"
+Requires-Dist: numpy; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
-Requires-Dist: numpy; extra == "duckdb"
 Requires-Dist: pandas; extra == "duckdb"
+Requires-Dist: numpy; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
-Requires-Dist: numpy; extra == "elasticsearch"
 Requires-Dist: pandas; extra == "elasticsearch"
+Requires-Dist: numpy; extra == "elasticsearch"
 Provides-Extra: gcs
+Requires-Dist: fsspec; extra == "gcs"
 Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: gcsfs; extra == "gcs"
-Requires-Dist: fsspec; extra == "gcs"
-Requires-Dist: numpy; extra == "gcs"
 Requires-Dist: pandas; extra == "gcs"
+Requires-Dist: numpy; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
-Requires-Dist: numpy; extra == "github"
 Requires-Dist: pandas; extra == "github"
+Requires-Dist: numpy; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
-Requires-Dist: numpy; extra == "gitlab"
 Requires-Dist: pandas; extra == "gitlab"
+Requires-Dist: numpy; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
-Requires-Dist: numpy; extra == "google-drive"
 Requires-Dist: pandas; extra == "google-drive"
+Requires-Dist: numpy; extra == "google-drive"
 Provides-Extra: hubspot
 Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
-Requires-Dist: numpy; extra == "hubspot"
 Requires-Dist: pandas; extra == "hubspot"
+Requires-Dist: numpy; extra == "hubspot"
 Provides-Extra: ibm-watsonx-s3
-Requires-Dist: httpx; extra == "ibm-watsonx-s3"
-Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
 Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
 Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
-Requires-Dist: numpy; extra == "ibm-watsonx-s3"
+Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
+Requires-Dist: httpx; extra == "ibm-watsonx-s3"
 Requires-Dist: pandas; extra == "ibm-watsonx-s3"
+Requires-Dist: numpy; extra == "ibm-watsonx-s3"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
-Requires-Dist: numpy; extra == "jira"
 Requires-Dist: pandas; extra == "jira"
+Requires-Dist: numpy; extra == "jira"
 Provides-Extra: kafka
 Requires-Dist: confluent-kafka; extra == "kafka"
-Requires-Dist: numpy; extra == "kafka"
 Requires-Dist: pandas; extra == "kafka"
+Requires-Dist: numpy; extra == "kafka"
 Provides-Extra: kdbai
 Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
-Requires-Dist: numpy; extra == "kdbai"
 Requires-Dist: pandas; extra == "kdbai"
+Requires-Dist: numpy; extra == "kdbai"
 Provides-Extra: lancedb
 Requires-Dist: lancedb; extra == "lancedb"
-Requires-Dist: numpy; extra == "lancedb"
 Requires-Dist: pandas; extra == "lancedb"
+Requires-Dist: numpy; extra == "lancedb"
 Provides-Extra: milvus
 Requires-Dist: pymilvus; extra == "milvus"
-Requires-Dist: numpy; extra == "milvus"
 Requires-Dist: pandas; extra == "milvus"
+Requires-Dist: numpy; extra == "milvus"
 Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
-Requires-Dist: numpy; extra == "mongodb"
 Requires-Dist: pandas; extra == "mongodb"
+Requires-Dist: numpy; extra == "mongodb"
 Provides-Extra: neo4j
-Requires-Dist: networkx; extra == "neo4j"
-Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j-rust-ext; extra == "neo4j"
-Requires-Dist: numpy; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
+Requires-Dist: networkx; extra == "neo4j"
 Requires-Dist: pandas; extra == "neo4j"
+Requires-Dist: numpy; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: httpx; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
+Requires-Dist: httpx; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
-Requires-Dist: backoff; extra == "notion"
-Requires-Dist: numpy; extra == "notion"
 Requires-Dist: pandas; extra == "notion"
+Requires-Dist: numpy; extra == "notion"
 Provides-Extra: onedrive
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
-Requires-Dist: bs4; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
-Requires-Dist: numpy; extra == "onedrive"
+Requires-Dist: bs4; extra == "onedrive"
 Requires-Dist: pandas; extra == "onedrive"
+Requires-Dist: numpy; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
-Requires-Dist: numpy; extra == "opensearch"
 Requires-Dist: pandas; extra == "opensearch"
+Requires-Dist: numpy; extra == "opensearch"
 Provides-Extra: outlook
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
 Requires-Dist: msal; extra == "outlook"
-Requires-Dist: numpy; extra == "outlook"
 Requires-Dist: pandas; extra == "outlook"
+Requires-Dist: numpy; extra == "outlook"
 Provides-Extra: pinecone
 Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
-Requires-Dist: numpy; extra == "pinecone"
 Requires-Dist: pandas; extra == "pinecone"
+Requires-Dist: numpy; extra == "pinecone"
 Provides-Extra: postgres
 Requires-Dist: psycopg2-binary; extra == "postgres"
-Requires-Dist: numpy; extra == "postgres"
 Requires-Dist: pandas; extra == "postgres"
+Requires-Dist: numpy; extra == "postgres"
 Provides-Extra: qdrant
 Requires-Dist: qdrant-client; extra == "qdrant"
-Requires-Dist: numpy; extra == "qdrant"
 Requires-Dist: pandas; extra == "qdrant"
+Requires-Dist: numpy; extra == "qdrant"
 Provides-Extra: reddit
 Requires-Dist: praw; extra == "reddit"
-Requires-Dist: numpy; extra == "reddit"
 Requires-Dist: pandas; extra == "reddit"
+Requires-Dist: numpy; extra == "reddit"
 Provides-Extra: redis
 Requires-Dist: redis; extra == "redis"
-Requires-Dist: numpy; extra == "redis"
 Requires-Dist: pandas; extra == "redis"
+Requires-Dist: numpy; extra == "redis"
 Provides-Extra: s3
-Requires-Dist: s3fs; extra == "s3"
 Requires-Dist: fsspec; extra == "s3"
-Requires-Dist: numpy; extra == "s3"
+Requires-Dist: s3fs; extra == "s3"
 Requires-Dist: pandas; extra == "s3"
+Requires-Dist: numpy; extra == "s3"
 Provides-Extra: sharepoint
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: msal; extra == "sharepoint"
-Requires-Dist: numpy; extra == "sharepoint"
 Requires-Dist: pandas; extra == "sharepoint"
+Requires-Dist: numpy; extra == "sharepoint"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
-Requires-Dist: numpy; extra == "salesforce"
 Requires-Dist: pandas; extra == "salesforce"
+Requires-Dist: numpy; extra == "salesforce"
 Provides-Extra: sftp
 Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
-Requires-Dist: numpy; extra == "sftp"
 Requires-Dist: pandas; extra == "sftp"
+Requires-Dist: numpy; extra == "sftp"
 Provides-Extra: slack
 Requires-Dist: slack_sdk[optional]; extra == "slack"
-Requires-Dist: numpy; extra == "slack"
 Requires-Dist: pandas; extra == "slack"
+Requires-Dist: numpy; extra == "slack"
 Provides-Extra: snowflake
 Requires-Dist: snowflake-connector-python; extra == "snowflake"
 Requires-Dist: psycopg2-binary; extra == "snowflake"
-Requires-Dist: numpy; extra == "snowflake"
 Requires-Dist: pandas; extra == "snowflake"
+Requires-Dist: numpy; extra == "snowflake"
 Provides-Extra: wikipedia
 Requires-Dist: wikipedia; extra == "wikipedia"
-Requires-Dist: numpy; extra == "wikipedia"
 Requires-Dist: pandas; extra == "wikipedia"
+Requires-Dist: numpy; extra == "wikipedia"
 Provides-Extra: weaviate
 Requires-Dist: weaviate-client; extra == "weaviate"
-Requires-Dist: numpy; extra == "weaviate"
 Requires-Dist: pandas; extra == "weaviate"
+Requires-Dist: numpy; extra == "weaviate"
 Provides-Extra: databricks-volumes
 Requires-Dist: databricks-sdk; extra == "databricks-volumes"
-Requires-Dist: numpy; extra == "databricks-volumes"
 Requires-Dist: pandas; extra == "databricks-volumes"
+Requires-Dist: numpy; extra == "databricks-volumes"
 Provides-Extra: databricks-delta-tables
 Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
-Requires-Dist: numpy; extra == "databricks-delta-tables"
 Requires-Dist: pandas; extra == "databricks-delta-tables"
+Requires-Dist: numpy; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
-Requires-Dist: numpy; extra == "singlestore"
 Requires-Dist: pandas; extra == "singlestore"
+Requires-Dist: numpy; extra == "singlestore"
 Provides-Extra: vectara
 Requires-Dist: requests; extra == "vectara"
-Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: httpx; extra == "vectara"
-Requires-Dist: numpy; extra == "vectara"
+Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: pandas; extra == "vectara"
+Requires-Dist: numpy; extra == "vectara"
 Provides-Extra: vastdb
-Requires-Dist: vastdb; extra == "vastdb"
-Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
-Requires-Dist: numpy; extra == "vastdb"
+Requires-Dist: ibis; extra == "vastdb"
+Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: pandas; extra == "vastdb"
+Requires-Dist: numpy; extra == "vastdb"
 Provides-Extra: zendesk
 Requires-Dist: httpx; extra == "zendesk"
-Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: bs4; extra == "zendesk"
-Requires-Dist: numpy; extra == "zendesk"
+Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: pandas; extra == "zendesk"
+Requires-Dist: numpy; extra == "zendesk"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
-Requires-Dist: numpy; extra == "embed-huggingface"
 Requires-Dist: pandas; extra == "embed-huggingface"
+Requires-Dist: numpy; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
 Requires-Dist: openai; extra == "embed-octoai"
 Requires-Dist: tiktoken; extra == "embed-octoai"
-Requires-Dist: numpy; extra == "embed-octoai"
 Requires-Dist: pandas; extra == "embed-octoai"
+Requires-Dist: numpy; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
-Requires-Dist: numpy; extra == "embed-vertexai"
 Requires-Dist: pandas; extra == "embed-vertexai"
+Requires-Dist: numpy; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
 Requires-Dist: voyageai; extra == "embed-voyageai"
-Requires-Dist: numpy; extra == "embed-voyageai"
 Requires-Dist: pandas; extra == "embed-voyageai"
+Requires-Dist: numpy; extra == "embed-voyageai"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
-Requires-Dist: numpy; extra == "embed-mixedbreadai"
 Requires-Dist: pandas; extra == "embed-mixedbreadai"
+Requires-Dist: numpy; extra == "embed-mixedbreadai"
 Provides-Extra: openai
 Requires-Dist: openai; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
-Requires-Dist: numpy; extra == "openai"
 Requires-Dist: pandas; extra == "openai"
+Requires-Dist: numpy; extra == "openai"
 Provides-Extra: bedrock
 Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: aioboto3; extra == "bedrock"
-Requires-Dist: numpy; extra == "bedrock"
 Requires-Dist: pandas; extra == "bedrock"
+Requires-Dist: numpy; extra == "bedrock"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
-Requires-Dist: numpy; extra == "togetherai"
 Requires-Dist: pandas; extra == "togetherai"
+Requires-Dist: numpy; extra == "togetherai"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/RECORD
@@ -91,8 +91,6 @@ test/unit/v2/chunkers/test_chunkers.py,sha256=HSr3_lsoMw1nkDhkjO0-NOTEomRdR9oxCr
 test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/test_confluence.py,sha256=lN6nnU5qOtmsjIGcz65roepm76w4vPF7AmSzi9vqV78,1919
 test/unit/v2/connectors/test_jira.py,sha256=XEBBDSdNZWUVO5JbpiSsjazJYmbLsgXUOW-APqPRKLg,12113
-test/unit/v2/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/unit/v2/connectors/databricks/test_volumes_table.py,sha256=-R_EJHqv1BseGRK9VRAZhF-2EXA64LAlhycoyIu556U,1078
 test/unit/v2/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=WKpDKvEGalh8LYRqN9xA7CfMPOPHo_VcZbnCXdkVjho,14513
 test/unit/v2/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -113,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=…
+unstructured_ingest/__version__.py,sha256=UDy7drjkPUljex5sEiDR3ZALQNnlcrCXwJShdKZ37Ek,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -454,14 +452,15 @@ unstructured_ingest/v2/processes/connectors/slack.py,sha256=vbBVCYEd741-n2v6eAXL
 unstructured_ingest/v2/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
 unstructured_ingest/v2/processes/connectors/vectara.py,sha256=KUqgZ6D2KUOrW596ms-EekvQYDh-fXqBTa7KG-leXoo,12301
 unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
 unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
-unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=…
+unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=EghKdkt4nGacGxulSpjhToHOl5BRLbb3xNZpJzpWNX8,8002
 unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=h6qDxQhWlT7H4K1CEfKag1stTiD1o97VckJZERsofqU,2970
 unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=gjICJJwhDHBLt_L-LrMlvJ3DL1DYtwFpyMLb_zYvOIg,3755
 unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=Uss3XPPaq1AsqJOEy4RJgBJw2-bTjrXH2PgtVNYd2w0,3006
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=g1qYnIrML4TjN7rmC0MGrD5JzAprb6SymBHlEdOumz0,3113
-unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=…
+unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=LiSb66039idaRtMnTuHjR5ZqvdmmIu3ByUgFQ1a3iZQ,8264
 unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
 unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=VCoQ3h289BO4A2kJKZXUVB0QOcaQif-HeRgg-xXzn10,2976
 unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=DM4pygQAnP-dtuFEFAVeBfGt0pzrfkltteCai0GKnG0,4439
@@ -582,9 +581,9 @@ unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=_I3OMdpUElQdIwVs7W9ORU1kncNaZ_nr6lbxeKE8uaU,1014
 unstructured_ingest/v2/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/types/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
-unstructured_ingest-0.6.1.dist-info/LICENSE.md,sha256=…
-unstructured_ingest-0.6.1.dist-info/METADATA,sha256=…
-unstructured_ingest-0.6.1.dist-info/WHEEL,sha256=…
-unstructured_ingest-0.6.1.dist-info/entry_points.txt,sha256=…
-unstructured_ingest-0.6.1.dist-info/top_level.txt,sha256=…
-unstructured_ingest-0.6.1.dist-info/RECORD,,
+unstructured_ingest-0.6.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.6.2.dist-info/METADATA,sha256=yUMpJD0UXDhUG1cIIpHkjn-VU2AScEaA12wLmISmG-A,14998
+unstructured_ingest-0.6.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.6.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.6.2.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.6.2.dist-info/RECORD,,
test/unit/v2/connectors/databricks/__init__.py
DELETED — file without changes (empty file)
test/unit/v2/connectors/databricks/test_volumes_table.py
DELETED
@@ -1,44 +0,0 @@
-from pathlib import Path
-
-import pytest
-from pytest_mock import MockerFixture
-
-from unstructured_ingest.v2.processes.connectors.databricks.volumes_table import (
-    DatabricksVolumeDeltaTableStager,
-)
-
-
-@pytest.fixture
-def stager():
-    return DatabricksVolumeDeltaTableStager()
-
-
-@pytest.mark.parametrize(
-    ("output_path", "called_output_path"),
-    [
-        (
-            Path("/fake/path/output"),
-            Path("/fake/path/output.json"),
-        ),
-        (
-            Path("/fake/path/output.ndjson"),
-            Path("/fake/path/output.json"),
-        ),
-    ],
-)
-def test_write_output(
-    mocker: MockerFixture,
-    stager: DatabricksVolumeDeltaTableStager,
-    output_path: Path,
-    called_output_path: Path,
-):
-    data = [{"key1": "value1", "key2": "value2"}]
-
-    mock_get_data = mocker.patch(
-        "unstructured_ingest.v2.processes.connectors.databricks.volumes_table.write_data",
-        return_value=None,
-    )
-
-    stager.write_output(output_path, data)
-
-    mock_get_data.assert_called_once_with(path=called_output_path, data=data, indent=None)
{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/LICENSE.md
RENAMED — file without changes

{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/WHEEL
RENAMED — file without changes

{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/entry_points.txt
RENAMED — file without changes

{unstructured_ingest-0.6.1.dist-info → unstructured_ingest-0.6.2.dist-info}/top_level.txt
RENAMED — file without changes