unstructured-ingest 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of unstructured-ingest might be problematic.

unstructured_ingest/__version__.py

@@ -1 +1 @@
-__version__ = "0.6.1"  # pragma: no cover
+__version__ = "0.6.2"  # pragma: no cover
unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql (new file)

@@ -0,0 +1,10 @@
+CREATE TABLE elements (
+    id STRING NOT NULL PRIMARY KEY,
+    record_id STRING NOT NULL,
+    element_id STRING NOT NULL,
+    text STRING,
+    embeddings ARRAY<FLOAT>,
+    type STRING,
+    metadata VARIANT
+);
+
unstructured_ingest/v2/processes/connectors/databricks/volumes.py

@@ -5,7 +5,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional
 from uuid import NAMESPACE_DNS, uuid5
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, Secret
 
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import (
@@ -61,12 +61,14 @@ class DatabricksVolumesAccessConfig(AccessConfig):
 
 
 class DatabricksVolumesConnectionConfig(ConnectionConfig, ABC):
+    access_config: Secret[DatabricksVolumesAccessConfig]
     host: Optional[str] = Field(
         default=None,
         description="The Databricks host URL for either the "
         "Databricks workspace endpoint or the "
         "Databricks accounts endpoint.",
     )
+    user_agent: str = "unstructuredio_oss"
 
     def wrap_error(self, e: Exception) -> Exception:
         from databricks.sdk.errors.base import DatabricksError
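For context, Pydantic's `Secret` wrapper keeps the nested access config masked in reprs and serialized output until `.get_secret_value()` is called, which is how `get_client` below unwraps it. A minimal illustration with a stand-in model (the class and field names here are hypothetical, not the package's):

```python
from pydantic import BaseModel, Secret


class AccessConfig(BaseModel):
    # Stand-in for DatabricksVolumesAccessConfig; the token field is illustrative.
    token: str


class ConnectionConfig(BaseModel):
    access_config: Secret[AccessConfig]


conn = ConnectionConfig(access_config=AccessConfig(token="dapi-example"))
print(conn)  # access_config=Secret('**********') -- masked in reprs and logs
print(conn.access_config.get_secret_value().token)  # explicit unwrap: "dapi-example"
```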
@@ -94,11 +96,14 @@ class DatabricksVolumesConnectionConfig(ConnectionConfig, ABC):
     @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes")
     def get_client(self) -> "WorkspaceClient":
         from databricks.sdk import WorkspaceClient
+        from databricks.sdk.core import Config
 
-        return WorkspaceClient(
+        config = Config(
             host=self.host,
             **self.access_config.get_secret_value().model_dump(),
-        )
+        ).with_user_agent_extra("PyDatabricksSdk", self.user_agent)
+
+        return WorkspaceClient(config=config)
 
 
 class DatabricksVolumesIndexerConfig(IndexerConfig, DatabricksPathMixin):
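The new `user_agent` field is threaded into the SDK's request headers via `Config.with_user_agent_extra`, letting Databricks attribute API traffic to this integration. A standalone sketch of the same pattern (host and token values are placeholders, and token-based auth is assumed):

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.core import Config

# Build an SDK Config, then append an extra product tag to the
# User-Agent header sent with every API request.
config = Config(
    host="https://adb-1234567890123456.7.azuredatabricks.net",  # placeholder host
    token="dapi-example",  # placeholder personal access token
).with_user_agent_extra("PyDatabricksSdk", "unstructuredio_oss")

client = WorkspaceClient(config=config)
```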
unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py

@@ -1,14 +1,20 @@
+import json
 import os
-import tempfile
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional
 
 from pydantic import Field
 
-from unstructured_ingest.utils.data_prep import get_data_df, write_data
-from unstructured_ingest.v2.interfaces import Uploader, UploaderConfig
+from unstructured_ingest.utils.data_prep import get_json_data, write_data
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
+from unstructured_ingest.v2.interfaces import (
+    Uploader,
+    UploaderConfig,
+    UploadStager,
+    UploadStagerConfig,
+)
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
@@ -16,28 +22,50 @@ from unstructured_ingest.v2.processes.connector_registry import (
 from unstructured_ingest.v2.processes.connectors.databricks.volumes import DatabricksPathMixin
 from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
     DatabricksDeltaTablesConnectionConfig,
-    DatabricksDeltaTablesUploadStager,
     DatabricksDeltaTablesUploadStagerConfig,
 )
 from unstructured_ingest.v2.types.file_data import FileData
+from unstructured_ingest.v2.utils import get_enhanced_element_id
 
 CONNECTOR_TYPE = "databricks_volume_delta_tables"
 
 if TYPE_CHECKING:
-    from pandas import DataFrame
+    pass
 
 
 class DatabricksVolumeDeltaTableUploaderConfig(UploaderConfig, DatabricksPathMixin):
     database: str = Field(description="Database name", default="default")
-    table_name: str = Field(description="Table name")
+    table_name: Optional[str] = Field(description="Table name", default=None)
+
+
+class DatabricksVolumeDeltaTableStagerConfig(UploadStagerConfig):
+    pass
 
 
 @dataclass
-class DatabricksVolumeDeltaTableStager(DatabricksDeltaTablesUploadStager):
-    def write_output(self, output_path: Path, data: list[dict]) -> Path:
+class DatabricksVolumeDeltaTableStager(UploadStager):
+    upload_stager_config: DatabricksVolumeDeltaTableStagerConfig = field(
+        default_factory=DatabricksVolumeDeltaTableStagerConfig
+    )
+
+    def run(
+        self,
+        elements_filepath: Path,
+        output_dir: Path,
+        output_filename: str,
+        file_data: FileData,
+        **kwargs: Any,
+    ) -> Path:
         # To avoid new line issues when migrating from volumes into delta tables, omit indenting
         # and always write it as a json file
+        output_dir.mkdir(exist_ok=True, parents=True)
+        output_path = output_dir / output_filename
         final_output_path = output_path.with_suffix(".json")
+        data = get_json_data(path=elements_filepath)
+        for element in data:
+            element["id"] = get_enhanced_element_id(element_dict=element, file_data=file_data)
+            element[RECORD_ID_LABEL] = file_data.identifier
+            element["metadata"] = json.dumps(element.get("metadata", {}))
         write_data(path=final_output_path, data=data, indent=None)
         return final_output_path
 
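For intuition, here is roughly what the new stager does to each element before writing the `.json` output. The values are illustrative, and `RECORD_ID_LABEL` is assumed to resolve to `"record_id"` (matching the `record_id` column in the schema asset above):

```python
import json

# Illustrative element as produced by partitioning upstream.
element = {
    "element_id": "abc123",
    "text": "Hello world",
    "type": "NarrativeText",
    "metadata": {"filename": "report.pdf", "page_number": 1},
}

# After staging: a deterministic per-record id, the record-id label used
# later for idempotent deletes, and metadata flattened to a JSON string
# so it can be re-parsed into the table's VARIANT column.
element["id"] = "9c0f3f3e-..."  # get_enhanced_element_id(...) in the real code
element["record_id"] = "<file_data.identifier>"
element["metadata"] = json.dumps(element["metadata"])
```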
@@ -49,6 +77,29 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
     connector_type: str = CONNECTOR_TYPE
     _columns: Optional[dict[str, str]] = None
 
+    def init(self, **kwargs: Any) -> None:
+        self.create_destination(**kwargs)
+
+    def create_destination(
+        self, destination_name: str = "unstructuredautocreated", **kwargs: Any
+    ) -> bool:
+        table_name = self.upload_config.table_name or destination_name
+        self.upload_config.table_name = table_name
+        connectors_dir = Path(__file__).parents[1]
+        collection_config_file = connectors_dir / "assets" / "databricks_delta_table_schema.sql"
+        with self.get_cursor() as cursor:
+            cursor.execute("SHOW TABLES")
+            table_names = [r[1] for r in cursor.fetchall()]
+            if table_name in table_names:
+                return False
+            with collection_config_file.open() as schema_file:
+                data_lines = schema_file.readlines()
+            data_lines[0] = data_lines[0].replace("elements", table_name)
+            destination_schema = "".join([line.strip() for line in data_lines])
+            logger.info(f"creating table {table_name} for user")
+            cursor.execute(destination_schema)
+            return True
+
     def precheck(self) -> None:
         with self.connection_config.get_cursor() as cursor:
             cursor.execute("SHOW CATALOGS")
@@ -68,14 +119,6 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
                         self.upload_config.database, ", ".join(databases)
                     )
                 )
-            cursor.execute(f"SHOW TABLES IN {self.upload_config.database}")
-            table_names = [r[1] for r in cursor.fetchall()]
-            if self.upload_config.table_name not in table_names:
-                raise ValueError(
-                    "Table {} not found in {}".format(
-                        self.upload_config.table_name, ", ".join(table_names)
-                    )
-                )
 
     def get_output_path(self, file_data: FileData, suffix: str = ".json") -> str:
         filename = Path(file_data.source_identifiers.filename)
@@ -98,51 +141,42 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
             self._columns = {desc[0]: desc[1] for desc in cursor.description}
         return self._columns
 
-    def _fit_to_schema(self, df: "DataFrame", add_missing_columns: bool = True) -> "DataFrame":
-        import pandas as pd
-
-        table_columns = self.get_table_columns()
-        columns = set(df.columns)
-        schema_fields = set(table_columns.keys())
-        columns_to_drop = columns - schema_fields
-        missing_columns = schema_fields - columns
-
-        if columns_to_drop:
-            logger.info(
-                "Following columns will be dropped to match the table's schema: "
-                f"{', '.join(columns_to_drop)}"
-            )
-        if missing_columns and add_missing_columns:
-            logger.info(
-                "Following null filled columns will be added to match the table's schema:"
-                f" {', '.join(missing_columns)} "
+    def can_delete(self) -> bool:
+        existing_columns = self.get_table_columns()
+        return RECORD_ID_LABEL in existing_columns
+
+    def delete_previous_content(self, file_data: FileData) -> None:
+        logger.debug(
+            f"deleting any content with metadata "
+            f"{RECORD_ID_LABEL}={file_data.identifier} "
+            f"from delta table: {self.upload_config.table_name}"
+        )
+        with self.get_cursor() as cursor:
+            cursor.execute(
+                f"DELETE FROM {self.upload_config.table_name} WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'"  # noqa: E501
             )
-
-        df = df.drop(columns=columns_to_drop)
-
-        if add_missing_columns:
-            for column in missing_columns:
-                df[column] = pd.Series()
-        return df
+            results = cursor.fetchall()
+            deleted_rows = results[0][0]
+            logger.debug(f"deleted {deleted_rows} rows from table {self.upload_config.table_name}")
 
     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        with tempfile.TemporaryDirectory() as temp_dir:
-            df = get_data_df()
-            df = self._fit_to_schema(df=df)
-            temp_path = Path(temp_dir) / path.name
-            df.to_json(temp_path, orient="records", lines=False)
-            with self.get_cursor(staging_allowed_local_path=temp_dir) as cursor:
-                catalog_path = self.get_output_path(file_data=file_data)
-                logger.debug(f"uploading {path.as_posix()} to {catalog_path}")
-                cursor.execute(f"PUT '{temp_path.as_posix()}' INTO '{catalog_path}' OVERWRITE")
-                logger.debug(
-                    f"migrating content from {catalog_path} to "
-                    f"table {self.upload_config.table_name}"
-                )
-                columns = list(df.columns)
-                column_str = ", ".join(columns)
-                sql_statment = f"INSERT INTO `{self.upload_config.table_name}` ({column_str}) SELECT {column_str} FROM json.`{catalog_path}`"  # noqa: E501
-                cursor.execute(sql_statment)
+        if self.can_delete():
+            self.delete_previous_content(file_data=file_data)
+        with self.get_cursor(staging_allowed_local_path=path.parent.as_posix()) as cursor:
+            catalog_path = self.get_output_path(file_data=file_data)
+            logger.debug(f"uploading {path.as_posix()} to {catalog_path}")
+            cursor.execute(f"PUT '{path.as_posix()}' INTO '{catalog_path}' OVERWRITE")
+            logger.debug(
+                f"migrating content from {catalog_path} to "
+                f"table {self.upload_config.table_name}"
+            )
+            data = get_json_data(path=path)
+            columns = data[0].keys()
+            select_columns = ["PARSE_JSON(metadata)" if c == "metadata" else c for c in columns]
+            column_str = ", ".join(columns)
+            select_column_str = ", ".join(select_columns)
+            sql_statment = f"INSERT INTO `{self.upload_config.table_name}` ({column_str}) SELECT {select_column_str} FROM json.`{catalog_path}`"  # noqa: E501
+            cursor.execute(sql_statment)
 
 
 databricks_volumes_delta_tables_destination_entry = DestinationRegistryEntry(
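Putting the pieces together, the upload step now issues three statements per staged file. A sketch of what they look like once the f-strings above are rendered (table name, record id, and paths are illustrative, and `RECORD_ID_LABEL` is assumed to be `"record_id"`):

```python
table = "elements"
record_id = "<file_data.identifier>"
local_path = "/tmp/stage/report.pdf.json"
catalog_path = "/Volumes/catalog/schema/volume/report.pdf.json"

# 1. Idempotency: drop rows left over from a previous ingest of the same record.
delete_stmt = f"DELETE FROM {table} WHERE record_id = '{record_id}'"

# 2. Stage the local JSON file into the Databricks volume.
put_stmt = f"PUT '{local_path}' INTO '{catalog_path}' OVERWRITE"

# 3. Copy from the staged JSON into the delta table; the stringified
#    metadata is rehydrated into the VARIANT column via PARSE_JSON.
insert_stmt = (
    f"INSERT INTO `{table}` (id, record_id, element_id, text, embeddings, type, metadata) "
    f"SELECT id, record_id, element_id, text, embeddings, type, PARSE_JSON(metadata) "
    f"FROM json.`{catalog_path}`"
)
```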
unstructured_ingest-0.6.2.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.6.1
+Version: 0.6.2
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -23,347 +23,347 @@ Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: click
-Requires-Dist: tqdm
-Requires-Dist: opentelemetry-sdk
 Requires-Dist: python-dateutil
 Requires-Dist: dataclasses_json
 Requires-Dist: pydantic>=2.7
-Requires-Dist: numpy
+Requires-Dist: tqdm
+Requires-Dist: opentelemetry-sdk
 Requires-Dist: pandas
+Requires-Dist: numpy

The rest of this hunk reorders the Requires-Dist entries of every optional extra in the same way, without adding or removing any dependency: numpy now comes after pandas at the end of each extra's list. A representative excerpt, for the "remote" extra:

 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
-Requires-Dist: numpy; extra == "remote"
 Requires-Dist: pandas; extra == "remote"
+Requires-Dist: numpy; extra == "remote"

The same numpy-after-pandas reorder applies to all other extras (csv, doc, docx, epub, md, msg, odt, org, pdf, ppt, pptx, rtf, rst, tsv, xlsx, airtable, astradb, azure, azure-ai-search, biomed, box, chroma, clarifai, confluence, couchbase, delta-table, discord, dropbox, duckdb, elasticsearch, gcs, github, gitlab, google-drive, hubspot, ibm-watsonx-s3, jira, kafka, kdbai, lancedb, milvus, mongodb, neo4j, notion, onedrive, opensearch, outlook, pinecone, postgres, qdrant, reddit, redis, s3, sharepoint, salesforce, sftp, slack, snowflake, wikipedia, weaviate, databricks-volumes, databricks-delta-tables, singlestore, vectara, vastdb, zendesk, embed-huggingface, embed-octoai, embed-vertexai, embed-voyageai, embed-mixedbreadai, openai, bedrock, togetherai). A few of them (azure, confluence, dropbox, gcs, ibm-watsonx-s3, neo4j, notion, onedrive, s3, vectara, vastdb, zendesk) additionally shuffle the relative order of their other dependencies (fsspec, httpx, aiofiles, bs4, msal, backoff, pyarrow, and similar), again without changing the dependency set. The hunk closes with unchanged metadata lines:

 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
unstructured_ingest-0.6.2.dist-info/RECORD

@@ -91,8 +91,6 @@ test/unit/v2/chunkers/test_chunkers.py,sha256=HSr3_lsoMw1nkDhkjO0-NOTEomRdR9oxCr
 test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/test_confluence.py,sha256=lN6nnU5qOtmsjIGcz65roepm76w4vPF7AmSzi9vqV78,1919
 test/unit/v2/connectors/test_jira.py,sha256=XEBBDSdNZWUVO5JbpiSsjazJYmbLsgXUOW-APqPRKLg,12113
-test/unit/v2/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/unit/v2/connectors/databricks/test_volumes_table.py,sha256=-R_EJHqv1BseGRK9VRAZhF-2EXA64LAlhycoyIu556U,1078
 test/unit/v2/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=WKpDKvEGalh8LYRqN9xA7CfMPOPHo_VcZbnCXdkVjho,14513
 test/unit/v2/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -113,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=vYkj5wI9darc7y1Fll8uAtxzlI0lqsa5gGerwBBkeIQ,42
+unstructured_ingest/__version__.py,sha256=UDy7drjkPUljex5sEiDR3ZALQNnlcrCXwJShdKZ37Ek,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -454,14 +452,15 @@ unstructured_ingest/v2/processes/connectors/slack.py,sha256=vbBVCYEd741-n2v6eAXL
 unstructured_ingest/v2/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
 unstructured_ingest/v2/processes/connectors/vectara.py,sha256=KUqgZ6D2KUOrW596ms-EekvQYDh-fXqBTa7KG-leXoo,12301
 unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
 unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
-unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=8fg11-32If4iQGZTT9MEl1DOWZ5s3Qgj1OOzMVaHldU,7749
+unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=EghKdkt4nGacGxulSpjhToHOl5BRLbb3xNZpJzpWNX8,8002
 unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=h6qDxQhWlT7H4K1CEfKag1stTiD1o97VckJZERsofqU,2970
 unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=gjICJJwhDHBLt_L-LrMlvJ3DL1DYtwFpyMLb_zYvOIg,3755
 unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=Uss3XPPaq1AsqJOEy4RJgBJw2-bTjrXH2PgtVNYd2w0,3006
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=g1qYnIrML4TjN7rmC0MGrD5JzAprb6SymBHlEdOumz0,3113
-unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=5BArD1FkLC6wRJC0LxjXxQmYfmtF7r9Zrd8CtaGgWls,6855
+unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=LiSb66039idaRtMnTuHjR5ZqvdmmIu3ByUgFQ1a3iZQ,8264
 unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
 unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=VCoQ3h289BO4A2kJKZXUVB0QOcaQif-HeRgg-xXzn10,2976
 unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=DM4pygQAnP-dtuFEFAVeBfGt0pzrfkltteCai0GKnG0,4439
@@ -582,9 +581,9 @@ unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=_I3OMdpUElQdIwVs7W9ORU1kncNaZ_nr6lbxeKE8uaU,1014
 unstructured_ingest/v2/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/types/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
-unstructured_ingest-0.6.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.6.1.dist-info/METADATA,sha256=Babhsu1h1L0nvRFeImk9Jn-jPjnaW-jdz6mhB3jkmbI,14998
-unstructured_ingest-0.6.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-unstructured_ingest-0.6.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.6.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
-unstructured_ingest-0.6.1.dist-info/RECORD,,
+unstructured_ingest-0.6.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.6.2.dist-info/METADATA,sha256=yUMpJD0UXDhUG1cIIpHkjn-VU2AScEaA12wLmISmG-A,14998
+unstructured_ingest-0.6.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.6.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.6.2.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.6.2.dist-info/RECORD,,
test/unit/v2/connectors/databricks/test_volumes_table.py (deleted)

@@ -1,44 +0,0 @@
-from pathlib import Path
-
-import pytest
-from pytest_mock import MockerFixture
-
-from unstructured_ingest.v2.processes.connectors.databricks.volumes_table import (
-    DatabricksVolumeDeltaTableStager,
-)
-
-
-@pytest.fixture
-def stager():
-    return DatabricksVolumeDeltaTableStager()
-
-
-@pytest.mark.parametrize(
-    ("output_path", "called_output_path"),
-    [
-        (
-            Path("/fake/path/output"),
-            Path("/fake/path/output.json"),
-        ),
-        (
-            Path("/fake/path/output.ndjson"),
-            Path("/fake/path/output.json"),
-        ),
-    ],
-)
-def test_write_output(
-    mocker: MockerFixture,
-    stager: DatabricksVolumeDeltaTableStager,
-    output_path: Path,
-    called_output_path: Path,
-):
-    data = [{"key1": "value1", "key2": "value2"}]
-
-    mock_get_data = mocker.patch(
-        "unstructured_ingest.v2.processes.connectors.databricks.volumes_table.write_data",
-        return_value=None,
-    )
-
-    stager.write_output(output_path, data)
-
-    mock_get_data.assert_called_once_with(path=called_output_path, data=data, indent=None)