unstructured-ingest 0.5.20__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (23)
  1. test/integration/connectors/test_astradb.py +8 -2
  2. unstructured_ingest/__version__.py +1 -1
  3. unstructured_ingest/embed/interfaces.py +7 -3
  4. unstructured_ingest/utils/data_prep.py +17 -5
  5. unstructured_ingest/utils/table.py +11 -4
  6. unstructured_ingest/v2/processes/connectors/delta_table.py +8 -3
  7. unstructured_ingest/v2/processes/connectors/duckdb/base.py +4 -3
  8. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +5 -2
  9. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +5 -2
  10. unstructured_ingest/v2/processes/connectors/kdbai.py +6 -3
  11. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +10 -2
  12. unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +5 -3
  13. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -1
  14. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +7 -3
  15. unstructured_ingest/v2/processes/connectors/sql/sql.py +22 -9
  16. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -1
  17. unstructured_ingest/v2/processes/connectors/sql/vastdb.py +5 -7
  18. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/METADATA +175 -24
  19. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/RECORD +23 -23
  20. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/LICENSE.md +0 -0
  21. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/WHEEL +0 -0
  22. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/entry_points.txt +0 -0
  23. {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/top_level.txt +0 -0
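Taken together, these changes have one theme: numpy and pandas move from module-level imports to deferred, function-local imports guarded by the package's `requires_dependencies` decorator, with type hints kept behind `TYPE_CHECKING`, so importing a connector module no longer requires the heavy dependencies to be installed. A minimal sketch of the pattern as it recurs throughout the diff; the `load_records` helper below is illustrative only, not part of the package:

```python
from typing import TYPE_CHECKING

from unstructured_ingest.utils.dep_check import requires_dependencies

if TYPE_CHECKING:
    # Only evaluated by type checkers; no runtime pandas requirement.
    from pandas import DataFrame


@requires_dependencies(["pandas"])
def load_records(path: str) -> "DataFrame":
    # pandas is imported lazily, so this module stays importable
    # even when the optional dependency is absent.
    import pandas as pd

    return pd.read_csv(path)
```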
@@ -1,3 +1,4 @@
+ import contextlib
  import json
  import os
  from dataclasses import dataclass
@@ -231,6 +232,13 @@ def test_astra_create_destination():
  )
  collection_name = "system_created-123"
  formatted_collection_name = "system_created_123"
+
+ client = AstraDBClient()
+ db = client.get_database(api_endpoint=env_data.api_endpoint, token=env_data.token)
+ with contextlib.suppress(Exception):
+ # drop collection before trying to create it
+ db.drop_collection(formatted_collection_name)
+
  created = uploader.create_destination(destination_name=collection_name, vector_length=3072)
  assert created
  assert uploader.upload_config.collection_name == formatted_collection_name
@@ -239,8 +247,6 @@ def test_astra_create_destination():
  assert not created

  # cleanup
- client = AstraDBClient()
- db = client.get_database(api_endpoint=env_data.api_endpoint, token=env_data.token)
  db.drop_collection(formatted_collection_name)

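The test now drops the target collection (ignoring failures) before calling `create_destination`, so reruns against a shared Astra DB instance start from a clean slate, and the later cleanup reuses the same client. A hedged, standalone sketch of that drop-before-create idea:

```python
import contextlib


def reset_collection(db, name: str) -> None:
    # Suppress errors such as "collection does not exist" so the
    # subsequent create call starts from a known state.
    with contextlib.suppress(Exception):
        db.drop_collection(name)
```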
@@ -1 +1 @@
- __version__ = "0.5.20" # pragma: no cover
+ __version__ = "0.5.21" # pragma: no cover
@@ -2,10 +2,10 @@ from abc import ABC
  from dataclasses import dataclass
  from typing import Any, Optional

- import numpy as np
  from pydantic import BaseModel, Field

  from unstructured_ingest.utils.data_prep import batch_generator
+ from unstructured_ingest.utils.dep_check import requires_dependencies

  EMBEDDINGS_KEY = "embeddings"

@@ -32,7 +32,6 @@ class BaseEncoder(ABC):

  @dataclass
  class BaseEmbeddingEncoder(BaseEncoder, ABC):
-
  def initialize(self):
  """Initializes the embedding encoder class. Should also validate the instance
  is properly configured: e.g., embed a single a element"""
@@ -46,8 +45,11 @@ class BaseEmbeddingEncoder(BaseEncoder, ABC):
  return self.embed_query(query="Q")

  @property
+ @requires_dependencies(["numpy"])
  def is_unit_vector(self) -> bool:
  """Denotes if the embedding vector is a unit vector."""
+ import numpy as np
+
  exemplary_embedding = self.get_exemplary_embedding()
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0, rtol=1e-03)

@@ -86,7 +88,6 @@ class BaseEmbeddingEncoder(BaseEncoder, ABC):

  @dataclass
  class AsyncBaseEmbeddingEncoder(BaseEncoder, ABC):
-
  async def initialize(self):
  """Initializes the embedding encoder class. Should also validate the instance
  is properly configured: e.g., embed a single a element"""
@@ -100,8 +101,11 @@ class AsyncBaseEmbeddingEncoder(BaseEncoder, ABC):
  return await self.embed_query(query="Q")

  @property
+ @requires_dependencies(["numpy"])
  async def is_unit_vector(self) -> bool:
  """Denotes if the embedding vector is a unit vector."""
+ import numpy as np
+
  exemplary_embedding = await self.get_exemplary_embedding()
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0, rtol=1e-03)

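Placing `@requires_dependencies(["numpy"])` above the lazy `import numpy as np` means a missing optional dependency surfaces as a clear, named error before the import is attempted. A rough sketch of how such a guard decorator can work; this is an illustration under assumptions, not the package's actual implementation in `dep_check`:

```python
import functools
import importlib.util
from typing import Callable, TypeVar

F = TypeVar("F", bound=Callable)


def requires_deps(deps: list[str]) -> Callable[[F], F]:
    """Illustrative guard: fail fast with a readable message if a dep is missing."""

    def decorator(func: F) -> F:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Check importability without actually importing the modules.
            missing = [d for d in deps if importlib.util.find_spec(d) is None]
            if missing:
                raise ImportError(f"{func.__name__} requires: {', '.join(missing)}")
            return func(*args, **kwargs)

        return wrapper  # type: ignore[return-value]

    return decorator
```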
@@ -2,20 +2,22 @@ import itertools
  import json
  from datetime import datetime
  from pathlib import Path
- from typing import Any, Generator, Iterable, Optional, Sequence, TypeVar, Union, cast
-
- import pandas as pd
+ from typing import TYPE_CHECKING, Any, Generator, Iterable, Optional, Sequence, TypeVar, Union, cast

  from unstructured_ingest.utils import ndjson
+ from unstructured_ingest.utils.dep_check import requires_dependencies
  from unstructured_ingest.v2.logger import logger

+ if TYPE_CHECKING:
+ from pandas import DataFrame
+
  DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d+%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z")

  T = TypeVar("T")
  IterableT = Iterable[T]


- def split_dataframe(df: pd.DataFrame, chunk_size: int = 100) -> Generator[pd.DataFrame, None, None]:
+ def split_dataframe(df: "DataFrame", chunk_size: int = 100) -> Generator["DataFrame", None, None]:
  num_chunks = len(df) // chunk_size + 1
  for i in range(num_chunks):
  yield df[i * chunk_size : (i + 1) * chunk_size]
@@ -144,9 +146,13 @@ def get_data_by_suffix(path: Path) -> list[dict]:
  elif path.suffix == ".ndjson":
  return ndjson.load(f)
  elif path.suffix == ".csv":
+ import pandas as pd
+
  df = pd.read_csv(path)
  return df.to_dict(orient="records")
  elif path.suffix == ".parquet":
+ import pandas as pd
+
  df = pd.read_parquet(path)
  return df.to_dict(orient="records")
  else:
@@ -180,6 +186,9 @@ def get_data(path: Union[Path, str]) -> list[dict]:
  return ndjson.load(f)
  except Exception as e:
  logger.warning(f"failed to read {path} as ndjson: {e}")
+
+ import pandas as pd
+
  try:
  df = pd.read_csv(path)
  return df.to_dict(orient="records")
@@ -202,7 +211,10 @@ def get_json_data(path: Path) -> list[dict]:
  raise ValueError(f"Unsupported file type: {path}")


- def get_data_df(path: Path) -> pd.DataFrame:
+ @requires_dependencies(["pandas"])
+ def get_data_df(path: Path) -> "DataFrame":
+ import pandas as pd
+
  with path.open() as f:
  if path.suffix == ".json":
  data = json.load(f)
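`split_dataframe` keeps its generator behavior; only the annotations change to the string form `"DataFrame"`, so pandas is needed at call time rather than at import time. A small usage sketch with illustrative data (assumes pandas is installed):

```python
import pandas as pd

from unstructured_ingest.utils.data_prep import split_dataframe

df = pd.DataFrame({"id": range(250)})
# Yields DataFrame slices of at most 100 rows each: 100, 100, 50.
for chunk in split_dataframe(df, chunk_size=100):
    print(len(chunk))
```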
@@ -1,11 +1,16 @@
- from typing import Any
-
- import pandas as pd
+ from typing import TYPE_CHECKING, Any

  from unstructured_ingest.utils.data_prep import flatten_dict
+ from unstructured_ingest.utils.dep_check import requires_dependencies
+
+ if TYPE_CHECKING:
+ from pandas import DataFrame


+ @requires_dependencies(["pandas"])
  def get_default_pandas_dtypes() -> dict[str, Any]:
+ import pandas as pd
+
  return {
  "text": pd.StringDtype(), # type: ignore
  "type": pd.StringDtype(), # type: ignore
@@ -57,7 +62,9 @@ def get_default_pandas_dtypes() -> dict[str, Any]:
  def convert_to_pandas_dataframe(
  elements_dict: list[dict[str, Any]],
  drop_empty_cols: bool = False,
- ) -> pd.DataFrame:
+ ) -> "DataFrame":
+ import pandas as pd
+
  # Flatten metadata if it hasn't already been flattened
  for d in elements_dict:
  if metadata := d.pop("metadata", None):
@@ -3,10 +3,9 @@ import traceback
  from dataclasses import dataclass, field
  from multiprocessing import Process, Queue
  from pathlib import Path
- from typing import Any, Optional
+ from typing import TYPE_CHECKING, Any, Optional
  from urllib.parse import urlparse

- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.error import DestinationConnectionError
@@ -27,6 +26,9 @@ from unstructured_ingest.v2.processes.connector_registry import DestinationRegis

  CONNECTOR_TYPE = "delta_table"

+ if TYPE_CHECKING:
+ from pandas import DataFrame
+

  @requires_dependencies(["deltalake"], extras="delta-table")
  def write_deltalake_with_error_handling(queue, **kwargs):
@@ -136,7 +138,7 @@ class DeltaTableUploader(Uploader):
  logger.error(f"failed to validate connection: {e}", exc_info=True)
  raise DestinationConnectionError(f"failed to validate connection: {e}")

- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
  updated_upload_path = os.path.join(
  self.connection_config.table_uri, file_data.source_identifiers.relative_path
  )
@@ -172,7 +174,10 @@ class DeltaTableUploader(Uploader):
  logger.error(f"Exception occurred in write_deltalake: {error_message}")
  raise RuntimeError(f"Error in write_deltalake: {error_message}")

+ @requires_dependencies(["pandas"], extras="delta-table")
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+ import pandas as pd
+
  df = pd.DataFrame(data=data)
  self.upload_dataframe(df=df, file_data=file_data)

@@ -2,9 +2,8 @@ from dataclasses import dataclass
  from pathlib import Path
  from typing import Any

- import pandas as pd
-
  from unstructured_ingest.utils.data_prep import get_data, write_data
+ from unstructured_ingest.utils.dep_check import requires_dependencies
  from unstructured_ingest.v2.interfaces import FileData, UploadStager
  from unstructured_ingest.v2.utils import get_enhanced_element_id

@@ -55,7 +54,6 @@ _COLUMNS = (

  @dataclass
  class BaseDuckDBUploadStager(UploadStager):
-
  def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
  data = element_dict.copy()
  metadata: dict[str, Any] = data.pop("metadata", {})
@@ -72,6 +70,7 @@ class BaseDuckDBUploadStager(UploadStager):
  data = {k: v for k, v in data.items() if k in _COLUMNS}
  return data

+ @requires_dependencies(["pandas"], extras="duckdb")
  def run(
  self,
  elements_filepath: Path,
@@ -80,6 +79,8 @@ class BaseDuckDBUploadStager(UploadStager):
  output_filename: str,
  **kwargs: Any,
  ) -> Path:
+ import pandas as pd
+
  elements_contents = get_data(path=elements_filepath)
  output_filename_suffix = Path(elements_filepath).suffix
  output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"
@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.error import DestinationConnectionError
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUp

  if TYPE_CHECKING:
  from duckdb import DuckDBPyConnection as DuckDBConnection
+ from pandas import DataFrame

  CONNECTOR_TYPE = "duckdb"

@@ -101,7 +101,7 @@ class DuckDBUploader(Uploader):
  logger.error(f"failed to validate connection: {e}", exc_info=True)
  raise DestinationConnectionError(f"failed to validate connection: {e}")

- def upload_dataframe(self, df: pd.DataFrame) -> None:
+ def upload_dataframe(self, df: "DataFrame") -> None:
  logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")

  with self.connection_config.get_client() as conn:
@@ -109,7 +109,10 @@ class DuckDBUploader(Uploader):
  f"INSERT INTO {self.connection_config.db_schema}.{self.connection_config.table} BY NAME SELECT * FROM df" # noqa: E501
  )

+ @requires_dependencies(["pandas"], extras="duckdb")
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+ import pandas as pd
+
  df = pd.DataFrame(data=data)
  self.upload_dataframe(df=df)

@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.__version__ import __version__ as unstructured_io_ingest_version
@@ -24,6 +23,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUp

  if TYPE_CHECKING:
  from duckdb import DuckDBPyConnection as MotherDuckConnection
+ from pandas import DataFrame

  CONNECTOR_TYPE = "motherduck"

@@ -100,7 +100,7 @@ class MotherDuckUploader(Uploader):
  logger.error(f"failed to validate connection: {e}", exc_info=True)
  raise DestinationConnectionError(f"failed to validate connection: {e}")

- def upload_dataframe(self, df: pd.DataFrame) -> None:
+ def upload_dataframe(self, df: "DataFrame") -> None:
  logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")
  database = self.connection_config.database
  db_schema = self.connection_config.db_schema
@@ -109,7 +109,10 @@ class MotherDuckUploader(Uploader):
  with self.connection_config.get_client() as conn:
  conn.query(f'INSERT INTO "{database}"."{db_schema}"."{table}" BY NAME SELECT * FROM df')

+ @requires_dependencies(["pandas"], extras="duckdb")
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+ import pandas as pd
+
  df = pd.DataFrame(data=data)
  self.upload_dataframe(df=df)

@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.error import DestinationConnectionError
@@ -26,6 +25,7 @@ from unstructured_ingest.v2.utils import get_enhanced_element_id

  if TYPE_CHECKING:
  from kdbai_client import Database, Session, Table
+ from pandas import DataFrame

  CONNECTOR_TYPE = "kdbai"

@@ -118,11 +118,11 @@ class KdbaiUploader(Uploader):
  table = db.table(self.upload_config.table_name)
  yield table

- def upsert_batch(self, batch: pd.DataFrame):
+ def upsert_batch(self, batch: "DataFrame"):
  with self.get_table() as table:
  table.insert(batch)

- def process_dataframe(self, df: pd.DataFrame):
+ def process_dataframe(self, df: "DataFrame"):
  logger.debug(
  f"uploading {len(df)} entries to {self.connection_config.endpoint} "
  f"db {self.upload_config.database_name} in table {self.upload_config.table_name}"
@@ -130,7 +130,10 @@ class KdbaiUploader(Uploader):
  for batch_df in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
  self.upsert_batch(batch=batch_df)

+ @requires_dependencies(["pandas"], extras="kdbai")
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+ import pandas as pd
+
  df = pd.DataFrame(data=data)
  self.process_dataframe(df=df)

@@ -8,7 +8,6 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional

- import pandas as pd
  from pydantic import Field

  from unstructured_ingest.error import DestinationConnectionError
@@ -26,6 +25,7 @@ CONNECTOR_TYPE = "lancedb"
  if TYPE_CHECKING:
  from lancedb import AsyncConnection
  from lancedb.table import AsyncTable
+ from pandas import DataFrame


  class LanceDBConnectionConfig(ConnectionConfig, ABC):
@@ -69,6 +69,7 @@ class LanceDBUploadStager(UploadStager):
  default_factory=LanceDBUploadStagerConfig
  )

+ @requires_dependencies(["pandas"], extras="lancedb")
  def run(
  self,
  elements_filepath: Path,
@@ -77,6 +78,8 @@ class LanceDBUploadStager(UploadStager):
  output_filename: str,
  **kwargs: Any,
  ) -> Path:
+ import pandas as pd
+
  with open(elements_filepath) as elements_file:
  elements_contents: list[dict] = json.load(elements_file)

@@ -129,7 +132,10 @@ class LanceDBUploader(Uploader):
  finally:
  table.close()

+ @requires_dependencies(["pandas"], extras="lancedb")
  async def run_async(self, path, file_data, **kwargs):
+ import pandas as pd
+
  df = pd.read_feather(path)
  async with self.get_table() as table:
  schema = await table.schema()
@@ -144,7 +150,9 @@ class LanceDBUploader(Uploader):
  await table.delete(f'{RECORD_ID_LABEL} = "{file_data.identifier}"')
  await table.add(data=df)

- def _fit_to_schema(self, df: pd.DataFrame, schema) -> pd.DataFrame:
+ def _fit_to_schema(self, df: "DataFrame", schema) -> "DataFrame":
+ import pandas as pd
+
  columns = set(df.columns)
  schema_fields = set(schema.names)
  columns_to_drop = columns - schema_fields
@@ -3,8 +3,6 @@ from contextlib import contextmanager
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import numpy as np
- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.utils.data_prep import split_dataframe
@@ -27,6 +25,7 @@ if TYPE_CHECKING:
  from databricks.sdk.core import oauth_service_principal
  from databricks.sql.client import Connection as DeltaTableConnection
  from databricks.sql.client import Cursor as DeltaTableCursor
+ from pandas import DataFrame

  CONNECTOR_TYPE = "databricks_delta_tables"

@@ -180,7 +179,10 @@ class DatabricksDeltaTablesUploader(SQLUploader):
  )
  return statement

- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+ @requires_dependencies(["pandas"], extras="databricks-delta-tables")
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+ import numpy as np
+
  if self.can_delete():
  self.delete_by_record_id(file_data=file_data)
  else:
@@ -3,9 +3,9 @@ from contextlib import contextmanager
  from dataclasses import dataclass, field
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import pandas as pd
  from pydantic import Field, Secret

+ from unstructured_ingest.utils.dep_check import requires_dependencies
  from unstructured_ingest.v2.logger import logger
  from unstructured_ingest.v2.processes.connector_registry import (
  DestinationRegistryEntry,
@@ -46,6 +46,7 @@ class SingleStoreConnectionConfig(SQLConnectionConfig):
  database: Optional[str] = Field(default=None, description="SingleStore database")

  @contextmanager
+ @requires_dependencies(["singlestoredb"], extras="singlestore")
  def get_connection(self) -> Generator["SingleStoreConnection", None, None]:
  import singlestoredb as s2

@@ -130,9 +131,12 @@ class SingleStoreUploader(SQLUploader):
  values_delimiter: str = "%s"
  connector_type: str = CONNECTOR_TYPE

+ @requires_dependencies(["pandas"], extras="singlestore")
  def prepare_data(
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
  ) -> list[tuple[Any, ...]]:
+ import pandas as pd
+
  output = []
  for row in data:
  parsed = []
@@ -3,8 +3,6 @@ from contextlib import contextmanager
  from dataclasses import dataclass, field
  from typing import TYPE_CHECKING, Any, Generator, Optional

- import numpy as np
- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.utils.data_prep import split_dataframe
@@ -32,6 +30,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
  )

  if TYPE_CHECKING:
+ from pandas import DataFrame
  from snowflake.connector import SnowflakeConnection
  from snowflake.connector.cursor import SnowflakeCursor

@@ -174,9 +173,12 @@ class SnowflakeUploader(SQLUploader):
  connector_type: str = CONNECTOR_TYPE
  values_delimiter: str = "?"

+ @requires_dependencies(["pandas"], extras="snowflake")
  def prepare_data(
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
  ) -> list[tuple[Any, ...]]:
+ import pandas as pd
+
  output = []
  for row in data:
  parsed = []
@@ -210,7 +212,9 @@ class SnowflakeUploader(SQLUploader):
  ]
  )

- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+ import numpy as np
+
  if self.can_delete():
  self.delete_by_record_id(file_data=file_data)
  else:
@@ -6,10 +6,8 @@ from dataclasses import dataclass, field
  from datetime import datetime
  from pathlib import Path
  from time import time
- from typing import Any, Generator, Union
+ from typing import TYPE_CHECKING, Any, Generator, Union

- import numpy as np
- import pandas as pd
  from dateutil import parser
  from pydantic import BaseModel, Field, Secret

@@ -38,6 +36,9 @@ from unstructured_ingest.v2.interfaces import (
  from unstructured_ingest.v2.logger import logger
  from unstructured_ingest.v2.utils import get_enhanced_element_id

+ if TYPE_CHECKING:
+ from pandas import DataFrame
+
  _DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified")


@@ -154,13 +155,15 @@ class SQLDownloader(Downloader, ABC):
  def query_db(self, file_data: SqlBatchFileData) -> tuple[list[tuple], list[str]]:
  pass

- def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list[pd.DataFrame]:
+ def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list["DataFrame"]:
+ import pandas as pd
+
  data = [dict(zip(columns, row)) for row in rows]
  df = pd.DataFrame(data)
  dfs = [pd.DataFrame([row.values], columns=df.columns) for index, row in df.iterrows()]
  return dfs

- def get_data(self, file_data: SqlBatchFileData) -> list[pd.DataFrame]:
+ def get_data(self, file_data: SqlBatchFileData) -> list["DataFrame"]:
  rows, columns = self.query_db(file_data=file_data)
  return self.sql_to_df(rows=rows, columns=columns)

@@ -174,7 +177,7 @@ class SQLDownloader(Downloader, ABC):
  return f

  def generate_download_response(
- self, result: pd.DataFrame, file_data: SqlBatchFileData
+ self, result: "DataFrame", file_data: SqlBatchFileData
  ) -> DownloadResponse:
  id_column = file_data.additional_metadata.id_column
  table_name = file_data.additional_metadata.table_name
@@ -231,7 +234,7 @@ class SQLUploadStager(UploadStager):
  data[RECORD_ID_LABEL] = file_data.identifier
  return data

- def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+ def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
  for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
  df[column] = df[column].apply(parse_date_string).apply(lambda date: date.timestamp())
  for column in filter(
@@ -259,6 +262,8 @@ class SQLUploadStager(UploadStager):
  output_filename: str,
  **kwargs: Any,
  ) -> Path:
+ import pandas as pd
+
  elements_contents = get_data(path=elements_filepath)

  df = pd.DataFrame(
@@ -309,6 +314,8 @@ class SQLUploader(Uploader):
  def prepare_data(
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
  ) -> list[tuple[Any, ...]]:
+ import pandas as pd
+
  output = []
  for row in data:
  parsed = []
@@ -323,7 +330,9 @@ class SQLUploader(Uploader):
  output.append(tuple(parsed))
  return output

- def _fit_to_schema(self, df: pd.DataFrame, add_missing_columns: bool = True) -> pd.DataFrame:
+ def _fit_to_schema(self, df: "DataFrame", add_missing_columns: bool = True) -> "DataFrame":
+ import pandas as pd
+
  table_columns = self.get_table_columns()
  columns = set(df.columns)
  schema_fields = set(table_columns)
@@ -348,7 +357,9 @@ class SQLUploader(Uploader):
  df[column] = pd.Series()
  return df

- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+ import numpy as np
+
  if self.can_delete():
  self.delete_by_record_id(file_data=file_data)
  else:
@@ -409,6 +420,8 @@ class SQLUploader(Uploader):
  logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")

  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+ import pandas as pd
+
  df = pd.DataFrame(data)
  self.upload_dataframe(df=df, file_data=file_data)

@@ -4,9 +4,9 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Generator

- import pandas as pd
  from pydantic import Field, Secret, model_validator

+ from unstructured_ingest.utils.dep_check import requires_dependencies
  from unstructured_ingest.v2.logger import logger
  from unstructured_ingest.v2.processes.connector_registry import (
  DestinationRegistryEntry,
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
  from sqlite3 import Connection as SqliteConnection
  from sqlite3 import Cursor as SqliteCursor

+
  CONNECTOR_TYPE = "sqlite"


@@ -132,9 +133,12 @@ class SQLiteUploader(SQLUploader):
  connection_config: SQLiteConnectionConfig
  connector_type: str = CONNECTOR_TYPE

+ @requires_dependencies(["pandas"])
  def prepare_data(
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
  ) -> list[tuple[Any, ...]]:
+ import pandas as pd
+
  output = []
  for row in data:
  parsed = []
@@ -2,8 +2,6 @@ from contextlib import contextmanager
  from dataclasses import dataclass, field
  from typing import TYPE_CHECKING, Any, Optional

- import numpy as np
- import pandas as pd
  from pydantic import Field, Secret

  from unstructured_ingest.error import DestinationConnectionError
@@ -34,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
  from unstructured_ingest.v2.utils import get_enhanced_element_id

  if TYPE_CHECKING:
+ from pandas import DataFrame
  from vastdb import connect as VastdbConnect
  from vastdb import transaction as VastdbTransaction
  from vastdb.table import Table as VastdbTable
@@ -128,7 +127,6 @@ class VastdbDownloader(SQLDownloader):
  ids = tuple([item.identifier for item in file_data.batch_items])

  with self.connection_config.get_table(table_name) as table:
-
  predicate = _[id_column].isin(ids)

  if self.download_config.fields:
@@ -168,7 +166,7 @@ class VastdbUploadStager(SQLUploadStager):
  data[RECORD_ID_LABEL] = file_data.identifier
  return data

- def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+ def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
  df = super().conform_dataframe(df=df)
  if self.upload_stager_config.rename_columns_map:
  df.rename(columns=self.upload_stager_config.rename_columns_map, inplace=True)
@@ -193,8 +191,9 @@ class VastdbUploader(SQLUploader):
  logger.error(f"failed to validate connection: {e}", exc_info=True)
  raise DestinationConnectionError(f"failed to validate connection: {e}")

- @requires_dependencies(["pyarrow"], extras="vastdb")
- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+ @requires_dependencies(["pyarrow", "pandas"], extras="vastdb")
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+ import numpy as np
  import pyarrow as pa

  if self.can_delete():
@@ -216,7 +215,6 @@ class VastdbUploader(SQLUploader):
  )

  for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
-
  with self.connection_config.get_table(self.upload_config.table_name) as table:
  pa_table = pa.Table.from_pandas(rows)
  table.insert(pa_table)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: unstructured-ingest
- Version: 0.5.20
+ Version: 0.5.21
  Summary: A library that prepares raw documents for downstream ML tasks.
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
  Author: Unstructured Technologies
@@ -22,197 +22,348 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.9.0,<3.14
  Description-Content-Type: text/markdown
  License-File: LICENSE.md
- Requires-Dist: python-dateutil
  Requires-Dist: click
- Requires-Dist: opentelemetry-sdk
+ Requires-Dist: dataclasses_json
  Requires-Dist: pydantic>=2.7
- Requires-Dist: pandas
+ Requires-Dist: python-dateutil
+ Requires-Dist: opentelemetry-sdk
  Requires-Dist: tqdm
- Requires-Dist: dataclasses_json
+ Requires-Dist: numpy
+ Requires-Dist: pandas
  Provides-Extra: remote
  Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
+ Requires-Dist: numpy; extra == "remote"
+ Requires-Dist: pandas; extra == "remote"
  Provides-Extra: csv
  Requires-Dist: unstructured[tsv]; extra == "csv"
+ Requires-Dist: numpy; extra == "csv"
+ Requires-Dist: pandas; extra == "csv"
  Provides-Extra: doc
  Requires-Dist: unstructured[docx]; extra == "doc"
+ Requires-Dist: numpy; extra == "doc"
+ Requires-Dist: pandas; extra == "doc"
  Provides-Extra: docx
  Requires-Dist: unstructured[docx]; extra == "docx"
+ Requires-Dist: numpy; extra == "docx"
+ Requires-Dist: pandas; extra == "docx"
  Provides-Extra: epub
  Requires-Dist: unstructured[epub]; extra == "epub"
+ Requires-Dist: numpy; extra == "epub"
+ Requires-Dist: pandas; extra == "epub"
  Provides-Extra: md
  Requires-Dist: unstructured[md]; extra == "md"
+ Requires-Dist: numpy; extra == "md"
+ Requires-Dist: pandas; extra == "md"
  Provides-Extra: msg
  Requires-Dist: unstructured[msg]; extra == "msg"
+ Requires-Dist: numpy; extra == "msg"
+ Requires-Dist: pandas; extra == "msg"
  Provides-Extra: odt
  Requires-Dist: unstructured[odt]; extra == "odt"
+ Requires-Dist: numpy; extra == "odt"
+ Requires-Dist: pandas; extra == "odt"
  Provides-Extra: org
  Requires-Dist: unstructured[org]; extra == "org"
+ Requires-Dist: numpy; extra == "org"
+ Requires-Dist: pandas; extra == "org"
  Provides-Extra: pdf
  Requires-Dist: unstructured[pdf]; extra == "pdf"
+ Requires-Dist: numpy; extra == "pdf"
+ Requires-Dist: pandas; extra == "pdf"
  Provides-Extra: ppt
  Requires-Dist: unstructured[pptx]; extra == "ppt"
+ Requires-Dist: numpy; extra == "ppt"
+ Requires-Dist: pandas; extra == "ppt"
  Provides-Extra: pptx
  Requires-Dist: unstructured[pptx]; extra == "pptx"
+ Requires-Dist: numpy; extra == "pptx"
+ Requires-Dist: pandas; extra == "pptx"
  Provides-Extra: rtf
  Requires-Dist: unstructured[rtf]; extra == "rtf"
+ Requires-Dist: numpy; extra == "rtf"
+ Requires-Dist: pandas; extra == "rtf"
  Provides-Extra: rst
  Requires-Dist: unstructured[rst]; extra == "rst"
+ Requires-Dist: numpy; extra == "rst"
+ Requires-Dist: pandas; extra == "rst"
  Provides-Extra: tsv
  Requires-Dist: unstructured[tsv]; extra == "tsv"
+ Requires-Dist: numpy; extra == "tsv"
+ Requires-Dist: pandas; extra == "tsv"
  Provides-Extra: xlsx
  Requires-Dist: unstructured[xlsx]; extra == "xlsx"
+ Requires-Dist: numpy; extra == "xlsx"
+ Requires-Dist: pandas; extra == "xlsx"
  Provides-Extra: airtable
  Requires-Dist: pyairtable; extra == "airtable"
+ Requires-Dist: numpy; extra == "airtable"
+ Requires-Dist: pandas; extra == "airtable"
  Provides-Extra: astradb
  Requires-Dist: astrapy; extra == "astradb"
+ Requires-Dist: numpy; extra == "astradb"
+ Requires-Dist: pandas; extra == "astradb"
  Provides-Extra: azure
- Requires-Dist: adlfs; extra == "azure"
  Requires-Dist: fsspec; extra == "azure"
+ Requires-Dist: adlfs; extra == "azure"
+ Requires-Dist: numpy; extra == "azure"
+ Requires-Dist: pandas; extra == "azure"
  Provides-Extra: azure-ai-search
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
+ Requires-Dist: numpy; extra == "azure-ai-search"
+ Requires-Dist: pandas; extra == "azure-ai-search"
  Provides-Extra: biomed
  Requires-Dist: bs4; extra == "biomed"
  Requires-Dist: requests; extra == "biomed"
+ Requires-Dist: numpy; extra == "biomed"
+ Requires-Dist: pandas; extra == "biomed"
  Provides-Extra: box
- Requires-Dist: boxfs; extra == "box"
  Requires-Dist: fsspec; extra == "box"
+ Requires-Dist: boxfs; extra == "box"
+ Requires-Dist: numpy; extra == "box"
+ Requires-Dist: pandas; extra == "box"
  Provides-Extra: chroma
  Requires-Dist: chromadb; extra == "chroma"
+ Requires-Dist: numpy; extra == "chroma"
+ Requires-Dist: pandas; extra == "chroma"
  Provides-Extra: clarifai
  Requires-Dist: clarifai; extra == "clarifai"
+ Requires-Dist: numpy; extra == "clarifai"
+ Requires-Dist: pandas; extra == "clarifai"
  Provides-Extra: confluence
- Requires-Dist: requests; extra == "confluence"
  Requires-Dist: atlassian-python-api; extra == "confluence"
+ Requires-Dist: requests; extra == "confluence"
+ Requires-Dist: numpy; extra == "confluence"
+ Requires-Dist: pandas; extra == "confluence"
  Provides-Extra: couchbase
  Requires-Dist: couchbase; extra == "couchbase"
+ Requires-Dist: numpy; extra == "couchbase"
+ Requires-Dist: pandas; extra == "couchbase"
  Provides-Extra: delta-table
  Requires-Dist: boto3; extra == "delta-table"
  Requires-Dist: deltalake; extra == "delta-table"
+ Requires-Dist: numpy; extra == "delta-table"
+ Requires-Dist: pandas; extra == "delta-table"
  Provides-Extra: discord
  Requires-Dist: discord.py; extra == "discord"
+ Requires-Dist: numpy; extra == "discord"
+ Requires-Dist: pandas; extra == "discord"
  Provides-Extra: dropbox
  Requires-Dist: fsspec; extra == "dropbox"
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
+ Requires-Dist: numpy; extra == "dropbox"
+ Requires-Dist: pandas; extra == "dropbox"
  Provides-Extra: duckdb
  Requires-Dist: duckdb; extra == "duckdb"
+ Requires-Dist: numpy; extra == "duckdb"
+ Requires-Dist: pandas; extra == "duckdb"
  Provides-Extra: elasticsearch
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
+ Requires-Dist: numpy; extra == "elasticsearch"
+ Requires-Dist: pandas; extra == "elasticsearch"
  Provides-Extra: gcs
  Requires-Dist: bs4; extra == "gcs"
- Requires-Dist: gcsfs; extra == "gcs"
  Requires-Dist: fsspec; extra == "gcs"
+ Requires-Dist: gcsfs; extra == "gcs"
+ Requires-Dist: numpy; extra == "gcs"
+ Requires-Dist: pandas; extra == "gcs"
  Provides-Extra: github
  Requires-Dist: pygithub>1.58.0; extra == "github"
  Requires-Dist: requests; extra == "github"
+ Requires-Dist: numpy; extra == "github"
+ Requires-Dist: pandas; extra == "github"
  Provides-Extra: gitlab
  Requires-Dist: python-gitlab; extra == "gitlab"
+ Requires-Dist: numpy; extra == "gitlab"
+ Requires-Dist: pandas; extra == "gitlab"
  Provides-Extra: google-drive
  Requires-Dist: google-api-python-client; extra == "google-drive"
+ Requires-Dist: numpy; extra == "google-drive"
+ Requires-Dist: pandas; extra == "google-drive"
  Provides-Extra: hubspot
- Requires-Dist: hubspot-api-client; extra == "hubspot"
  Requires-Dist: urllib3; extra == "hubspot"
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
+ Requires-Dist: numpy; extra == "hubspot"
+ Requires-Dist: pandas; extra == "hubspot"
  Provides-Extra: ibm-watsonx-s3
- Requires-Dist: httpx; extra == "ibm-watsonx-s3"
+ Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
  Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
+ Requires-Dist: httpx; extra == "ibm-watsonx-s3"
  Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
- Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
+ Requires-Dist: numpy; extra == "ibm-watsonx-s3"
+ Requires-Dist: pandas; extra == "ibm-watsonx-s3"
  Provides-Extra: jira
  Requires-Dist: atlassian-python-api; extra == "jira"
+ Requires-Dist: numpy; extra == "jira"
+ Requires-Dist: pandas; extra == "jira"
  Provides-Extra: kafka
  Requires-Dist: confluent-kafka; extra == "kafka"
+ Requires-Dist: numpy; extra == "kafka"
+ Requires-Dist: pandas; extra == "kafka"
  Provides-Extra: kdbai
  Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
+ Requires-Dist: numpy; extra == "kdbai"
+ Requires-Dist: pandas; extra == "kdbai"
  Provides-Extra: lancedb
  Requires-Dist: lancedb; extra == "lancedb"
+ Requires-Dist: numpy; extra == "lancedb"
+ Requires-Dist: pandas; extra == "lancedb"
  Provides-Extra: milvus
  Requires-Dist: pymilvus; extra == "milvus"
+ Requires-Dist: numpy; extra == "milvus"
+ Requires-Dist: pandas; extra == "milvus"
  Provides-Extra: mongodb
  Requires-Dist: pymongo; extra == "mongodb"
+ Requires-Dist: numpy; extra == "mongodb"
+ Requires-Dist: pandas; extra == "mongodb"
  Provides-Extra: neo4j
  Requires-Dist: networkx; extra == "neo4j"
- Requires-Dist: cymple; extra == "neo4j"
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
+ Requires-Dist: cymple; extra == "neo4j"
+ Requires-Dist: numpy; extra == "neo4j"
+ Requires-Dist: pandas; extra == "neo4j"
  Provides-Extra: notion
+ Requires-Dist: httpx; extra == "notion"
  Requires-Dist: htmlBuilder; extra == "notion"
  Requires-Dist: notion-client; extra == "notion"
- Requires-Dist: httpx; extra == "notion"
  Requires-Dist: backoff; extra == "notion"
+ Requires-Dist: numpy; extra == "notion"
+ Requires-Dist: pandas; extra == "notion"
  Provides-Extra: onedrive
  Requires-Dist: bs4; extra == "onedrive"
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
  Requires-Dist: msal; extra == "onedrive"
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+ Requires-Dist: numpy; extra == "onedrive"
+ Requires-Dist: pandas; extra == "onedrive"
  Provides-Extra: opensearch
  Requires-Dist: opensearch-py; extra == "opensearch"
+ Requires-Dist: numpy; extra == "opensearch"
+ Requires-Dist: pandas; extra == "opensearch"
  Provides-Extra: outlook
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
  Requires-Dist: msal; extra == "outlook"
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+ Requires-Dist: numpy; extra == "outlook"
+ Requires-Dist: pandas; extra == "outlook"
  Provides-Extra: pinecone
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
+ Requires-Dist: numpy; extra == "pinecone"
+ Requires-Dist: pandas; extra == "pinecone"
  Provides-Extra: postgres
  Requires-Dist: psycopg2-binary; extra == "postgres"
+ Requires-Dist: numpy; extra == "postgres"
+ Requires-Dist: pandas; extra == "postgres"
  Provides-Extra: qdrant
  Requires-Dist: qdrant-client; extra == "qdrant"
+ Requires-Dist: numpy; extra == "qdrant"
+ Requires-Dist: pandas; extra == "qdrant"
  Provides-Extra: reddit
  Requires-Dist: praw; extra == "reddit"
+ Requires-Dist: numpy; extra == "reddit"
+ Requires-Dist: pandas; extra == "reddit"
  Provides-Extra: redis
  Requires-Dist: redis; extra == "redis"
+ Requires-Dist: numpy; extra == "redis"
+ Requires-Dist: pandas; extra == "redis"
  Provides-Extra: s3
- Requires-Dist: s3fs; extra == "s3"
  Requires-Dist: fsspec; extra == "s3"
+ Requires-Dist: s3fs; extra == "s3"
+ Requires-Dist: numpy; extra == "s3"
+ Requires-Dist: pandas; extra == "s3"
  Provides-Extra: sharepoint
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
  Requires-Dist: msal; extra == "sharepoint"
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+ Requires-Dist: numpy; extra == "sharepoint"
+ Requires-Dist: pandas; extra == "sharepoint"
  Provides-Extra: salesforce
  Requires-Dist: simple-salesforce; extra == "salesforce"
+ Requires-Dist: numpy; extra == "salesforce"
+ Requires-Dist: pandas; extra == "salesforce"
  Provides-Extra: sftp
- Requires-Dist: paramiko; extra == "sftp"
  Requires-Dist: fsspec; extra == "sftp"
+ Requires-Dist: paramiko; extra == "sftp"
+ Requires-Dist: numpy; extra == "sftp"
+ Requires-Dist: pandas; extra == "sftp"
  Provides-Extra: slack
  Requires-Dist: slack_sdk[optional]; extra == "slack"
+ Requires-Dist: numpy; extra == "slack"
+ Requires-Dist: pandas; extra == "slack"
  Provides-Extra: snowflake
- Requires-Dist: snowflake-connector-python; extra == "snowflake"
  Requires-Dist: psycopg2-binary; extra == "snowflake"
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
+ Requires-Dist: numpy; extra == "snowflake"
+ Requires-Dist: pandas; extra == "snowflake"
  Provides-Extra: wikipedia
  Requires-Dist: wikipedia; extra == "wikipedia"
+ Requires-Dist: numpy; extra == "wikipedia"
+ Requires-Dist: pandas; extra == "wikipedia"
  Provides-Extra: weaviate
  Requires-Dist: weaviate-client; extra == "weaviate"
+ Requires-Dist: numpy; extra == "weaviate"
+ Requires-Dist: pandas; extra == "weaviate"
  Provides-Extra: databricks-volumes
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
+ Requires-Dist: numpy; extra == "databricks-volumes"
+ Requires-Dist: pandas; extra == "databricks-volumes"
  Provides-Extra: databricks-delta-tables
  Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
+ Requires-Dist: numpy; extra == "databricks-delta-tables"
+ Requires-Dist: pandas; extra == "databricks-delta-tables"
  Provides-Extra: singlestore
  Requires-Dist: singlestoredb; extra == "singlestore"
+ Requires-Dist: numpy; extra == "singlestore"
+ Requires-Dist: pandas; extra == "singlestore"
  Provides-Extra: vectara
  Requires-Dist: httpx; extra == "vectara"
  Requires-Dist: aiofiles; extra == "vectara"
  Requires-Dist: requests; extra == "vectara"
+ Requires-Dist: numpy; extra == "vectara"
+ Requires-Dist: pandas; extra == "vectara"
  Provides-Extra: vastdb
+ Requires-Dist: ibis; extra == "vastdb"
  Requires-Dist: pyarrow; extra == "vastdb"
  Requires-Dist: vastdb; extra == "vastdb"
- Requires-Dist: ibis; extra == "vastdb"
+ Requires-Dist: numpy; extra == "vastdb"
+ Requires-Dist: pandas; extra == "vastdb"
  Provides-Extra: zendesk
  Requires-Dist: bs4; extra == "zendesk"
- Requires-Dist: aiofiles; extra == "zendesk"
  Requires-Dist: httpx; extra == "zendesk"
+ Requires-Dist: aiofiles; extra == "zendesk"
+ Requires-Dist: numpy; extra == "zendesk"
+ Requires-Dist: pandas; extra == "zendesk"
  Provides-Extra: embed-huggingface
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
+ Requires-Dist: numpy; extra == "embed-huggingface"
+ Requires-Dist: pandas; extra == "embed-huggingface"
  Provides-Extra: embed-octoai
- Requires-Dist: openai; extra == "embed-octoai"
  Requires-Dist: tiktoken; extra == "embed-octoai"
+ Requires-Dist: openai; extra == "embed-octoai"
+ Requires-Dist: numpy; extra == "embed-octoai"
+ Requires-Dist: pandas; extra == "embed-octoai"
  Provides-Extra: embed-vertexai
  Requires-Dist: vertexai; extra == "embed-vertexai"
+ Requires-Dist: numpy; extra == "embed-vertexai"
+ Requires-Dist: pandas; extra == "embed-vertexai"
  Provides-Extra: embed-voyageai
  Requires-Dist: voyageai; extra == "embed-voyageai"
+ Requires-Dist: numpy; extra == "embed-voyageai"
+ Requires-Dist: pandas; extra == "embed-voyageai"
  Provides-Extra: embed-mixedbreadai
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
+ Requires-Dist: numpy; extra == "embed-mixedbreadai"
+ Requires-Dist: pandas; extra == "embed-mixedbreadai"
  Provides-Extra: openai
- Requires-Dist: openai; extra == "openai"
  Requires-Dist: tiktoken; extra == "openai"
+ Requires-Dist: openai; extra == "openai"
+ Requires-Dist: numpy; extra == "openai"
+ Requires-Dist: pandas; extra == "openai"
  Provides-Extra: bedrock
  Requires-Dist: aioboto3; extra == "bedrock"
  Requires-Dist: boto3; extra == "bedrock"
+ Requires-Dist: numpy; extra == "bedrock"
+ Requires-Dist: pandas; extra == "bedrock"
  Provides-Extra: togetherai
  Requires-Dist: together; extra == "togetherai"
+ Requires-Dist: numpy; extra == "togetherai"
+ Requires-Dist: pandas; extra == "togetherai"
  Dynamic: author
  Dynamic: author-email
  Dynamic: classifier
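Every extra now also declares numpy and pandas, and both remain core requirements, so resolvers still install them even though the source imports are deferred. One way to confirm this against an installed copy of the wheel (assumes the package is installed locally):

```python
from importlib.metadata import requires

reqs = requires("unstructured-ingest") or []
# Core (non-extra) requirements carry no "extra ==" marker.
core = [r for r in reqs if "extra ==" not in r]
print([r for r in core if r.startswith(("numpy", "pandas"))])
```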
@@ -5,7 +5,7 @@ test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
  test/integration/chunkers/test_chunkers.py,sha256=USkltQN_mVVCxI0FkJsrS1gnLXlVr-fvsc0tPaK2sWI,1062
  test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/integration/connectors/conftest.py,sha256=vYs4WDlCuieAwwErkJxCk4a1lGvr3qpeiAm-YaDznSo,1018
- test/integration/connectors/test_astradb.py,sha256=c9Lk0dvJVVdzHcokvsc4XMNJ4SIO1k2vGtT5py0cFVM,9753
+ test/integration/connectors/test_astradb.py,sha256=hQyxvnbvN1UN-oDOBkXyniAs6GLb0rstQOoLT4LcBNI,9921
  test/integration/connectors/test_azure_ai_search.py,sha256=MxFwk84vI_HT4taQTGrNpJ8ewGPqHSGrx626j8hC_Pw,9695
  test/integration/connectors/test_chroma.py,sha256=1uGHbZXkXKGb8wl3p7c9G-L1MViUe283Hw5u3dg8OgI,4532
  test/integration/connectors/test_confluence.py,sha256=W93znOusdvFXta8q0dqQ1rKhLafRVIqrfaFqk2FY-fo,3590
@@ -113,7 +113,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
- unstructured_ingest/__version__.py,sha256=BCszjb86jsmMjfakEG2zLAZFKHpLYTR2k5TCe7RzaBc,43
+ unstructured_ingest/__version__.py,sha256=b5BrQJjlBZoPiM_J1cJDbJABGvcwaDFb_Bvwb0AHN10,43
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -284,7 +284,7 @@ unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
  unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
  unstructured_ingest/embed/bedrock.py,sha256=tZumLLXafSr1zIFVjckapRoiiY-7u65GPuWmwsdhY0I,7726
  unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
- unstructured_ingest/embed/interfaces.py,sha256=_-CqasY6R5nnNUY-X6PS5lz8dsmGaUw5zIGRdPfx16o,4918
+ unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
  unstructured_ingest/embed/mixedbreadai.py,sha256=-Y0J27G9CL1t3ZTIeNjTjRviErSMAzJRf2zgDgMHUmg,4499
  unstructured_ingest/embed/octoai.py,sha256=hNLEskDEP-2qWExUgVz2Eyw3KTIFwdUE9elbJ5qp4Ao,3855
  unstructured_ingest/embed/openai.py,sha256=EindGUouvP8wolOBNbWQhAkaI6WGyPN4Hh2xyKuR6L8,3372
@@ -372,13 +372,13 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
  unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
- unstructured_ingest/utils/data_prep.py,sha256=MfID_7SPZHeZztlNTSXIzilaWvv1mdfCcLlhqpGLYNg,7557
+ unstructured_ingest/utils/data_prep.py,sha256=-hhGbWm1Sev57t9z20JJLW0vS6kdhArCbb_xmIlKGaY,7826
  unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
  unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
  unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
  unstructured_ingest/utils/string_and_date_utils.py,sha256=54tzuqmhPN0uWnPLrzAWAsDGU9s6mQE_KSVywMDwTBk,2522
- unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
+ unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
  unstructured_ingest/v2/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
  unstructured_ingest/v2/errors.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
@@ -435,12 +435,12 @@ unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
  unstructured_ingest/v2/processes/connectors/confluence.py,sha256=gSs4-AxL0gfeWdJfP7JfCrQSQNLoJRkvHquKK9RJvpQ,12043
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
- unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=bfEGiepJLOS9TxK-bMkjTTjHLHUc0q7qUzIYdwkLDMs,7104
+ unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=Jx2EUqchJDqfPsyw4Ks-HaLSq2rIwXc1l1YFqjh_BbM,7240
  unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
  unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
  unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=QzcHNelUbnubsDtanFIgDCRzmYTuP-GjJ_g9y8fButE,19623
  unstructured_ingest/v2/processes/connectors/jira.py,sha256=-f_vIWNw6Xr8rMNdAcfCC2cmhB-QndnZk5XymHo60FU,17094
- unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
+ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=1dXfNb3qaV669-_BjCQdznmfuWLPGjmdkv2ybmkAHjQ,5099
  unstructured_ingest/v2/processes/connectors/local.py,sha256=FWPRjjUsnQjyZMChuZGuMU04AB5X0sFEOcAXhx1r9sk,7381
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
@@ -464,9 +464,9 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=Uss
  unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=g1qYnIrML4TjN7rmC0MGrD5JzAprb6SymBHlEdOumz0,3113
  unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=FZhjrMYBr_je6mWYp7MUUvyKR9YwGD2HiNljeT7U5ws,5044
  unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
- unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=IHaY1mWuidt6GDEJhB1c_orwmjeyXuRCVJ88djYDciM,2793
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=OsRy-rcrP4_KSustpxlEKoZ_FmJNFMyMmIfFk6WJ3UY,4559
+ unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=o3J81DnSwt3lmAh19jXVPAYRZLJ3VyGhaEVO2SIjksQ,2926
+ unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=NIo2CCiPiuTFotNC891Mbelzg01knItryYGUtOM96xg,4393
+ unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=RW-Cw94Hs3ZsN8Kb4ciSh_N-Qkp0cqkw_xkJbt8CDNU,4656
  unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
  unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
  unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
@@ -490,7 +490,7 @@ unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur
  unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
  unstructured_ingest/v2/processes/connectors/lancedb/cloud.py,sha256=BFy0gW2OZ_qaZJM97m-tNsFaJPi9zOKrrd2y4thcNP0,1341
  unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=oQbRZfocnRWqc9VIHgloYbEsfV0Ei_s1_-TKmRnTdYg,5714
+ unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=Y4waCOrtlz5Eyf3Me6rInzt_Ory0woseLe_hfSD1nDM,5926
  unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
  unstructured_ingest/v2/processes/connectors/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unstructured_ingest/v2/processes/connectors/notion/client.py,sha256=8_K6x1Z4bkvSer1NicQeqpX8Y275OUS65kfqTWRU09g,13120
@@ -564,13 +564,13 @@ unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ
  unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
  unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
  unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=NSEZwJDHh_9kFc31LnG14iRtYF3meK2UfUlQfYnwYEQ,2059
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=xbZ90rmehiCnBoqFXMz-3ZMXeYb0PzWB6iobCNSHTmQ,8955
+ unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=Ys-pRLiYtdvNRdDnWYwhMqteLQPekRFHrqsrr9jQVpo,9049
  unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=OPBDQ2c_5KjWHEFfqXxf3pQ2tWC-N4MtslMulMgP1Wc,5503
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=0hfiX_u7V38k_RfoeDmXJp8WIHZ19ilIHnrgZVSleKw,9270
- unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=wtVK6CHrQ4McwsPifUoa7KKaY-v0cjDZJetASSAaSIA,15415
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=PRjN_S7UQv0k4ZpSyclW1AJrsrugyxbR-GoOrHvBpks,5200
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=0rxrb1ByXIefB9umzMTEJbpvzdTttXHK5DjRY97-GG8,9618
+ unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=am2d87kDkpTTB0VbPSX3ce9o6oM9KUQu5y9T_p1kgJw,5711
+ unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=r2qgoEF3bUugzgSr3hMJyIm8DKmxsO53ZHXJSNxOsvE,9379
+ unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=G28VUR0zaMVmQtbdZG6TRpkWFHvXJqFrr7SBuyM-fME,15608
+ unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=6RoBUxMbeuhduvTFlBKMgEH1NKJg7doQjXF_R5cUuX0,5319
+ unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=wklJ8p3eMb81FTjS6ukPoILuWN0_KQBfuYGXfE0XrqY,9644
  unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
  unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
@@ -581,9 +581,9 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
  unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
  unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
- unstructured_ingest-0.5.20.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
- unstructured_ingest-0.5.20.dist-info/METADATA,sha256=S2Yr62sVeW0csT-QRyonnokiHFvvH0FAwQ2x02BqAeM,8697
- unstructured_ingest-0.5.20.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- unstructured_ingest-0.5.20.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
- unstructured_ingest-0.5.20.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
- unstructured_ingest-0.5.20.dist-info/RECORD,,
+ unstructured_ingest-0.5.21.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+ unstructured_ingest-0.5.21.dist-info/METADATA,sha256=c1bUHvgG6X9QOiAD669sVHAFkGfI2tBTRBM-eRJBLiU,14999
+ unstructured_ingest-0.5.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ unstructured_ingest-0.5.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+ unstructured_ingest-0.5.21.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+ unstructured_ingest-0.5.21.dist-info/RECORD,,