unstructured-ingest 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic; see the registry's advisory page for more details.

Files changed (29)
  1. test/integration/connectors/test_astradb.py +8 -2
  2. test/unit/v2/connectors/ibm_watsonx/__init__.py +0 -0
  3. test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +459 -0
  4. test/unit/v2/connectors/sql/test_sql.py +79 -1
  5. unstructured_ingest/__version__.py +1 -1
  6. unstructured_ingest/embed/interfaces.py +7 -3
  7. unstructured_ingest/utils/data_prep.py +17 -5
  8. unstructured_ingest/utils/table.py +11 -4
  9. unstructured_ingest/v2/processes/connectors/__init__.py +2 -0
  10. unstructured_ingest/v2/processes/connectors/delta_table.py +8 -3
  11. unstructured_ingest/v2/processes/connectors/duckdb/base.py +4 -3
  12. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +5 -2
  13. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +5 -2
  14. unstructured_ingest/v2/processes/connectors/ibm_watsonx/__init__.py +10 -0
  15. unstructured_ingest/v2/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +301 -0
  16. unstructured_ingest/v2/processes/connectors/kdbai.py +6 -3
  17. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +10 -2
  18. unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +5 -3
  19. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -1
  20. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +7 -3
  21. unstructured_ingest/v2/processes/connectors/sql/sql.py +26 -12
  22. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -1
  23. unstructured_ingest/v2/processes/connectors/sql/vastdb.py +5 -7
  24. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/METADATA +174 -18
  25. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/RECORD +29 -25
  26. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/LICENSE.md +0 -0
  27. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/WHEEL +0 -0
  28. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/entry_points.txt +0 -0
  29. {unstructured_ingest-0.5.19.dist-info → unstructured_ingest-0.5.21.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,6 @@ from contextlib import contextmanager
3
3
  from dataclasses import dataclass, field
4
4
  from typing import TYPE_CHECKING, Any, Generator, Optional
5
5
 
6
- import numpy as np
7
- import pandas as pd
8
6
  from pydantic import Field, Secret
9
7
 
10
8
  from unstructured_ingest.utils.data_prep import split_dataframe
@@ -32,6 +30,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
32
30
  )
33
31
 
34
32
  if TYPE_CHECKING:
33
+ from pandas import DataFrame
35
34
  from snowflake.connector import SnowflakeConnection
36
35
  from snowflake.connector.cursor import SnowflakeCursor
37
36
 
@@ -174,9 +173,12 @@ class SnowflakeUploader(SQLUploader):
174
173
  connector_type: str = CONNECTOR_TYPE
175
174
  values_delimiter: str = "?"
176
175
 
176
+ @requires_dependencies(["pandas"], extras="snowflake")
177
177
  def prepare_data(
178
178
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
179
179
  ) -> list[tuple[Any, ...]]:
180
+ import pandas as pd
181
+
180
182
  output = []
181
183
  for row in data:
182
184
  parsed = []
@@ -210,7 +212,9 @@ class SnowflakeUploader(SQLUploader):
210
212
  ]
211
213
  )
212
214
 
213
- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
215
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
216
+ import numpy as np
217
+
214
218
  if self.can_delete():
215
219
  self.delete_by_record_id(file_data=file_data)
216
220
  else:
@@ -6,10 +6,8 @@ from dataclasses import dataclass, field
6
6
  from datetime import datetime
7
7
  from pathlib import Path
8
8
  from time import time
9
- from typing import Any, Generator, Union
9
+ from typing import TYPE_CHECKING, Any, Generator, Union
10
10
 
11
- import numpy as np
12
- import pandas as pd
13
11
  from dateutil import parser
14
12
  from pydantic import BaseModel, Field, Secret
15
13
 
@@ -38,6 +36,9 @@ from unstructured_ingest.v2.interfaces import (
38
36
  from unstructured_ingest.v2.logger import logger
39
37
  from unstructured_ingest.v2.utils import get_enhanced_element_id
40
38
 
39
+ if TYPE_CHECKING:
40
+ from pandas import DataFrame
41
+
41
42
  _DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified")
42
43
 
43
44
 
@@ -154,13 +155,15 @@ class SQLDownloader(Downloader, ABC):
154
155
  def query_db(self, file_data: SqlBatchFileData) -> tuple[list[tuple], list[str]]:
155
156
  pass
156
157
 
157
- def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list[pd.DataFrame]:
158
+ def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list["DataFrame"]:
159
+ import pandas as pd
160
+
158
161
  data = [dict(zip(columns, row)) for row in rows]
159
162
  df = pd.DataFrame(data)
160
163
  dfs = [pd.DataFrame([row.values], columns=df.columns) for index, row in df.iterrows()]
161
164
  return dfs
162
165
 
163
- def get_data(self, file_data: SqlBatchFileData) -> list[pd.DataFrame]:
166
+ def get_data(self, file_data: SqlBatchFileData) -> list["DataFrame"]:
164
167
  rows, columns = self.query_db(file_data=file_data)
165
168
  return self.sql_to_df(rows=rows, columns=columns)
166
169
 
@@ -174,7 +177,7 @@ class SQLDownloader(Downloader, ABC):
174
177
  return f
175
178
 
176
179
  def generate_download_response(
177
- self, result: pd.DataFrame, file_data: SqlBatchFileData
180
+ self, result: "DataFrame", file_data: SqlBatchFileData
178
181
  ) -> DownloadResponse:
179
182
  id_column = file_data.additional_metadata.id_column
180
183
  table_name = file_data.additional_metadata.table_name
@@ -231,7 +234,7 @@ class SQLUploadStager(UploadStager):
231
234
  data[RECORD_ID_LABEL] = file_data.identifier
232
235
  return data
233
236
 
234
- def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
237
+ def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
235
238
  for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
236
239
  df[column] = df[column].apply(parse_date_string).apply(lambda date: date.timestamp())
237
240
  for column in filter(
@@ -259,6 +262,8 @@ class SQLUploadStager(UploadStager):
259
262
  output_filename: str,
260
263
  **kwargs: Any,
261
264
  ) -> Path:
265
+ import pandas as pd
266
+
262
267
  elements_contents = get_data(path=elements_filepath)
263
268
 
264
269
  df = pd.DataFrame(
@@ -309,6 +314,8 @@ class SQLUploader(Uploader):
309
314
  def prepare_data(
310
315
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
311
316
  ) -> list[tuple[Any, ...]]:
317
+ import pandas as pd
318
+
312
319
  output = []
313
320
  for row in data:
314
321
  parsed = []
@@ -323,7 +330,9 @@ class SQLUploader(Uploader):
323
330
  output.append(tuple(parsed))
324
331
  return output
325
332
 
326
- def _fit_to_schema(self, df: pd.DataFrame) -> pd.DataFrame:
333
+ def _fit_to_schema(self, df: "DataFrame", add_missing_columns: bool = True) -> "DataFrame":
334
+ import pandas as pd
335
+
327
336
  table_columns = self.get_table_columns()
328
337
  columns = set(df.columns)
329
338
  schema_fields = set(table_columns)
@@ -335,7 +344,7 @@ class SQLUploader(Uploader):
335
344
  "Following columns will be dropped to match the table's schema: "
336
345
  f"{', '.join(columns_to_drop)}"
337
346
  )
338
- if missing_columns:
347
+ if missing_columns and add_missing_columns:
339
348
  logger.info(
340
349
  "Following null filled columns will be added to match the table's schema:"
341
350
  f" {', '.join(missing_columns)} "
@@ -343,11 +352,14 @@ class SQLUploader(Uploader):
343
352
 
344
353
  df = df.drop(columns=columns_to_drop)
345
354
 
346
- for column in missing_columns:
347
- df[column] = pd.Series()
355
+ if add_missing_columns:
356
+ for column in missing_columns:
357
+ df[column] = pd.Series()
348
358
  return df
349
359
 
350
- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
360
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
361
+ import numpy as np
362
+
351
363
  if self.can_delete():
352
364
  self.delete_by_record_id(file_data=file_data)
353
365
  else:
@@ -408,6 +420,8 @@ class SQLUploader(Uploader):
408
420
  logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")
409
421
 
410
422
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
423
+ import pandas as pd
424
+
411
425
  df = pd.DataFrame(data)
412
426
  self.upload_dataframe(df=df, file_data=file_data)
413
427
 
@@ -4,9 +4,9 @@ from dataclasses import dataclass, field
4
4
  from pathlib import Path
5
5
  from typing import TYPE_CHECKING, Any, Generator
6
6
 
7
- import pandas as pd
8
7
  from pydantic import Field, Secret, model_validator
9
8
 
9
+ from unstructured_ingest.utils.dep_check import requires_dependencies
10
10
  from unstructured_ingest.v2.logger import logger
11
11
  from unstructured_ingest.v2.processes.connector_registry import (
12
12
  DestinationRegistryEntry,
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
32
32
  from sqlite3 import Connection as SqliteConnection
33
33
  from sqlite3 import Cursor as SqliteCursor
34
34
 
35
+
35
36
  CONNECTOR_TYPE = "sqlite"
36
37
 
37
38
 
@@ -132,9 +133,12 @@ class SQLiteUploader(SQLUploader):
132
133
  connection_config: SQLiteConnectionConfig
133
134
  connector_type: str = CONNECTOR_TYPE
134
135
 
136
+ @requires_dependencies(["pandas"])
135
137
  def prepare_data(
136
138
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
137
139
  ) -> list[tuple[Any, ...]]:
140
+ import pandas as pd
141
+
138
142
  output = []
139
143
  for row in data:
140
144
  parsed = []
@@ -2,8 +2,6 @@ from contextlib import contextmanager
2
2
  from dataclasses import dataclass, field
3
3
  from typing import TYPE_CHECKING, Any, Optional
4
4
 
5
- import numpy as np
6
- import pandas as pd
7
5
  from pydantic import Field, Secret
8
6
 
9
7
  from unstructured_ingest.error import DestinationConnectionError
@@ -34,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
34
32
  from unstructured_ingest.v2.utils import get_enhanced_element_id
35
33
 
36
34
  if TYPE_CHECKING:
35
+ from pandas import DataFrame
37
36
  from vastdb import connect as VastdbConnect
38
37
  from vastdb import transaction as VastdbTransaction
39
38
  from vastdb.table import Table as VastdbTable
@@ -128,7 +127,6 @@ class VastdbDownloader(SQLDownloader):
128
127
  ids = tuple([item.identifier for item in file_data.batch_items])
129
128
 
130
129
  with self.connection_config.get_table(table_name) as table:
131
-
132
130
  predicate = _[id_column].isin(ids)
133
131
 
134
132
  if self.download_config.fields:
@@ -168,7 +166,7 @@ class VastdbUploadStager(SQLUploadStager):
168
166
  data[RECORD_ID_LABEL] = file_data.identifier
169
167
  return data
170
168
 
171
- def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
169
+ def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
172
170
  df = super().conform_dataframe(df=df)
173
171
  if self.upload_stager_config.rename_columns_map:
174
172
  df.rename(columns=self.upload_stager_config.rename_columns_map, inplace=True)
@@ -193,8 +191,9 @@ class VastdbUploader(SQLUploader):
193
191
  logger.error(f"failed to validate connection: {e}", exc_info=True)
194
192
  raise DestinationConnectionError(f"failed to validate connection: {e}")
195
193
 
196
- @requires_dependencies(["pyarrow"], extras="vastdb")
197
- def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
194
+ @requires_dependencies(["pyarrow", "pandas"], extras="vastdb")
195
+ def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
196
+ import numpy as np
198
197
  import pyarrow as pa
199
198
 
200
199
  if self.can_delete():
@@ -216,7 +215,6 @@ class VastdbUploader(SQLUploader):
216
215
  )
217
216
 
218
217
  for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
219
-
220
218
  with self.connection_config.get_table(self.upload_config.table_name) as table:
221
219
  pa_table = pa.Table.from_pandas(rows)
222
220
  table.insert(pa_table)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.5.19
3
+ Version: 0.5.21
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,192 +22,348 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: python-dateutil
26
- Requires-Dist: dataclasses_json
27
- Requires-Dist: opentelemetry-sdk
28
- Requires-Dist: pandas
29
25
  Requires-Dist: click
26
+ Requires-Dist: dataclasses_json
30
27
  Requires-Dist: pydantic>=2.7
28
+ Requires-Dist: python-dateutil
29
+ Requires-Dist: opentelemetry-sdk
31
30
  Requires-Dist: tqdm
31
+ Requires-Dist: numpy
32
+ Requires-Dist: pandas
32
33
  Provides-Extra: remote
33
34
  Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
35
+ Requires-Dist: numpy; extra == "remote"
36
+ Requires-Dist: pandas; extra == "remote"
34
37
  Provides-Extra: csv
35
38
  Requires-Dist: unstructured[tsv]; extra == "csv"
39
+ Requires-Dist: numpy; extra == "csv"
40
+ Requires-Dist: pandas; extra == "csv"
36
41
  Provides-Extra: doc
37
42
  Requires-Dist: unstructured[docx]; extra == "doc"
43
+ Requires-Dist: numpy; extra == "doc"
44
+ Requires-Dist: pandas; extra == "doc"
38
45
  Provides-Extra: docx
39
46
  Requires-Dist: unstructured[docx]; extra == "docx"
47
+ Requires-Dist: numpy; extra == "docx"
48
+ Requires-Dist: pandas; extra == "docx"
40
49
  Provides-Extra: epub
41
50
  Requires-Dist: unstructured[epub]; extra == "epub"
51
+ Requires-Dist: numpy; extra == "epub"
52
+ Requires-Dist: pandas; extra == "epub"
42
53
  Provides-Extra: md
43
54
  Requires-Dist: unstructured[md]; extra == "md"
55
+ Requires-Dist: numpy; extra == "md"
56
+ Requires-Dist: pandas; extra == "md"
44
57
  Provides-Extra: msg
45
58
  Requires-Dist: unstructured[msg]; extra == "msg"
59
+ Requires-Dist: numpy; extra == "msg"
60
+ Requires-Dist: pandas; extra == "msg"
46
61
  Provides-Extra: odt
47
62
  Requires-Dist: unstructured[odt]; extra == "odt"
63
+ Requires-Dist: numpy; extra == "odt"
64
+ Requires-Dist: pandas; extra == "odt"
48
65
  Provides-Extra: org
49
66
  Requires-Dist: unstructured[org]; extra == "org"
67
+ Requires-Dist: numpy; extra == "org"
68
+ Requires-Dist: pandas; extra == "org"
50
69
  Provides-Extra: pdf
51
70
  Requires-Dist: unstructured[pdf]; extra == "pdf"
71
+ Requires-Dist: numpy; extra == "pdf"
72
+ Requires-Dist: pandas; extra == "pdf"
52
73
  Provides-Extra: ppt
53
74
  Requires-Dist: unstructured[pptx]; extra == "ppt"
75
+ Requires-Dist: numpy; extra == "ppt"
76
+ Requires-Dist: pandas; extra == "ppt"
54
77
  Provides-Extra: pptx
55
78
  Requires-Dist: unstructured[pptx]; extra == "pptx"
79
+ Requires-Dist: numpy; extra == "pptx"
80
+ Requires-Dist: pandas; extra == "pptx"
56
81
  Provides-Extra: rtf
57
82
  Requires-Dist: unstructured[rtf]; extra == "rtf"
83
+ Requires-Dist: numpy; extra == "rtf"
84
+ Requires-Dist: pandas; extra == "rtf"
58
85
  Provides-Extra: rst
59
86
  Requires-Dist: unstructured[rst]; extra == "rst"
87
+ Requires-Dist: numpy; extra == "rst"
88
+ Requires-Dist: pandas; extra == "rst"
60
89
  Provides-Extra: tsv
61
90
  Requires-Dist: unstructured[tsv]; extra == "tsv"
91
+ Requires-Dist: numpy; extra == "tsv"
92
+ Requires-Dist: pandas; extra == "tsv"
62
93
  Provides-Extra: xlsx
63
94
  Requires-Dist: unstructured[xlsx]; extra == "xlsx"
95
+ Requires-Dist: numpy; extra == "xlsx"
96
+ Requires-Dist: pandas; extra == "xlsx"
64
97
  Provides-Extra: airtable
65
98
  Requires-Dist: pyairtable; extra == "airtable"
99
+ Requires-Dist: numpy; extra == "airtable"
100
+ Requires-Dist: pandas; extra == "airtable"
66
101
  Provides-Extra: astradb
67
102
  Requires-Dist: astrapy; extra == "astradb"
103
+ Requires-Dist: numpy; extra == "astradb"
104
+ Requires-Dist: pandas; extra == "astradb"
68
105
  Provides-Extra: azure
69
106
  Requires-Dist: fsspec; extra == "azure"
70
107
  Requires-Dist: adlfs; extra == "azure"
108
+ Requires-Dist: numpy; extra == "azure"
109
+ Requires-Dist: pandas; extra == "azure"
71
110
  Provides-Extra: azure-ai-search
72
111
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
112
+ Requires-Dist: numpy; extra == "azure-ai-search"
113
+ Requires-Dist: pandas; extra == "azure-ai-search"
73
114
  Provides-Extra: biomed
74
- Requires-Dist: requests; extra == "biomed"
75
115
  Requires-Dist: bs4; extra == "biomed"
116
+ Requires-Dist: requests; extra == "biomed"
117
+ Requires-Dist: numpy; extra == "biomed"
118
+ Requires-Dist: pandas; extra == "biomed"
76
119
  Provides-Extra: box
77
120
  Requires-Dist: fsspec; extra == "box"
78
121
  Requires-Dist: boxfs; extra == "box"
122
+ Requires-Dist: numpy; extra == "box"
123
+ Requires-Dist: pandas; extra == "box"
79
124
  Provides-Extra: chroma
80
125
  Requires-Dist: chromadb; extra == "chroma"
126
+ Requires-Dist: numpy; extra == "chroma"
127
+ Requires-Dist: pandas; extra == "chroma"
81
128
  Provides-Extra: clarifai
82
129
  Requires-Dist: clarifai; extra == "clarifai"
130
+ Requires-Dist: numpy; extra == "clarifai"
131
+ Requires-Dist: pandas; extra == "clarifai"
83
132
  Provides-Extra: confluence
84
- Requires-Dist: requests; extra == "confluence"
85
133
  Requires-Dist: atlassian-python-api; extra == "confluence"
134
+ Requires-Dist: requests; extra == "confluence"
135
+ Requires-Dist: numpy; extra == "confluence"
136
+ Requires-Dist: pandas; extra == "confluence"
86
137
  Provides-Extra: couchbase
87
138
  Requires-Dist: couchbase; extra == "couchbase"
139
+ Requires-Dist: numpy; extra == "couchbase"
140
+ Requires-Dist: pandas; extra == "couchbase"
88
141
  Provides-Extra: delta-table
89
- Requires-Dist: deltalake; extra == "delta-table"
90
142
  Requires-Dist: boto3; extra == "delta-table"
143
+ Requires-Dist: deltalake; extra == "delta-table"
144
+ Requires-Dist: numpy; extra == "delta-table"
145
+ Requires-Dist: pandas; extra == "delta-table"
91
146
  Provides-Extra: discord
92
147
  Requires-Dist: discord.py; extra == "discord"
148
+ Requires-Dist: numpy; extra == "discord"
149
+ Requires-Dist: pandas; extra == "discord"
93
150
  Provides-Extra: dropbox
94
151
  Requires-Dist: fsspec; extra == "dropbox"
95
152
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
153
+ Requires-Dist: numpy; extra == "dropbox"
154
+ Requires-Dist: pandas; extra == "dropbox"
96
155
  Provides-Extra: duckdb
97
156
  Requires-Dist: duckdb; extra == "duckdb"
157
+ Requires-Dist: numpy; extra == "duckdb"
158
+ Requires-Dist: pandas; extra == "duckdb"
98
159
  Provides-Extra: elasticsearch
99
160
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
161
+ Requires-Dist: numpy; extra == "elasticsearch"
162
+ Requires-Dist: pandas; extra == "elasticsearch"
100
163
  Provides-Extra: gcs
101
- Requires-Dist: gcsfs; extra == "gcs"
102
164
  Requires-Dist: bs4; extra == "gcs"
103
165
  Requires-Dist: fsspec; extra == "gcs"
166
+ Requires-Dist: gcsfs; extra == "gcs"
167
+ Requires-Dist: numpy; extra == "gcs"
168
+ Requires-Dist: pandas; extra == "gcs"
104
169
  Provides-Extra: github
105
- Requires-Dist: requests; extra == "github"
106
170
  Requires-Dist: pygithub>1.58.0; extra == "github"
171
+ Requires-Dist: requests; extra == "github"
172
+ Requires-Dist: numpy; extra == "github"
173
+ Requires-Dist: pandas; extra == "github"
107
174
  Provides-Extra: gitlab
108
175
  Requires-Dist: python-gitlab; extra == "gitlab"
176
+ Requires-Dist: numpy; extra == "gitlab"
177
+ Requires-Dist: pandas; extra == "gitlab"
109
178
  Provides-Extra: google-drive
110
179
  Requires-Dist: google-api-python-client; extra == "google-drive"
180
+ Requires-Dist: numpy; extra == "google-drive"
181
+ Requires-Dist: pandas; extra == "google-drive"
111
182
  Provides-Extra: hubspot
112
183
  Requires-Dist: urllib3; extra == "hubspot"
113
184
  Requires-Dist: hubspot-api-client; extra == "hubspot"
185
+ Requires-Dist: numpy; extra == "hubspot"
186
+ Requires-Dist: pandas; extra == "hubspot"
187
+ Provides-Extra: ibm-watsonx-s3
188
+ Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
189
+ Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
190
+ Requires-Dist: httpx; extra == "ibm-watsonx-s3"
191
+ Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
192
+ Requires-Dist: numpy; extra == "ibm-watsonx-s3"
193
+ Requires-Dist: pandas; extra == "ibm-watsonx-s3"
114
194
  Provides-Extra: jira
115
195
  Requires-Dist: atlassian-python-api; extra == "jira"
196
+ Requires-Dist: numpy; extra == "jira"
197
+ Requires-Dist: pandas; extra == "jira"
116
198
  Provides-Extra: kafka
117
199
  Requires-Dist: confluent-kafka; extra == "kafka"
200
+ Requires-Dist: numpy; extra == "kafka"
201
+ Requires-Dist: pandas; extra == "kafka"
118
202
  Provides-Extra: kdbai
119
203
  Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
204
+ Requires-Dist: numpy; extra == "kdbai"
205
+ Requires-Dist: pandas; extra == "kdbai"
120
206
  Provides-Extra: lancedb
121
207
  Requires-Dist: lancedb; extra == "lancedb"
208
+ Requires-Dist: numpy; extra == "lancedb"
209
+ Requires-Dist: pandas; extra == "lancedb"
122
210
  Provides-Extra: milvus
123
211
  Requires-Dist: pymilvus; extra == "milvus"
212
+ Requires-Dist: numpy; extra == "milvus"
213
+ Requires-Dist: pandas; extra == "milvus"
124
214
  Provides-Extra: mongodb
125
215
  Requires-Dist: pymongo; extra == "mongodb"
216
+ Requires-Dist: numpy; extra == "mongodb"
217
+ Requires-Dist: pandas; extra == "mongodb"
126
218
  Provides-Extra: neo4j
127
219
  Requires-Dist: networkx; extra == "neo4j"
128
220
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
129
221
  Requires-Dist: cymple; extra == "neo4j"
222
+ Requires-Dist: numpy; extra == "neo4j"
223
+ Requires-Dist: pandas; extra == "neo4j"
130
224
  Provides-Extra: notion
131
- Requires-Dist: backoff; extra == "notion"
132
225
  Requires-Dist: httpx; extra == "notion"
133
- Requires-Dist: notion-client; extra == "notion"
134
226
  Requires-Dist: htmlBuilder; extra == "notion"
227
+ Requires-Dist: notion-client; extra == "notion"
228
+ Requires-Dist: backoff; extra == "notion"
229
+ Requires-Dist: numpy; extra == "notion"
230
+ Requires-Dist: pandas; extra == "notion"
135
231
  Provides-Extra: onedrive
136
232
  Requires-Dist: bs4; extra == "onedrive"
137
233
  Requires-Dist: msal; extra == "onedrive"
138
234
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
235
+ Requires-Dist: numpy; extra == "onedrive"
236
+ Requires-Dist: pandas; extra == "onedrive"
139
237
  Provides-Extra: opensearch
140
238
  Requires-Dist: opensearch-py; extra == "opensearch"
239
+ Requires-Dist: numpy; extra == "opensearch"
240
+ Requires-Dist: pandas; extra == "opensearch"
141
241
  Provides-Extra: outlook
142
242
  Requires-Dist: msal; extra == "outlook"
143
243
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
244
+ Requires-Dist: numpy; extra == "outlook"
245
+ Requires-Dist: pandas; extra == "outlook"
144
246
  Provides-Extra: pinecone
145
247
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
248
+ Requires-Dist: numpy; extra == "pinecone"
249
+ Requires-Dist: pandas; extra == "pinecone"
146
250
  Provides-Extra: postgres
147
251
  Requires-Dist: psycopg2-binary; extra == "postgres"
252
+ Requires-Dist: numpy; extra == "postgres"
253
+ Requires-Dist: pandas; extra == "postgres"
148
254
  Provides-Extra: qdrant
149
255
  Requires-Dist: qdrant-client; extra == "qdrant"
256
+ Requires-Dist: numpy; extra == "qdrant"
257
+ Requires-Dist: pandas; extra == "qdrant"
150
258
  Provides-Extra: reddit
151
259
  Requires-Dist: praw; extra == "reddit"
260
+ Requires-Dist: numpy; extra == "reddit"
261
+ Requires-Dist: pandas; extra == "reddit"
152
262
  Provides-Extra: redis
153
263
  Requires-Dist: redis; extra == "redis"
264
+ Requires-Dist: numpy; extra == "redis"
265
+ Requires-Dist: pandas; extra == "redis"
154
266
  Provides-Extra: s3
155
- Requires-Dist: s3fs; extra == "s3"
156
267
  Requires-Dist: fsspec; extra == "s3"
268
+ Requires-Dist: s3fs; extra == "s3"
269
+ Requires-Dist: numpy; extra == "s3"
270
+ Requires-Dist: pandas; extra == "s3"
157
271
  Provides-Extra: sharepoint
158
272
  Requires-Dist: msal; extra == "sharepoint"
159
273
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
274
+ Requires-Dist: numpy; extra == "sharepoint"
275
+ Requires-Dist: pandas; extra == "sharepoint"
160
276
  Provides-Extra: salesforce
161
277
  Requires-Dist: simple-salesforce; extra == "salesforce"
278
+ Requires-Dist: numpy; extra == "salesforce"
279
+ Requires-Dist: pandas; extra == "salesforce"
162
280
  Provides-Extra: sftp
163
281
  Requires-Dist: fsspec; extra == "sftp"
164
282
  Requires-Dist: paramiko; extra == "sftp"
283
+ Requires-Dist: numpy; extra == "sftp"
284
+ Requires-Dist: pandas; extra == "sftp"
165
285
  Provides-Extra: slack
166
286
  Requires-Dist: slack_sdk[optional]; extra == "slack"
287
+ Requires-Dist: numpy; extra == "slack"
288
+ Requires-Dist: pandas; extra == "slack"
167
289
  Provides-Extra: snowflake
168
290
  Requires-Dist: psycopg2-binary; extra == "snowflake"
169
291
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
292
+ Requires-Dist: numpy; extra == "snowflake"
293
+ Requires-Dist: pandas; extra == "snowflake"
170
294
  Provides-Extra: wikipedia
171
295
  Requires-Dist: wikipedia; extra == "wikipedia"
296
+ Requires-Dist: numpy; extra == "wikipedia"
297
+ Requires-Dist: pandas; extra == "wikipedia"
172
298
  Provides-Extra: weaviate
173
299
  Requires-Dist: weaviate-client; extra == "weaviate"
300
+ Requires-Dist: numpy; extra == "weaviate"
301
+ Requires-Dist: pandas; extra == "weaviate"
174
302
  Provides-Extra: databricks-volumes
175
303
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
304
+ Requires-Dist: numpy; extra == "databricks-volumes"
305
+ Requires-Dist: pandas; extra == "databricks-volumes"
176
306
  Provides-Extra: databricks-delta-tables
177
307
  Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
308
+ Requires-Dist: numpy; extra == "databricks-delta-tables"
309
+ Requires-Dist: pandas; extra == "databricks-delta-tables"
178
310
  Provides-Extra: singlestore
179
311
  Requires-Dist: singlestoredb; extra == "singlestore"
312
+ Requires-Dist: numpy; extra == "singlestore"
313
+ Requires-Dist: pandas; extra == "singlestore"
180
314
  Provides-Extra: vectara
181
- Requires-Dist: requests; extra == "vectara"
182
315
  Requires-Dist: httpx; extra == "vectara"
183
316
  Requires-Dist: aiofiles; extra == "vectara"
317
+ Requires-Dist: requests; extra == "vectara"
318
+ Requires-Dist: numpy; extra == "vectara"
319
+ Requires-Dist: pandas; extra == "vectara"
184
320
  Provides-Extra: vastdb
321
+ Requires-Dist: ibis; extra == "vastdb"
185
322
  Requires-Dist: pyarrow; extra == "vastdb"
186
323
  Requires-Dist: vastdb; extra == "vastdb"
187
- Requires-Dist: ibis; extra == "vastdb"
324
+ Requires-Dist: numpy; extra == "vastdb"
325
+ Requires-Dist: pandas; extra == "vastdb"
188
326
  Provides-Extra: zendesk
189
327
  Requires-Dist: bs4; extra == "zendesk"
190
328
  Requires-Dist: httpx; extra == "zendesk"
191
329
  Requires-Dist: aiofiles; extra == "zendesk"
330
+ Requires-Dist: numpy; extra == "zendesk"
331
+ Requires-Dist: pandas; extra == "zendesk"
192
332
  Provides-Extra: embed-huggingface
193
333
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
334
+ Requires-Dist: numpy; extra == "embed-huggingface"
335
+ Requires-Dist: pandas; extra == "embed-huggingface"
194
336
  Provides-Extra: embed-octoai
195
- Requires-Dist: openai; extra == "embed-octoai"
196
337
  Requires-Dist: tiktoken; extra == "embed-octoai"
338
+ Requires-Dist: openai; extra == "embed-octoai"
339
+ Requires-Dist: numpy; extra == "embed-octoai"
340
+ Requires-Dist: pandas; extra == "embed-octoai"
197
341
  Provides-Extra: embed-vertexai
198
342
  Requires-Dist: vertexai; extra == "embed-vertexai"
343
+ Requires-Dist: numpy; extra == "embed-vertexai"
344
+ Requires-Dist: pandas; extra == "embed-vertexai"
199
345
  Provides-Extra: embed-voyageai
200
346
  Requires-Dist: voyageai; extra == "embed-voyageai"
347
+ Requires-Dist: numpy; extra == "embed-voyageai"
348
+ Requires-Dist: pandas; extra == "embed-voyageai"
201
349
  Provides-Extra: embed-mixedbreadai
202
350
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
351
+ Requires-Dist: numpy; extra == "embed-mixedbreadai"
352
+ Requires-Dist: pandas; extra == "embed-mixedbreadai"
203
353
  Provides-Extra: openai
204
- Requires-Dist: openai; extra == "openai"
205
354
  Requires-Dist: tiktoken; extra == "openai"
355
+ Requires-Dist: openai; extra == "openai"
356
+ Requires-Dist: numpy; extra == "openai"
357
+ Requires-Dist: pandas; extra == "openai"
206
358
  Provides-Extra: bedrock
207
- Requires-Dist: boto3; extra == "bedrock"
208
359
  Requires-Dist: aioboto3; extra == "bedrock"
360
+ Requires-Dist: boto3; extra == "bedrock"
361
+ Requires-Dist: numpy; extra == "bedrock"
362
+ Requires-Dist: pandas; extra == "bedrock"
209
363
  Provides-Extra: togetherai
210
364
  Requires-Dist: together; extra == "togetherai"
365
+ Requires-Dist: numpy; extra == "togetherai"
366
+ Requires-Dist: pandas; extra == "togetherai"
211
367
  Dynamic: author
212
368
  Dynamic: author-email
213
369
  Dynamic: classifier