unstructured-ingest 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.

Files changed (39)
  1. test/integration/connectors/conftest.py +13 -0
  2. test/integration/connectors/databricks_tests/test_volumes_native.py +8 -4
  3. test/integration/connectors/sql/test_postgres.py +6 -10
  4. test/integration/connectors/sql/test_singlestore.py +156 -0
  5. test/integration/connectors/sql/test_snowflake.py +205 -0
  6. test/integration/connectors/sql/test_sqlite.py +6 -10
  7. test/integration/connectors/test_delta_table.py +138 -0
  8. test/integration/connectors/test_s3.py +1 -1
  9. test/integration/connectors/utils/docker.py +78 -0
  10. test/integration/connectors/utils/docker_compose.py +23 -8
  11. test/integration/connectors/utils/validation.py +93 -2
  12. unstructured_ingest/__version__.py +1 -1
  13. unstructured_ingest/v2/cli/utils/click.py +32 -1
  14. unstructured_ingest/v2/cli/utils/model_conversion.py +10 -3
  15. unstructured_ingest/v2/interfaces/file_data.py +1 -0
  16. unstructured_ingest/v2/interfaces/indexer.py +4 -1
  17. unstructured_ingest/v2/pipeline/pipeline.py +10 -2
  18. unstructured_ingest/v2/pipeline/steps/index.py +18 -1
  19. unstructured_ingest/v2/processes/connectors/__init__.py +13 -6
  20. unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
  21. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +3 -1
  22. unstructured_ingest/v2/processes/connectors/delta_table.py +185 -0
  23. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +1 -0
  24. unstructured_ingest/v2/processes/connectors/slack.py +248 -0
  25. unstructured_ingest/v2/processes/connectors/sql/__init__.py +15 -2
  26. unstructured_ingest/v2/processes/connectors/sql/postgres.py +33 -56
  27. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
  28. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +162 -0
  29. unstructured_ingest/v2/processes/connectors/sql/sql.py +51 -12
  30. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +31 -32
  31. unstructured_ingest/v2/unstructured_api.py +1 -1
  32. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA +19 -17
  33. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD +37 -31
  34. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -250
  35. unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
  36. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md +0 -0
  37. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL +0 -0
  38. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt +0 -0
  39. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt +0 -0

unstructured_ingest/v2/processes/connectors/sql/snowflake.py
@@ -0,0 +1,162 @@
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Generator, Optional
+
+ import numpy as np
+ import pandas as pd
+ from pydantic import Field, Secret
+
+ from unstructured_ingest.utils.dep_check import requires_dependencies
+ from unstructured_ingest.v2.processes.connector_registry import (
+     DestinationRegistryEntry,
+     SourceRegistryEntry,
+ )
+ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
+     PostgresDownloader,
+     PostgresDownloaderConfig,
+     PostgresIndexer,
+     PostgresIndexerConfig,
+     PostgresUploader,
+     PostgresUploaderConfig,
+     PostgresUploadStager,
+     PostgresUploadStagerConfig,
+ )
+ from unstructured_ingest.v2.processes.connectors.sql.sql import SQLAccessConfig, SQLConnectionConfig
+
+ if TYPE_CHECKING:
+     from snowflake.connector import SnowflakeConnection
+     from snowflake.connector.cursor import SnowflakeCursor
+
+ CONNECTOR_TYPE = "snowflake"
+
+
+ class SnowflakeAccessConfig(SQLAccessConfig):
+     password: Optional[str] = Field(default=None, description="DB password")
+
+
+ class SnowflakeConnectionConfig(SQLConnectionConfig):
+     access_config: Secret[SnowflakeAccessConfig] = Field(
+         default=SnowflakeAccessConfig(), validate_default=True
+     )
+     account: str = Field(
+         default=None,
+         description="Your account identifier. The account identifier "
+         "does not include the snowflakecomputing.com suffix.",
+     )
+     user: Optional[str] = Field(default=None, description="DB username")
+     host: Optional[str] = Field(default=None, description="DB host")
+     port: Optional[int] = Field(default=443, description="DB host connection port")
+     database: str = Field(
+         default=None,
+         description="Database name.",
+     )
+     db_schema: str = Field(default=None, description="Database schema.", alias="schema")
+     role: str = Field(
+         default=None,
+         description="Database role.",
+     )
+     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
+
+     @contextmanager
+     @requires_dependencies(["snowflake"], extras="snowflake")
+     def get_connection(self) -> Generator["SnowflakeConnection", None, None]:
+         # https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-snowflake-connector-methods-connect
+         from snowflake.connector import connect
+
+         connect_kwargs = self.model_dump()
+         connect_kwargs["schema"] = connect_kwargs.pop("db_schema")
+         connect_kwargs.pop("access_configs", None)
+         connect_kwargs["password"] = self.access_config.get_secret_value().password
+         # https://peps.python.org/pep-0249/#paramstyle
+         connect_kwargs["paramstyle"] = "qmark"
+         connection = connect(**connect_kwargs)
+         try:
+             yield connection
+         finally:
+             connection.commit()
+             connection.close()
+
+     @contextmanager
+     def get_cursor(self) -> Generator["SnowflakeCursor", None, None]:
+         with self.get_connection() as connection:
+             cursor = connection.cursor()
+             try:
+                 yield cursor
+             finally:
+                 cursor.close()
+
+
+ class SnowflakeIndexerConfig(PostgresIndexerConfig):
+     pass
+
+
+ @dataclass
+ class SnowflakeIndexer(PostgresIndexer):
+     connection_config: SnowflakeConnectionConfig
+     index_config: SnowflakeIndexerConfig
+     connector_type: str = CONNECTOR_TYPE
+
+
+ class SnowflakeDownloaderConfig(PostgresDownloaderConfig):
+     pass
+
+
+ @dataclass
+ class SnowflakeDownloader(PostgresDownloader):
+     connection_config: SnowflakeConnectionConfig
+     download_config: SnowflakeDownloaderConfig
+     connector_type: str = CONNECTOR_TYPE
+
+
+ class SnowflakeUploadStagerConfig(PostgresUploadStagerConfig):
+     pass
+
+
+ class SnowflakeUploadStager(PostgresUploadStager):
+     upload_stager_config: SnowflakeUploadStagerConfig
+
+
+ class SnowflakeUploaderConfig(PostgresUploaderConfig):
+     pass
+
+
+ @dataclass
+ class SnowflakeUploader(PostgresUploader):
+     upload_config: SnowflakeUploaderConfig = field(default_factory=SnowflakeUploaderConfig)
+     connection_config: SnowflakeConnectionConfig
+     connector_type: str = CONNECTOR_TYPE
+     values_delimiter: str = "?"
+
+     def upload_contents(self, path: Path) -> None:
+         df = pd.read_json(path, orient="records", lines=True)
+         df.replace({np.nan: None}, inplace=True)
+
+         columns = list(df.columns)
+         stmt = f"INSERT INTO {self.upload_config.table_name} ({','.join(columns)}) VALUES({','.join([self.values_delimiter for x in columns])})" # noqa E501
+
+         for rows in pd.read_json(
+             path, orient="records", lines=True, chunksize=self.upload_config.batch_size
+         ):
+             with self.connection_config.get_cursor() as cursor:
+                 values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
+                 # TODO: executemany break on 'Binding data in type (list) is not supported'
+                 for val in values:
+                     cursor.execute(stmt, val)
+
+
+ snowflake_source_entry = SourceRegistryEntry(
+     connection_config=SnowflakeConnectionConfig,
+     indexer_config=SnowflakeIndexerConfig,
+     indexer=SnowflakeIndexer,
+     downloader_config=SnowflakeDownloaderConfig,
+     downloader=SnowflakeDownloader,
+ )
+
+ snowflake_destination_entry = DestinationRegistryEntry(
+     connection_config=SnowflakeConnectionConfig,
+     uploader=SnowflakeUploader,
+     uploader_config=SnowflakeUploaderConfig,
+     upload_stager=SnowflakeUploadStager,
+     upload_stager_config=SnowflakeUploadStagerConfig,
+ )
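
For orientation, here is a minimal usage sketch of the new Snowflake connection config; the account, user, database, schema, role, and password values below are placeholders, not taken from this release.

    from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
        SnowflakeAccessConfig,
        SnowflakeConnectionConfig,
    )

    # All identifiers and credentials below are placeholders.
    connection_config = SnowflakeConnectionConfig(
        account="my_org-my_account",
        user="ingest_user",
        database="INGEST_DB",
        schema="PUBLIC",  # populates db_schema through its "schema" alias
        role="INGEST_ROLE",
        access_config=SnowflakeAccessConfig(password="..."),
    )

    # get_cursor() wraps get_connection(); the connection is committed and closed on exit.
    with connection_config.get_cursor() as cursor:
        cursor.execute("SELECT CURRENT_VERSION()")
        print(cursor.fetchone())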

unstructured_ingest/v2/processes/connectors/sql/sql.py
@@ -3,12 +3,14 @@ import json
  import sys
  import uuid
  from abc import ABC, abstractmethod
+ from contextlib import contextmanager
  from dataclasses import dataclass, field, replace
  from datetime import date, datetime
  from pathlib import Path
  from time import time
  from typing import Any, Generator, Union

+ import numpy as np
  import pandas as pd
  from dateutil import parser
  from pydantic import Field, Secret
@@ -94,7 +96,13 @@ class SQLConnectionConfig(ConnectionConfig, ABC):
      access_config: Secret[SQLAccessConfig] = Field(default=SQLAccessConfig(), validate_default=True)

      @abstractmethod
-     def get_connection(self) -> Any:
+     @contextmanager
+     def get_connection(self) -> Generator[Any, None, None]:
+         pass
+
+     @abstractmethod
+     @contextmanager
+     def get_cursor(self) -> Generator[Any, None, None]:
          pass


@@ -108,16 +116,19 @@ class SQLIndexer(Indexer, ABC):
      connection_config: SQLConnectionConfig
      index_config: SQLIndexerConfig

-     @abstractmethod
      def _get_doc_ids(self) -> list[str]:
-         pass
+         with self.connection_config.get_cursor() as cursor:
+             cursor.execute(
+                 f"SELECT {self.index_config.id_column} FROM {self.index_config.table_name}"
+             )
+             results = cursor.fetchall()
+             ids = [result[0] for result in results]
+             return ids

      def precheck(self) -> None:
          try:
-             connection = self.connection_config.get_connection()
-             cursor = connection.cursor()
-             cursor.execute("SELECT 1;")
-             cursor.close()
+             with self.connection_config.get_cursor() as cursor:
+                 cursor.execute("SELECT 1;")
          except Exception as e:
              logger.error(f"failed to validate connection: {e}", exc_info=True)
              raise SourceConnectionError(f"failed to validate connection: {e}")
@@ -198,7 +209,7 @@ class SQLDownloader(Downloader, ABC):
              f"Downloading results from table {table_name} and id {record_id} to {download_path}"
          )
          download_path.parent.mkdir(parents=True, exist_ok=True)
-         result.to_csv(download_path)
+         result.to_csv(download_path, index=False)
          copied_file_data = replace(file_data)
          copied_file_data.identifier = filename_id
          copied_file_data.doc_type = "file"
@@ -285,6 +296,7 @@ class SQLUploaderConfig(UploaderConfig):
  class SQLUploader(Uploader):
      upload_config: SQLUploaderConfig
      connection_config: SQLConnectionConfig
+     values_delimiter: str = "?"

      def precheck(self) -> None:
          try:
@@ -296,15 +308,42 @@ class SQLUploader(Uploader):
              logger.error(f"failed to validate connection: {e}", exc_info=True)
              raise DestinationConnectionError(f"failed to validate connection: {e}")

-     @abstractmethod
      def prepare_data(
          self, columns: list[str], data: tuple[tuple[Any, ...], ...]
      ) -> list[tuple[Any, ...]]:
-         pass
+         output = []
+         for row in data:
+             parsed = []
+             for column_name, value in zip(columns, row):
+                 if column_name in _DATE_COLUMNS:
+                     if value is None:
+                         parsed.append(None)
+                     else:
+                         parsed.append(parse_date_string(value))
+                 else:
+                     parsed.append(value)
+             output.append(tuple(parsed))
+         return output

-     @abstractmethod
      def upload_contents(self, path: Path) -> None:
-         pass
+         df = pd.read_json(path, orient="records", lines=True)
+         df.replace({np.nan: None}, inplace=True)
+
+         columns = list(df.columns)
+         stmt = f"INSERT INTO {self.upload_config.table_name} ({','.join(columns)}) VALUES({','.join([self.values_delimiter for x in columns])})" # noqa E501
+
+         for rows in pd.read_json(
+             path, orient="records", lines=True, chunksize=self.upload_config.batch_size
+         ):
+             with self.connection_config.get_cursor() as cursor:
+                 values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
+                 # for val in values:
+                 #     try:
+                 #         cursor.execute(stmt, val)
+                 #     except Exception as e:
+                 #         print(f"Error: {e}")
+                 #         print(f"failed to write {len(columns)}, {len(val)}: {stmt} -> {val}")
+                 cursor.executemany(stmt, values)

      def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
          self.upload_contents(path=path)
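
Since prepare_data and upload_contents are now implemented on the base SQLUploader, dialect-specific uploaders mainly supply their own connection config and, where their driver expects a different placeholder, override the class-level values_delimiter (SnowflakeUploader above keeps the "?" default). As a standalone illustration of how the INSERT statement is assembled, with made-up table and column names:

    # Standalone illustration of the stmt f-string above; table and column names are made up.
    table_name = "elements"
    values_delimiter = "?"
    columns = ["id", "text", "date_created"]
    stmt = (
        f"INSERT INTO {table_name} ({','.join(columns)}) "
        f"VALUES({','.join([values_delimiter for _ in columns])})"
    )
    print(stmt)  # INSERT INTO elements (id,text,date_created) VALUES(?,?,?)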

unstructured_ingest/v2/processes/connectors/sql/sqlite.py
@@ -1,15 +1,17 @@
  import json
+ from contextlib import contextmanager
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import TYPE_CHECKING, Any
+ from typing import TYPE_CHECKING, Any, Generator

- import numpy as np
- import pandas as pd
  from pydantic import Field, Secret, model_validator

  from unstructured_ingest.v2.interfaces import FileData
  from unstructured_ingest.v2.logger import logger
- from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
+ from unstructured_ingest.v2.processes.connector_registry import (
+     DestinationRegistryEntry,
+     SourceRegistryEntry,
+ )
  from unstructured_ingest.v2.processes.connectors.sql.sql import (
      _DATE_COLUMNS,
      SQLAccessConfig,
@@ -27,6 +29,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (

  if TYPE_CHECKING:
      from sqlite3 import Connection as SqliteConnection
+     from sqlite3 import Cursor as SqliteCursor

  CONNECTOR_TYPE = "sqlite"

@@ -51,10 +54,25 @@ class SQLiteConnectionConfig(SQLConnectionConfig):
              raise ValueError(f"{self.database_path} is not a valid file")
          return self

-     def get_connection(self) -> "SqliteConnection":
+     @contextmanager
+     def get_connection(self) -> Generator["SqliteConnection", None, None]:
          from sqlite3 import connect

-         return connect(database=self.database_path)
+         connection = connect(database=self.database_path)
+         try:
+             yield connection
+         finally:
+             connection.commit()
+             connection.close()
+
+     @contextmanager
+     def get_cursor(self) -> Generator["SqliteCursor", None, None]:
+         with self.get_connection() as connection:
+             cursor = connection.cursor()
+             try:
+                 yield cursor
+             finally:
+                 cursor.close()


  class SQLiteIndexerConfig(SQLIndexerConfig):
@@ -67,16 +85,6 @@ class SQLiteIndexer(SQLIndexer):
      index_config: SQLIndexerConfig
      connector_type: str = CONNECTOR_TYPE

-     def _get_doc_ids(self) -> list[str]:
-         with self.connection_config.get_connection() as sqlite_connection:
-             cursor = sqlite_connection.cursor()
-             cursor.execute(
-                 f"SELECT {self.index_config.id_column} FROM {self.index_config.table_name}"
-             )
-             results = cursor.fetchall()
-             ids = [result[0] for result in results]
-             return ids
-

  class SQLiteDownloaderConfig(SQLDownloaderConfig):
      pass
@@ -145,23 +153,14 @@ class SQLiteUploader(SQLUploader):
              output.append(tuple(parsed))
          return output

-     def upload_contents(self, path: Path) -> None:
-         df = pd.read_json(path, orient="records", lines=True)
-         logger.debug(f"uploading {len(df)} entries to {self.connection_config.database_path} ")
-         df.replace({np.nan: None}, inplace=True)
-
-         columns = tuple(df.columns)
-         stmt = f"INSERT INTO {self.upload_config.table_name} ({','.join(columns)}) \
-             VALUES({','.join(['?' for x in columns])})" # noqa E501
-
-         for rows in pd.read_json(
-             path, orient="records", lines=True, chunksize=self.upload_config.batch_size
-         ):
-             with self.connection_config.get_connection() as conn:
-                 values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
-                 conn.executemany(stmt, values)
-                 conn.commit()

+ sqlite_source_entry = SourceRegistryEntry(
+     connection_config=SQLiteConnectionConfig,
+     indexer_config=SQLiteIndexerConfig,
+     indexer=SQLIndexer,
+     downloader_config=SQLiteDownloaderConfig,
+     downloader=SQLiteDownloader,
+ )

  sqlite_destination_entry = DestinationRegistryEntry(
      connection_config=SQLiteConnectionConfig,
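
As with Snowflake, the new get_cursor helper on SQLiteConnectionConfig commits and closes the connection on exit; a quick sketch, assuming an existing database file at the hypothetical path below:

    from unstructured_ingest.v2.processes.connectors.sql.sqlite import SQLiteConnectionConfig

    # Hypothetical path; the config's validator requires an existing file.
    config = SQLiteConnectionConfig(database_path="/tmp/elements.db")

    with config.get_cursor() as cursor:  # commit and close happen on exit
        cursor.execute("SELECT 1;")
        print(cursor.fetchone())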

unstructured_ingest/v2/unstructured_api.py
@@ -26,7 +26,7 @@ def create_partition_request(filename: Path, parameters_dict: dict) -> "Partitio
      # NOTE(austin): PartitionParameters is a Pydantic model in v0.26.0
      # Prior to this it was a dataclass which doesn't have .__fields
      try:
-         possible_fields = PartitionParameters.__fields__
+         possible_fields = PartitionParameters.model_fields
      except AttributeError:
          possible_fields = [f.name for f in fields(PartitionParameters)]

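
This one-line change keeps the existing dataclass fallback intact: on unstructured-client releases where PartitionParameters is a Pydantic v2 model, model_fields lists its fields, while older releases where it is a dataclass still take the except branch. A standalone illustration of both code paths, using throwaway stand-in classes rather than the real PartitionParameters:

    from dataclasses import dataclass, fields
    from pydantic import BaseModel

    class PydanticParams(BaseModel):  # stand-in for the Pydantic-model case
        strategy: str = "auto"

    @dataclass
    class DataclassParams:  # stand-in for the older dataclass case
        strategy: str = "auto"

    for params_cls in (PydanticParams, DataclassParams):
        try:
            possible_fields = params_cls.model_fields  # dict keyed by field name
        except AttributeError:
            possible_fields = [f.name for f in fields(params_cls)]
        print("strategy" in possible_fields)  # True on both paths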

{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: unstructured-ingest
- Version: 0.1.1
+ Version: 0.2.1
  Summary: A library that prepares raw documents for downstream ML tasks.
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
  Author: Unstructured Technologies
@@ -22,30 +22,30 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.9.0,<3.13
  Description-Content-Type: text/markdown
  License-File: LICENSE.md
+ Requires-Dist: tqdm
  Requires-Dist: python-dateutil
  Requires-Dist: pandas
+ Requires-Dist: click
  Requires-Dist: pydantic>=2.7
  Requires-Dist: dataclasses-json
  Requires-Dist: opentelemetry-sdk
- Requires-Dist: click
- Requires-Dist: tqdm
  Provides-Extra: airtable
  Requires-Dist: pyairtable; extra == "airtable"
  Provides-Extra: astradb
  Requires-Dist: astrapy; extra == "astradb"
  Provides-Extra: azure
- Requires-Dist: adlfs; extra == "azure"
  Requires-Dist: fsspec; extra == "azure"
+ Requires-Dist: adlfs; extra == "azure"
  Provides-Extra: azure-cognitive-search
  Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
  Provides-Extra: bedrock
  Requires-Dist: boto3; extra == "bedrock"
  Provides-Extra: biomed
- Requires-Dist: bs4; extra == "biomed"
  Requires-Dist: requests; extra == "biomed"
+ Requires-Dist: bs4; extra == "biomed"
  Provides-Extra: box
- Requires-Dist: boxfs; extra == "box"
  Requires-Dist: fsspec; extra == "box"
+ Requires-Dist: boxfs; extra == "box"
  Provides-Extra: chroma
  Requires-Dist: chromadb; extra == "chroma"
  Provides-Extra: clarifai
@@ -60,8 +60,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
  Provides-Extra: databricks-volumes
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
  Provides-Extra: delta-table
- Requires-Dist: deltalake; extra == "delta-table"
  Requires-Dist: fsspec; extra == "delta-table"
+ Requires-Dist: deltalake; extra == "delta-table"
  Provides-Extra: discord
  Requires-Dist: discord-py; extra == "discord"
  Provides-Extra: doc
@@ -88,8 +88,8 @@ Provides-Extra: epub
  Requires-Dist: unstructured[epub]; extra == "epub"
  Provides-Extra: gcs
  Requires-Dist: gcsfs; extra == "gcs"
- Requires-Dist: bs4; extra == "gcs"
  Requires-Dist: fsspec; extra == "gcs"
+ Requires-Dist: bs4; extra == "gcs"
  Provides-Extra: github
  Requires-Dist: pygithub>1.58.0; extra == "github"
  Requires-Dist: requests; extra == "github"
@@ -98,8 +98,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
  Provides-Extra: google-drive
  Requires-Dist: google-api-python-client; extra == "google-drive"
  Provides-Extra: hubspot
- Requires-Dist: hubspot-api-client; extra == "hubspot"
  Requires-Dist: urllib3; extra == "hubspot"
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
  Provides-Extra: jira
  Requires-Dist: atlassian-python-api; extra == "jira"
  Provides-Extra: kafka
@@ -115,16 +115,16 @@ Requires-Dist: pymongo; extra == "mongodb"
  Provides-Extra: msg
  Requires-Dist: unstructured[msg]; extra == "msg"
  Provides-Extra: notion
- Requires-Dist: notion-client; extra == "notion"
- Requires-Dist: htmlBuilder; extra == "notion"
- Requires-Dist: backoff; extra == "notion"
  Requires-Dist: httpx; extra == "notion"
+ Requires-Dist: backoff; extra == "notion"
+ Requires-Dist: htmlBuilder; extra == "notion"
+ Requires-Dist: notion-client; extra == "notion"
  Provides-Extra: odt
  Requires-Dist: unstructured[odt]; extra == "odt"
  Provides-Extra: onedrive
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
- Requires-Dist: bs4; extra == "onedrive"
  Requires-Dist: msal; extra == "onedrive"
+ Requires-Dist: bs4; extra == "onedrive"
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
  Provides-Extra: openai
  Requires-Dist: openai; extra == "openai"
  Requires-Dist: tiktoken; extra == "openai"
@@ -133,8 +133,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
  Provides-Extra: org
  Requires-Dist: unstructured[org]; extra == "org"
  Provides-Extra: outlook
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
  Requires-Dist: msal; extra == "outlook"
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
  Provides-Extra: pdf
  Requires-Dist: unstructured[pdf]; extra == "pdf"
  Provides-Extra: pinecone
@@ -164,12 +164,14 @@ Provides-Extra: sftp
  Requires-Dist: fsspec; extra == "sftp"
  Requires-Dist: paramiko; extra == "sftp"
  Provides-Extra: sharepoint
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
  Requires-Dist: msal; extra == "sharepoint"
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
  Provides-Extra: singlestore
  Requires-Dist: singlestoredb; extra == "singlestore"
  Provides-Extra: slack
- Requires-Dist: slack-sdk; extra == "slack"
+ Requires-Dist: slack-sdk[optional]; extra == "slack"
+ Provides-Extra: snowflake
+ Requires-Dist: snowflake; extra == "snowflake"
  Provides-Extra: togetherai
  Requires-Dist: together; extra == "togetherai"
  Provides-Extra: tsv