unstructured-ingest 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic; see the registry's advisory page for more details.

Files changed (53) hide show
  1. test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
  2. test/integration/connectors/sql/test_postgres.py +6 -2
  3. test/integration/connectors/sql/test_singlestore.py +6 -2
  4. test/integration/connectors/sql/test_snowflake.py +6 -2
  5. test/integration/connectors/sql/test_sqlite.py +6 -2
  6. test/integration/connectors/test_milvus.py +13 -0
  7. test/integration/connectors/test_onedrive.py +6 -0
  8. test/integration/connectors/test_redis.py +119 -0
  9. test/integration/connectors/test_vectara.py +270 -0
  10. test/integration/embedders/test_bedrock.py +28 -0
  11. test/integration/embedders/test_octoai.py +14 -0
  12. test/integration/embedders/test_openai.py +13 -0
  13. test/integration/embedders/test_togetherai.py +10 -0
  14. test/integration/partitioners/test_partitioner.py +2 -2
  15. test/unit/embed/test_octoai.py +8 -1
  16. unstructured_ingest/__version__.py +1 -1
  17. unstructured_ingest/embed/bedrock.py +39 -11
  18. unstructured_ingest/embed/interfaces.py +5 -0
  19. unstructured_ingest/embed/octoai.py +44 -3
  20. unstructured_ingest/embed/openai.py +37 -1
  21. unstructured_ingest/embed/togetherai.py +28 -1
  22. unstructured_ingest/embed/voyageai.py +33 -1
  23. unstructured_ingest/v2/errors.py +18 -0
  24. unstructured_ingest/v2/interfaces/file_data.py +11 -1
  25. unstructured_ingest/v2/processes/connectors/__init__.py +7 -0
  26. unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
  27. unstructured_ingest/v2/processes/connectors/chroma.py +0 -1
  28. unstructured_ingest/v2/processes/connectors/couchbase.py +2 -0
  29. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
  30. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
  31. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
  32. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
  33. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
  34. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -1
  35. unstructured_ingest/v2/processes/connectors/kafka/cloud.py +5 -2
  36. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +14 -3
  37. unstructured_ingest/v2/processes/connectors/milvus.py +15 -6
  38. unstructured_ingest/v2/processes/connectors/mongodb.py +3 -4
  39. unstructured_ingest/v2/processes/connectors/neo4j.py +2 -0
  40. unstructured_ingest/v2/processes/connectors/onedrive.py +79 -25
  41. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -1
  42. unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
  43. unstructured_ingest/v2/processes/connectors/sql/sql.py +5 -0
  44. unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
  45. unstructured_ingest/v2/unstructured_api.py +25 -2
  46. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/METADATA +20 -16
  47. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/RECORD +52 -48
  48. test/integration/connectors/test_kafka.py +0 -304
  49. /test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
  50. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/LICENSE.md +0 -0
  51. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/WHEEL +0 -0
  52. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/entry_points.txt +0 -0
  53. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,350 @@
1
+ import asyncio
2
+ import json
3
+ import uuid
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Mapping, Optional
8
+
9
+ from pydantic import Field, Secret
10
+
11
+ from unstructured_ingest.error import DestinationConnectionError
12
+ from unstructured_ingest.utils.data_prep import flatten_dict
13
+ from unstructured_ingest.utils.dep_check import requires_dependencies
14
+ from unstructured_ingest.v2.interfaces import (
15
+ AccessConfig,
16
+ ConnectionConfig,
17
+ FileData,
18
+ Uploader,
19
+ UploaderConfig,
20
+ UploadStager,
21
+ UploadStagerConfig,
22
+ )
23
+ from unstructured_ingest.v2.logger import logger
24
+ from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
25
+
26
+ BASE_URL = "https://api.vectara.io/v2"
27
+
28
+ CONNECTOR_TYPE = "vectara"
29
+
30
+
31
class VectaraAccessConfig(AccessConfig):
    # OAuth2 client-credentials pair exchanged for a JWT at the Vectara
    # token endpoint (see VectaraConnectionConfig.token_url).
    oauth_client_id: str = Field(description="Client ID")
    oauth_secret: str = Field(description="Client Secret")
34
+
35
+
36
class VectaraConnectionConfig(ConnectionConfig):
    # Secret-wrapped OAuth2 credentials used to mint bearer tokens.
    access_config: Secret[VectaraAccessConfig]
    # Vectara customer id; interpolated into token_url below.
    customer_id: str
    # Either a corpus name or a corpus key identifies the target corpus.
    # When only the name is given, the uploader resolves (and stores) the
    # matching key during its connection precheck.
    corpus_name: Optional[str] = None
    corpus_key: Optional[str] = None
    # OAuth2 token endpoint template; "{}" is filled with customer_id.
    token_url: str = "https://vectara-prod-{}.auth.us-west-2.amazoncognito.com/oauth2/token"
42
+
43
+
44
class VectaraUploadStagerConfig(UploadStagerConfig):
    """Configuration for VectaraUploadStager; no extra options beyond the base."""

    pass
46
+
47
+
48
@dataclass
class VectaraUploadStager(UploadStager):
    """Stages partitioned elements into the document shape Vectara expects.

    The whole input file becomes a single Vectara "core" document whose
    document_parts are the individual elements.
    """

    upload_stager_config: VectaraUploadStagerConfig = field(
        default_factory=lambda: VectaraUploadStagerConfig()
    )

    @staticmethod
    def conform_dict(data: dict) -> dict:
        """
        Prepares dictionary in the format that Vectara requires.
        See more detail in https://docs.vectara.com/docs/rest-api/create-corpus-document

        Select which meta-data fields to include and optionally map them to a new format.
        remove the "metadata-" prefix from the keys
        """
        # Only these (prefix-stripped) keys survive; values map old -> new name.
        allowed_fields = {
            "page_number": "page_number",
            "data_source-url": "url",
            "filename": "filename",
            "filetype": "filetype",
            "last_modified": "last_modified",
            "element_id": "element_id",
        }
        flattened = flatten_dict(data, separator="-", flatten_lists=True)
        conformed: dict = {}
        for key, value in flattened.items():
            stripped = key.replace("metadata-", "")
            if stripped in allowed_fields:
                conformed[allowed_fields[stripped]] = value
        return conformed

    def process_whole(self, input_file: Path, output_file: Path, file_data: FileData) -> None:
        """Read elements JSON from input_file and write the staged document to output_file."""
        elements = json.loads(input_file.read_text())

        logger.info(
            f"Extending {len(elements)} json elements from content in {input_file}"
        )

        # Each element contributes one document part; "text" is pulled out and
        # the remainder of the element feeds the part's metadata.
        document_parts = []
        for element in elements:
            document_parts.append(
                {
                    "text": element.pop("text", None),
                    "metadata": self.conform_dict(data=element),
                }
            )

        staged_document = {
            "id": str(uuid.uuid4()),
            "type": "core",
            "metadata": {
                "title": file_data.identifier,
            },
            "document_parts": document_parts,
        }

        output_file.write_text(json.dumps([staged_document], indent=2))
103
+
104
+
105
class VectaraUploaderConfig(UploaderConfig):
    """Configuration for VectaraUploader; no extra options beyond the base."""

    pass
107
+
108
+
109
@dataclass
class VectaraUploader(Uploader):
    """Destination uploader that indexes staged documents into a Vectara corpus.

    Authenticates with OAuth2 client credentials, caching the JWT until ~60s
    before expiry, and talks to the Vectara v2 REST API via httpx.
    """

    connector_type: str = CONNECTOR_TYPE
    upload_config: VectaraUploaderConfig
    connection_config: VectaraConnectionConfig
    # Cached bearer token and its absolute expiry (epoch seconds).
    _jwt_token: Optional[str] = field(init=False, default=None)
    _jwt_token_expires_ts: Optional[float] = field(init=False, default=None)

    def is_async(self) -> bool:
        # Uploads go through run_data_async.
        return True

    def precheck(self) -> None:
        """Validate connectivity and that the configured corpus exists.

        Raises:
            DestinationConnectionError: if the check fails for any reason.
        """
        try:
            self._check_connection_and_corpora()
        except Exception as e:
            logger.error(f"Failed to validate connection {e}", exc_info=True)
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e

    def _token_needs_refresh(self) -> bool:
        # Refresh when no token is cached or it expires within 60 seconds.
        return not self._jwt_token or self._jwt_token_expires_ts - datetime.now().timestamp() <= 60

    @property
    async def jwt_token_async(self) -> str:
        if self._token_needs_refresh():
            self._jwt_token = await self._get_jwt_token_async()
        return self._jwt_token

    @property
    def jwt_token(self) -> str:
        if self._token_needs_refresh():
            self._jwt_token = self._get_jwt_token()
        return self._jwt_token

    def _token_request_args(self) -> tuple[str, dict, dict]:
        """Build the (endpoint, headers, form data) for the OAuth2 token request."""
        access = self.connection_config.access_config.get_secret_value()
        token_endpoint = self.connection_config.token_url.format(self.connection_config.customer_id)
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
        }
        data = {
            "grant_type": "client_credentials",
            "client_id": access.oauth_client_id,
            "client_secret": access.oauth_secret,
        }
        return token_endpoint, headers, data

    def _store_token_expiry(self, response_json: dict) -> str:
        """Record the token's expiry timestamp and return the access token."""
        request_time = datetime.now().timestamp()
        self._jwt_token_expires_ts = request_time + response_json.get("expires_in")
        return response_json.get("access_token")

    # Get Oauth2 JWT token
    @requires_dependencies(["httpx"], extras="vectara")
    async def _get_jwt_token_async(self) -> str:
        """Connect to the server and get a JWT token."""
        import httpx

        token_endpoint, headers, data = self._token_request_args()
        async with httpx.AsyncClient() as client:
            response = await client.post(token_endpoint, headers=headers, data=data)
            response.raise_for_status()
            response_json = response.json()

        return self._store_token_expiry(response_json)

    # Get Oauth2 JWT token
    @requires_dependencies(["httpx"], extras="vectara")
    def _get_jwt_token(self) -> str:
        """Connect to the server and get a JWT token."""
        import httpx

        token_endpoint, headers, data = self._token_request_args()
        with httpx.Client() as client:
            response = client.post(token_endpoint, headers=headers, data=data)
            response.raise_for_status()
            response_json = response.json()

        return self._store_token_expiry(response_json)

    @DestinationConnectionError.wrap
    def _check_connection_and_corpora(self) -> None:
        """
        Check the connection for Vectara and validate corpus exists.
        - If more than one corpus with the same name exists - raise error
        - If exactly one corpus exists with this name - use it.
        - If does not exist - raise error.
        """
        # Get token if not already set
        self.jwt_token

        # NOTE: _request returns the decoded JSON dict directly (not a
        # (status, json) tuple), so it must not be tuple-unpacked here.
        list_corpora_response = self._request(
            http_method="GET",
            endpoint="corpora",
        )

        if self.connection_config.corpus_name:
            possible_corpora_keys_names_map = {
                corpus.get("key"): corpus.get("name")
                for corpus in list_corpora_response.get("corpora")
                if corpus.get("name") == self.connection_config.corpus_name
            }

            if len(possible_corpora_keys_names_map) > 1:
                raise ValueError(
                    f"Multiple Corpus exist with name {self.connection_config.corpus_name} in dest."
                )
            if len(possible_corpora_keys_names_map) == 1:
                resolved_key = next(iter(possible_corpora_keys_names_map))
                if not self.connection_config.corpus_key:
                    # Resolve the key from the name so later requests can use it.
                    self.connection_config.corpus_key = resolved_key
                elif self.connection_config.corpus_key != resolved_key:
                    raise ValueError("Corpus key does not match provided corpus name.")
            else:
                raise ValueError(
                    f"No Corpora exist with name {self.connection_config.corpus_name} in dest."
                )

    @requires_dependencies(["httpx"], extras="vectara")
    async def _async_request(
        self,
        endpoint: str,
        http_method: str = "POST",
        params: Optional[Mapping[str, Any]] = None,
        data: Optional[Mapping[str, Any]] = None,
    ) -> dict:
        """Issue an authenticated async request and return the decoded JSON body.

        Raises an httpx.HTTPStatusError on non-2xx responses.
        """
        import httpx

        url = f"{BASE_URL}/{endpoint}"

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {await self.jwt_token_async}",
            "X-source": "unstructured",
        }

        async with httpx.AsyncClient() as client:
            response = await client.request(
                method=http_method, url=url, headers=headers, params=params, json=data
            )
            response.raise_for_status()
            return response.json()

    @requires_dependencies(["httpx"], extras="vectara")
    def _request(
        self,
        endpoint: str,
        http_method: str = "POST",
        params: Optional[Mapping[str, Any]] = None,
        data: Optional[Mapping[str, Any]] = None,
    ) -> dict:
        """Issue an authenticated sync request and return the decoded JSON body.

        Raises an httpx.HTTPStatusError on non-2xx responses.
        """
        import httpx

        url = f"{BASE_URL}/{endpoint}"

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {self.jwt_token}",
            "X-source": "unstructured",
        }

        with httpx.Client() as client:
            response = client.request(
                method=http_method, url=url, headers=headers, params=params, json=data
            )
            response.raise_for_status()
            return response.json()

    async def _delete_doc(self, doc_id: str) -> dict:
        """
        Delete a document from the Vectara corpus.
        """
        return await self._async_request(
            endpoint=f"corpora/{self.connection_config.corpus_key}/documents/{doc_id}",
            http_method="DELETE",
        )

    async def _index_document(self, document: Dict[str, Any]) -> None:
        """
        Index a document (by uploading it to the Vectara corpus) from the document dictionary
        """
        logger.debug(
            f"Indexing document {document['id']} to corpus key {self.connection_config.corpus_key}"
        )

        try:
            result = await self._async_request(
                endpoint=f"corpora/{self.connection_config.corpus_key}/documents", data=document
            )
        except Exception as e:
            # Best-effort: log and skip this document rather than failing the batch.
            logger.error(f"exception {e} while indexing document {document['id']}")
            return

        if (
            "messages" in result
            and result["messages"]
            and (
                "ALREADY_EXISTS" in result["messages"]
                or (
                    "CONFLICT: Indexing doesn't support updating documents."
                    in result["messages"][0]
                )
            )
        ):
            # Conflict with an existing document: delete it and upload again.
            logger.info(f"document {document['id']} already exists, re-indexing")
            await self._delete_doc(document["id"])
            await self._async_request(
                endpoint=f"corpora/{self.connection_config.corpus_key}/documents", data=document
            )
            return

        logger.info(f"indexing document {document['id']} succeeded")

    async def run_data_async(
        self,
        data: list[dict],
        file_data: FileData,
        **kwargs: Any,
    ) -> None:
        """Index all staged documents concurrently."""
        logger.info(f"inserting / updating {len(data)} documents to Vectara ")
        await asyncio.gather(*(self._index_document(vdoc) for vdoc in data))
342
+
343
+
344
# Registry entry wiring the Vectara connector classes into the destination
# connector registry so the ingest pipeline can discover them by name.
vectara_destination_entry = DestinationRegistryEntry(
    connection_config=VectaraConnectionConfig,
    uploader=VectaraUploader,
    uploader_config=VectaraUploaderConfig,
    upload_stager=VectaraUploadStager,
    upload_stager_config=VectaraUploadStagerConfig,
)
@@ -2,6 +2,7 @@ from dataclasses import fields
2
2
  from pathlib import Path
3
3
  from typing import TYPE_CHECKING, Optional
4
4
 
5
+ from unstructured_ingest.v2.errors import ProviderError, UserError
5
6
  from unstructured_ingest.v2.logger import logger
6
7
 
7
8
  if TYPE_CHECKING:
@@ -51,6 +52,22 @@ def create_partition_request(filename: Path, parameters_dict: dict) -> "Partitio
51
52
  return PartitionRequest(partition_parameters=partition_params)
52
53
 
53
54
 
55
def handle_error(e: Exception):
    """Translate exceptions from the Unstructured API client into ingest errors.

    SDK errors with a 4xx status are re-raised as UserError, 5xx as
    ProviderError; anything else is logged and re-raised unchanged.
    This function always raises and never returns normally.
    """
    from unstructured_client.models.errors.sdkerror import SDKError

    # Guard clause: non-SDK failures are re-raised as-is.
    if not isinstance(e, SDKError):
        logger.error(f"Uncaught Error calling API: {e}")
        raise e

    logger.error(f"Error calling Unstructured API: {e}")
    # Chain with "from e" so the original SDK traceback is preserved.
    if 400 <= e.status_code < 500:
        raise UserError(e.body) from e
    if e.status_code >= 500:
        raise ProviderError(e.body) from e
    raise e
69
+
70
+
54
71
  async def call_api_async(
55
72
  server_url: Optional[str], api_key: Optional[str], filename: Path, api_parameters: dict
56
73
  ) -> list[dict]:
@@ -71,7 +88,10 @@ async def call_api_async(
71
88
  api_key_auth=api_key,
72
89
  )
73
90
  partition_request = create_partition_request(filename=filename, parameters_dict=api_parameters)
74
- res = await client.general.partition_async(request=partition_request)
91
+ try:
92
+ res = await client.general.partition_async(request=partition_request)
93
+ except Exception as e:
94
+ handle_error(e)
75
95
 
76
96
  return res.elements or []
77
97
 
@@ -96,6 +116,9 @@ def call_api(
96
116
  api_key_auth=api_key,
97
117
  )
98
118
  partition_request = create_partition_request(filename=filename, parameters_dict=api_parameters)
99
- res = client.general.partition(request=partition_request)
119
+ try:
120
+ res = client.general.partition(request=partition_request)
121
+ except Exception as e:
122
+ handle_error(e)
100
123
 
101
124
  return res.elements or []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.3.10
3
+ Version: 0.3.12
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,14 +22,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: opentelemetry-sdk
26
- Requires-Dist: python-dateutil
27
- Requires-Dist: click
28
25
  Requires-Dist: ndjson
26
+ Requires-Dist: python-dateutil
29
27
  Requires-Dist: pydantic>=2.7
30
28
  Requires-Dist: pandas
31
- Requires-Dist: tqdm
32
29
  Requires-Dist: dataclasses-json
30
+ Requires-Dist: tqdm
31
+ Requires-Dist: click
32
+ Requires-Dist: opentelemetry-sdk
33
33
  Provides-Extra: airtable
34
34
  Requires-Dist: pyairtable; extra == "airtable"
35
35
  Provides-Extra: astradb
@@ -42,11 +42,11 @@ Requires-Dist: azure-search-documents; extra == "azure-ai-search"
42
42
  Provides-Extra: bedrock
43
43
  Requires-Dist: boto3; extra == "bedrock"
44
44
  Provides-Extra: biomed
45
- Requires-Dist: bs4; extra == "biomed"
46
45
  Requires-Dist: requests; extra == "biomed"
46
+ Requires-Dist: bs4; extra == "biomed"
47
47
  Provides-Extra: box
48
- Requires-Dist: fsspec; extra == "box"
49
48
  Requires-Dist: boxfs; extra == "box"
49
+ Requires-Dist: fsspec; extra == "box"
50
50
  Provides-Extra: chroma
51
51
  Requires-Dist: chromadb; extra == "chroma"
52
52
  Provides-Extra: clarifai
@@ -90,9 +90,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
90
90
  Provides-Extra: epub
91
91
  Requires-Dist: unstructured[epub]; extra == "epub"
92
92
  Provides-Extra: gcs
93
+ Requires-Dist: gcsfs; extra == "gcs"
93
94
  Requires-Dist: bs4; extra == "gcs"
94
95
  Requires-Dist: fsspec; extra == "gcs"
95
- Requires-Dist: gcsfs; extra == "gcs"
96
96
  Provides-Extra: github
97
97
  Requires-Dist: pygithub>1.58.0; extra == "github"
98
98
  Requires-Dist: requests; extra == "github"
@@ -101,8 +101,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
101
101
  Provides-Extra: google-drive
102
102
  Requires-Dist: google-api-python-client; extra == "google-drive"
103
103
  Provides-Extra: hubspot
104
- Requires-Dist: hubspot-api-client; extra == "hubspot"
105
104
  Requires-Dist: urllib3; extra == "hubspot"
105
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
106
106
  Provides-Extra: jira
107
107
  Requires-Dist: atlassian-python-api; extra == "jira"
108
108
  Provides-Extra: kafka
@@ -120,20 +120,20 @@ Requires-Dist: pymongo; extra == "mongodb"
120
120
  Provides-Extra: msg
121
121
  Requires-Dist: unstructured[msg]; extra == "msg"
122
122
  Provides-Extra: neo4j
123
- Requires-Dist: cymple; extra == "neo4j"
124
123
  Requires-Dist: neo4j; extra == "neo4j"
125
124
  Requires-Dist: networkx; extra == "neo4j"
125
+ Requires-Dist: cymple; extra == "neo4j"
126
126
  Provides-Extra: notion
127
- Requires-Dist: htmlBuilder; extra == "notion"
128
127
  Requires-Dist: backoff; extra == "notion"
129
- Requires-Dist: notion-client; extra == "notion"
128
+ Requires-Dist: htmlBuilder; extra == "notion"
130
129
  Requires-Dist: httpx; extra == "notion"
130
+ Requires-Dist: notion-client; extra == "notion"
131
131
  Provides-Extra: odt
132
132
  Requires-Dist: unstructured[odt]; extra == "odt"
133
133
  Provides-Extra: onedrive
134
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
135
134
  Requires-Dist: bs4; extra == "onedrive"
136
135
  Requires-Dist: msal; extra == "onedrive"
136
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
137
137
  Provides-Extra: openai
138
138
  Requires-Dist: openai; extra == "openai"
139
139
  Requires-Dist: tiktoken; extra == "openai"
@@ -142,8 +142,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
142
142
  Provides-Extra: org
143
143
  Requires-Dist: unstructured[org]; extra == "org"
144
144
  Provides-Extra: outlook
145
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
146
145
  Requires-Dist: msal; extra == "outlook"
146
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
147
147
  Provides-Extra: pdf
148
148
  Requires-Dist: unstructured[pdf]; extra == "pdf"
149
149
  Provides-Extra: pinecone
@@ -158,6 +158,8 @@ Provides-Extra: qdrant
158
158
  Requires-Dist: qdrant-client; extra == "qdrant"
159
159
  Provides-Extra: reddit
160
160
  Requires-Dist: praw; extra == "reddit"
161
+ Provides-Extra: redis
162
+ Requires-Dist: redis; extra == "redis"
161
163
  Provides-Extra: remote
162
164
  Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
163
165
  Provides-Extra: rst
@@ -170,11 +172,11 @@ Requires-Dist: fsspec; extra == "s3"
170
172
  Provides-Extra: salesforce
171
173
  Requires-Dist: simple-salesforce; extra == "salesforce"
172
174
  Provides-Extra: sftp
173
- Requires-Dist: fsspec; extra == "sftp"
174
175
  Requires-Dist: paramiko; extra == "sftp"
176
+ Requires-Dist: fsspec; extra == "sftp"
175
177
  Provides-Extra: sharepoint
176
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
177
178
  Requires-Dist: msal; extra == "sharepoint"
179
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
178
180
  Provides-Extra: singlestore
179
181
  Requires-Dist: singlestoredb; extra == "singlestore"
180
182
  Provides-Extra: slack
@@ -187,6 +189,8 @@ Requires-Dist: together; extra == "togetherai"
187
189
  Provides-Extra: tsv
188
190
  Requires-Dist: unstructured[tsv]; extra == "tsv"
189
191
  Provides-Extra: vectara
192
+ Requires-Dist: httpx; extra == "vectara"
193
+ Requires-Dist: aiofiles; extra == "vectara"
190
194
  Requires-Dist: requests; extra == "vectara"
191
195
  Provides-Extra: weaviate
192
196
  Requires-Dist: weaviate-client; extra == "weaviate"