unstructured-ingest 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic.

Files changed (45)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/interfaces.py +5 -5
  3. unstructured_ingest/embed/__init__.py +0 -17
  4. unstructured_ingest/embed/bedrock.py +56 -19
  5. unstructured_ingest/embed/huggingface.py +22 -22
  6. unstructured_ingest/embed/interfaces.py +11 -4
  7. unstructured_ingest/embed/mixedbreadai.py +17 -17
  8. unstructured_ingest/embed/octoai.py +7 -7
  9. unstructured_ingest/embed/openai.py +15 -20
  10. unstructured_ingest/embed/vertexai.py +26 -18
  11. unstructured_ingest/embed/voyageai.py +25 -20
  12. unstructured_ingest/interfaces.py +5 -5
  13. unstructured_ingest/v2/cli/base/cmd.py +1 -1
  14. unstructured_ingest/v2/interfaces/connector.py +1 -1
  15. unstructured_ingest/v2/pipeline/pipeline.py +3 -1
  16. unstructured_ingest/v2/pipeline/steps/chunk.py +1 -1
  17. unstructured_ingest/v2/pipeline/steps/download.py +6 -2
  18. unstructured_ingest/v2/pipeline/steps/embed.py +1 -1
  19. unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  20. unstructured_ingest/v2/pipeline/steps/index.py +4 -2
  21. unstructured_ingest/v2/pipeline/steps/partition.py +1 -1
  22. unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
  23. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  24. unstructured_ingest/v2/pipeline/steps/upload.py +6 -2
  25. unstructured_ingest/v2/processes/connectors/airtable.py +1 -1
  26. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +1 -1
  27. unstructured_ingest/v2/processes/connectors/elasticsearch.py +2 -2
  28. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +31 -5
  29. unstructured_ingest/v2/processes/connectors/fsspec/box.py +31 -2
  30. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +36 -8
  31. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +25 -77
  32. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +30 -1
  33. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +15 -18
  34. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +22 -1
  35. unstructured_ingest/v2/processes/connectors/milvus.py +2 -2
  36. unstructured_ingest/v2/processes/connectors/opensearch.py +2 -2
  37. unstructured_ingest/v2/processes/embedder.py +10 -10
  38. unstructured_ingest/v2/utils.py +1 -1
  39. unstructured_ingest-0.0.23.dist-info/METADATA +186 -0
  40. {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/RECORD +44 -44
  41. {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/WHEEL +1 -1
  42. unstructured_ingest-0.0.21.dist-info/METADATA +0 -639
  43. {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/LICENSE.md +0 -0
  44. {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/entry_points.txt +0 -0
  45. {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/top_level.txt +0 -0
@@ -1,639 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: unstructured-ingest
3
- Version: 0.0.21
4
- Summary: A library that prepares raw documents for downstream ML tasks.
5
- Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
- Author: Unstructured Technologies
7
- Author-email: devops@unstructuredai.io
8
- License: Apache-2.0
9
- Keywords: NLP PDF HTML CV XML parsing preprocessing
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Intended Audience :: Education
13
- Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: Apache Software License
15
- Classifier: Operating System :: OS Independent
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
- Requires-Python: >=3.9.0,<3.13
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE.md
25
- Requires-Dist: tqdm
26
- Requires-Dist: dataclasses-json
27
- Requires-Dist: pydantic>=2.7
28
- Requires-Dist: pandas
29
- Requires-Dist: python-dateutil
30
- Requires-Dist: opentelemetry-sdk
31
- Requires-Dist: click
32
- Provides-Extra: airtable
33
- Requires-Dist: pyairtable; extra == "airtable"
34
- Requires-Dist: tqdm; extra == "airtable"
35
- Requires-Dist: dataclasses-json; extra == "airtable"
36
- Requires-Dist: pydantic>=2.7; extra == "airtable"
37
- Requires-Dist: pandas; extra == "airtable"
38
- Requires-Dist: python-dateutil; extra == "airtable"
39
- Requires-Dist: opentelemetry-sdk; extra == "airtable"
40
- Requires-Dist: click; extra == "airtable"
41
- Provides-Extra: astradb
42
- Requires-Dist: tqdm; extra == "astradb"
43
- Requires-Dist: dataclasses-json; extra == "astradb"
44
- Requires-Dist: pydantic>=2.7; extra == "astradb"
45
- Requires-Dist: pandas; extra == "astradb"
46
- Requires-Dist: astrapy; extra == "astradb"
47
- Requires-Dist: python-dateutil; extra == "astradb"
48
- Requires-Dist: opentelemetry-sdk; extra == "astradb"
49
- Requires-Dist: click; extra == "astradb"
50
- Provides-Extra: azure
51
- Requires-Dist: tqdm; extra == "azure"
52
- Requires-Dist: dataclasses-json; extra == "azure"
53
- Requires-Dist: pydantic>=2.7; extra == "azure"
54
- Requires-Dist: pandas; extra == "azure"
55
- Requires-Dist: python-dateutil; extra == "azure"
56
- Requires-Dist: opentelemetry-sdk; extra == "azure"
57
- Requires-Dist: click; extra == "azure"
58
- Requires-Dist: adlfs; extra == "azure"
59
- Requires-Dist: fsspec; extra == "azure"
60
- Provides-Extra: azure-cognitive-search
61
- Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
62
- Requires-Dist: tqdm; extra == "azure-cognitive-search"
63
- Requires-Dist: dataclasses-json; extra == "azure-cognitive-search"
64
- Requires-Dist: pydantic>=2.7; extra == "azure-cognitive-search"
65
- Requires-Dist: pandas; extra == "azure-cognitive-search"
66
- Requires-Dist: python-dateutil; extra == "azure-cognitive-search"
67
- Requires-Dist: opentelemetry-sdk; extra == "azure-cognitive-search"
68
- Requires-Dist: click; extra == "azure-cognitive-search"
69
- Provides-Extra: bedrock
70
- Requires-Dist: langchain-community; extra == "bedrock"
71
- Requires-Dist: pydantic>=2.7; extra == "bedrock"
72
- Requires-Dist: tqdm; extra == "bedrock"
73
- Requires-Dist: dataclasses-json; extra == "bedrock"
74
- Requires-Dist: pandas; extra == "bedrock"
75
- Requires-Dist: python-dateutil; extra == "bedrock"
76
- Requires-Dist: opentelemetry-sdk; extra == "bedrock"
77
- Requires-Dist: click; extra == "bedrock"
78
- Requires-Dist: boto3; extra == "bedrock"
79
- Provides-Extra: biomed
80
- Requires-Dist: bs4; extra == "biomed"
81
- Requires-Dist: tqdm; extra == "biomed"
82
- Requires-Dist: dataclasses-json; extra == "biomed"
83
- Requires-Dist: pydantic>=2.7; extra == "biomed"
84
- Requires-Dist: requests; extra == "biomed"
85
- Requires-Dist: pandas; extra == "biomed"
86
- Requires-Dist: python-dateutil; extra == "biomed"
87
- Requires-Dist: opentelemetry-sdk; extra == "biomed"
88
- Requires-Dist: click; extra == "biomed"
89
- Provides-Extra: box
90
- Requires-Dist: tqdm; extra == "box"
91
- Requires-Dist: dataclasses-json; extra == "box"
92
- Requires-Dist: pydantic>=2.7; extra == "box"
93
- Requires-Dist: pandas; extra == "box"
94
- Requires-Dist: python-dateutil; extra == "box"
95
- Requires-Dist: opentelemetry-sdk; extra == "box"
96
- Requires-Dist: click; extra == "box"
97
- Requires-Dist: fsspec; extra == "box"
98
- Requires-Dist: boxfs; extra == "box"
99
- Provides-Extra: chroma
100
- Requires-Dist: tqdm; extra == "chroma"
101
- Requires-Dist: dataclasses-json; extra == "chroma"
102
- Requires-Dist: pydantic>=2.7; extra == "chroma"
103
- Requires-Dist: pandas; extra == "chroma"
104
- Requires-Dist: python-dateutil; extra == "chroma"
105
- Requires-Dist: opentelemetry-sdk; extra == "chroma"
106
- Requires-Dist: click; extra == "chroma"
107
- Requires-Dist: chromadb; extra == "chroma"
108
- Provides-Extra: clarifai
109
- Requires-Dist: tqdm; extra == "clarifai"
110
- Requires-Dist: dataclasses-json; extra == "clarifai"
111
- Requires-Dist: pydantic>=2.7; extra == "clarifai"
112
- Requires-Dist: clarifai; extra == "clarifai"
113
- Requires-Dist: pandas; extra == "clarifai"
114
- Requires-Dist: python-dateutil; extra == "clarifai"
115
- Requires-Dist: opentelemetry-sdk; extra == "clarifai"
116
- Requires-Dist: click; extra == "clarifai"
117
- Provides-Extra: confluence
118
- Requires-Dist: click; extra == "confluence"
119
- Requires-Dist: tqdm; extra == "confluence"
120
- Requires-Dist: dataclasses-json; extra == "confluence"
121
- Requires-Dist: pydantic>=2.7; extra == "confluence"
122
- Requires-Dist: requests; extra == "confluence"
123
- Requires-Dist: pandas; extra == "confluence"
124
- Requires-Dist: python-dateutil; extra == "confluence"
125
- Requires-Dist: opentelemetry-sdk; extra == "confluence"
126
- Requires-Dist: atlassian-python-api; extra == "confluence"
127
- Provides-Extra: couchbase
128
- Requires-Dist: pydantic>=2.7; extra == "couchbase"
129
- Requires-Dist: tqdm; extra == "couchbase"
130
- Requires-Dist: dataclasses-json; extra == "couchbase"
131
- Requires-Dist: couchbase; extra == "couchbase"
132
- Requires-Dist: pandas; extra == "couchbase"
133
- Requires-Dist: python-dateutil; extra == "couchbase"
134
- Requires-Dist: opentelemetry-sdk; extra == "couchbase"
135
- Requires-Dist: click; extra == "couchbase"
136
- Provides-Extra: csv
137
- Requires-Dist: unstructured[tsv]; extra == "csv"
138
- Requires-Dist: tqdm; extra == "csv"
139
- Requires-Dist: dataclasses-json; extra == "csv"
140
- Requires-Dist: pydantic>=2.7; extra == "csv"
141
- Requires-Dist: pandas; extra == "csv"
142
- Requires-Dist: python-dateutil; extra == "csv"
143
- Requires-Dist: opentelemetry-sdk; extra == "csv"
144
- Requires-Dist: click; extra == "csv"
145
- Provides-Extra: databricks-volumes
146
- Requires-Dist: databricks-sdk; extra == "databricks-volumes"
147
- Requires-Dist: tqdm; extra == "databricks-volumes"
148
- Requires-Dist: dataclasses-json; extra == "databricks-volumes"
149
- Requires-Dist: pydantic>=2.7; extra == "databricks-volumes"
150
- Requires-Dist: pandas; extra == "databricks-volumes"
151
- Requires-Dist: python-dateutil; extra == "databricks-volumes"
152
- Requires-Dist: opentelemetry-sdk; extra == "databricks-volumes"
153
- Requires-Dist: click; extra == "databricks-volumes"
154
- Provides-Extra: delta-table
155
- Requires-Dist: deltalake; extra == "delta-table"
156
- Requires-Dist: tqdm; extra == "delta-table"
157
- Requires-Dist: dataclasses-json; extra == "delta-table"
158
- Requires-Dist: pydantic>=2.7; extra == "delta-table"
159
- Requires-Dist: pandas; extra == "delta-table"
160
- Requires-Dist: python-dateutil; extra == "delta-table"
161
- Requires-Dist: opentelemetry-sdk; extra == "delta-table"
162
- Requires-Dist: click; extra == "delta-table"
163
- Requires-Dist: fsspec; extra == "delta-table"
164
- Provides-Extra: discord
165
- Requires-Dist: tqdm; extra == "discord"
166
- Requires-Dist: dataclasses-json; extra == "discord"
167
- Requires-Dist: pydantic>=2.7; extra == "discord"
168
- Requires-Dist: discord-py; extra == "discord"
169
- Requires-Dist: pandas; extra == "discord"
170
- Requires-Dist: python-dateutil; extra == "discord"
171
- Requires-Dist: opentelemetry-sdk; extra == "discord"
172
- Requires-Dist: click; extra == "discord"
173
- Provides-Extra: doc
174
- Requires-Dist: tqdm; extra == "doc"
175
- Requires-Dist: dataclasses-json; extra == "doc"
176
- Requires-Dist: pydantic>=2.7; extra == "doc"
177
- Requires-Dist: unstructured[docx]; extra == "doc"
178
- Requires-Dist: pandas; extra == "doc"
179
- Requires-Dist: python-dateutil; extra == "doc"
180
- Requires-Dist: opentelemetry-sdk; extra == "doc"
181
- Requires-Dist: click; extra == "doc"
182
- Provides-Extra: docx
183
- Requires-Dist: tqdm; extra == "docx"
184
- Requires-Dist: dataclasses-json; extra == "docx"
185
- Requires-Dist: pydantic>=2.7; extra == "docx"
186
- Requires-Dist: unstructured[docx]; extra == "docx"
187
- Requires-Dist: pandas; extra == "docx"
188
- Requires-Dist: python-dateutil; extra == "docx"
189
- Requires-Dist: opentelemetry-sdk; extra == "docx"
190
- Requires-Dist: click; extra == "docx"
191
- Provides-Extra: dropbox
192
- Requires-Dist: pydantic>=2.7; extra == "dropbox"
193
- Requires-Dist: tqdm; extra == "dropbox"
194
- Requires-Dist: dataclasses-json; extra == "dropbox"
195
- Requires-Dist: pandas; extra == "dropbox"
196
- Requires-Dist: python-dateutil; extra == "dropbox"
197
- Requires-Dist: opentelemetry-sdk; extra == "dropbox"
198
- Requires-Dist: click; extra == "dropbox"
199
- Requires-Dist: dropboxdrivefs; extra == "dropbox"
200
- Requires-Dist: fsspec; extra == "dropbox"
201
- Provides-Extra: elasticsearch
202
- Requires-Dist: click; extra == "elasticsearch"
203
- Requires-Dist: tqdm; extra == "elasticsearch"
204
- Requires-Dist: dataclasses-json; extra == "elasticsearch"
205
- Requires-Dist: pydantic>=2.7; extra == "elasticsearch"
206
- Requires-Dist: pandas; extra == "elasticsearch"
207
- Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
208
- Requires-Dist: python-dateutil; extra == "elasticsearch"
209
- Requires-Dist: opentelemetry-sdk; extra == "elasticsearch"
210
- Provides-Extra: embed-huggingface
211
- Requires-Dist: tqdm; extra == "embed-huggingface"
212
- Requires-Dist: dataclasses-json; extra == "embed-huggingface"
213
- Requires-Dist: pydantic>=2.7; extra == "embed-huggingface"
214
- Requires-Dist: pandas; extra == "embed-huggingface"
215
- Requires-Dist: python-dateutil; extra == "embed-huggingface"
216
- Requires-Dist: opentelemetry-sdk; extra == "embed-huggingface"
217
- Requires-Dist: click; extra == "embed-huggingface"
218
- Requires-Dist: langchain-huggingface; extra == "embed-huggingface"
219
- Provides-Extra: embed-mixedbreadai
220
- Requires-Dist: tqdm; extra == "embed-mixedbreadai"
221
- Requires-Dist: dataclasses-json; extra == "embed-mixedbreadai"
222
- Requires-Dist: pydantic>=2.7; extra == "embed-mixedbreadai"
223
- Requires-Dist: pandas; extra == "embed-mixedbreadai"
224
- Requires-Dist: python-dateutil; extra == "embed-mixedbreadai"
225
- Requires-Dist: opentelemetry-sdk; extra == "embed-mixedbreadai"
226
- Requires-Dist: click; extra == "embed-mixedbreadai"
227
- Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
228
- Provides-Extra: embed-octoai
229
- Requires-Dist: click; extra == "embed-octoai"
230
- Requires-Dist: openai; extra == "embed-octoai"
231
- Requires-Dist: tqdm; extra == "embed-octoai"
232
- Requires-Dist: dataclasses-json; extra == "embed-octoai"
233
- Requires-Dist: pydantic>=2.7; extra == "embed-octoai"
234
- Requires-Dist: pandas; extra == "embed-octoai"
235
- Requires-Dist: tiktoken; extra == "embed-octoai"
236
- Requires-Dist: python-dateutil; extra == "embed-octoai"
237
- Requires-Dist: opentelemetry-sdk; extra == "embed-octoai"
238
- Provides-Extra: embed-vertexai
239
- Requires-Dist: langchain-google-vertexai; extra == "embed-vertexai"
240
- Requires-Dist: langchain-community; extra == "embed-vertexai"
241
- Requires-Dist: tqdm; extra == "embed-vertexai"
242
- Requires-Dist: dataclasses-json; extra == "embed-vertexai"
243
- Requires-Dist: pydantic>=2.7; extra == "embed-vertexai"
244
- Requires-Dist: langchain; extra == "embed-vertexai"
245
- Requires-Dist: pandas; extra == "embed-vertexai"
246
- Requires-Dist: python-dateutil; extra == "embed-vertexai"
247
- Requires-Dist: opentelemetry-sdk; extra == "embed-vertexai"
248
- Requires-Dist: click; extra == "embed-vertexai"
249
- Provides-Extra: embed-voyageai
250
- Requires-Dist: langchain-voyageai; extra == "embed-voyageai"
251
- Requires-Dist: tqdm; extra == "embed-voyageai"
252
- Requires-Dist: dataclasses-json; extra == "embed-voyageai"
253
- Requires-Dist: pydantic>=2.7; extra == "embed-voyageai"
254
- Requires-Dist: langchain; extra == "embed-voyageai"
255
- Requires-Dist: pandas; extra == "embed-voyageai"
256
- Requires-Dist: python-dateutil; extra == "embed-voyageai"
257
- Requires-Dist: opentelemetry-sdk; extra == "embed-voyageai"
258
- Requires-Dist: click; extra == "embed-voyageai"
259
- Provides-Extra: epub
260
- Requires-Dist: unstructured[epub]; extra == "epub"
261
- Requires-Dist: tqdm; extra == "epub"
262
- Requires-Dist: dataclasses-json; extra == "epub"
263
- Requires-Dist: pydantic>=2.7; extra == "epub"
264
- Requires-Dist: pandas; extra == "epub"
265
- Requires-Dist: python-dateutil; extra == "epub"
266
- Requires-Dist: opentelemetry-sdk; extra == "epub"
267
- Requires-Dist: click; extra == "epub"
268
- Provides-Extra: gcs
269
- Requires-Dist: gcsfs; extra == "gcs"
270
- Requires-Dist: bs4; extra == "gcs"
271
- Requires-Dist: tqdm; extra == "gcs"
272
- Requires-Dist: dataclasses-json; extra == "gcs"
273
- Requires-Dist: pydantic>=2.7; extra == "gcs"
274
- Requires-Dist: pandas; extra == "gcs"
275
- Requires-Dist: python-dateutil; extra == "gcs"
276
- Requires-Dist: opentelemetry-sdk; extra == "gcs"
277
- Requires-Dist: click; extra == "gcs"
278
- Requires-Dist: fsspec; extra == "gcs"
279
- Provides-Extra: github
280
- Requires-Dist: pydantic>=2.7; extra == "github"
281
- Requires-Dist: tqdm; extra == "github"
282
- Requires-Dist: dataclasses-json; extra == "github"
283
- Requires-Dist: requests; extra == "github"
284
- Requires-Dist: pandas; extra == "github"
285
- Requires-Dist: python-dateutil; extra == "github"
286
- Requires-Dist: opentelemetry-sdk; extra == "github"
287
- Requires-Dist: click; extra == "github"
288
- Requires-Dist: pygithub>1.58.0; extra == "github"
289
- Provides-Extra: gitlab
290
- Requires-Dist: tqdm; extra == "gitlab"
291
- Requires-Dist: dataclasses-json; extra == "gitlab"
292
- Requires-Dist: pydantic>=2.7; extra == "gitlab"
293
- Requires-Dist: pandas; extra == "gitlab"
294
- Requires-Dist: python-dateutil; extra == "gitlab"
295
- Requires-Dist: opentelemetry-sdk; extra == "gitlab"
296
- Requires-Dist: click; extra == "gitlab"
297
- Requires-Dist: python-gitlab; extra == "gitlab"
298
- Provides-Extra: google-drive
299
- Requires-Dist: google-api-python-client; extra == "google-drive"
300
- Requires-Dist: tqdm; extra == "google-drive"
301
- Requires-Dist: dataclasses-json; extra == "google-drive"
302
- Requires-Dist: pydantic>=2.7; extra == "google-drive"
303
- Requires-Dist: pandas; extra == "google-drive"
304
- Requires-Dist: python-dateutil; extra == "google-drive"
305
- Requires-Dist: opentelemetry-sdk; extra == "google-drive"
306
- Requires-Dist: click; extra == "google-drive"
307
- Provides-Extra: hubspot
308
- Requires-Dist: tqdm; extra == "hubspot"
309
- Requires-Dist: dataclasses-json; extra == "hubspot"
310
- Requires-Dist: urllib3; extra == "hubspot"
311
- Requires-Dist: pydantic>=2.7; extra == "hubspot"
312
- Requires-Dist: pandas; extra == "hubspot"
313
- Requires-Dist: hubspot-api-client; extra == "hubspot"
314
- Requires-Dist: python-dateutil; extra == "hubspot"
315
- Requires-Dist: opentelemetry-sdk; extra == "hubspot"
316
- Requires-Dist: click; extra == "hubspot"
317
- Provides-Extra: jira
318
- Requires-Dist: click; extra == "jira"
319
- Requires-Dist: tqdm; extra == "jira"
320
- Requires-Dist: dataclasses-json; extra == "jira"
321
- Requires-Dist: pydantic>=2.7; extra == "jira"
322
- Requires-Dist: pandas; extra == "jira"
323
- Requires-Dist: python-dateutil; extra == "jira"
324
- Requires-Dist: opentelemetry-sdk; extra == "jira"
325
- Requires-Dist: atlassian-python-api; extra == "jira"
326
- Provides-Extra: kafka
327
- Requires-Dist: tqdm; extra == "kafka"
328
- Requires-Dist: dataclasses-json; extra == "kafka"
329
- Requires-Dist: pydantic>=2.7; extra == "kafka"
330
- Requires-Dist: pandas; extra == "kafka"
331
- Requires-Dist: python-dateutil; extra == "kafka"
332
- Requires-Dist: opentelemetry-sdk; extra == "kafka"
333
- Requires-Dist: click; extra == "kafka"
334
- Requires-Dist: confluent-kafka; extra == "kafka"
335
- Provides-Extra: kdbai
336
- Requires-Dist: kdbai-client; extra == "kdbai"
337
- Provides-Extra: md
338
- Requires-Dist: tqdm; extra == "md"
339
- Requires-Dist: dataclasses-json; extra == "md"
340
- Requires-Dist: unstructured[md]; extra == "md"
341
- Requires-Dist: pydantic>=2.7; extra == "md"
342
- Requires-Dist: pandas; extra == "md"
343
- Requires-Dist: python-dateutil; extra == "md"
344
- Requires-Dist: opentelemetry-sdk; extra == "md"
345
- Requires-Dist: click; extra == "md"
346
- Provides-Extra: milvus
347
- Requires-Dist: tqdm; extra == "milvus"
348
- Requires-Dist: dataclasses-json; extra == "milvus"
349
- Requires-Dist: pydantic>=2.7; extra == "milvus"
350
- Requires-Dist: pandas; extra == "milvus"
351
- Requires-Dist: python-dateutil; extra == "milvus"
352
- Requires-Dist: opentelemetry-sdk; extra == "milvus"
353
- Requires-Dist: click; extra == "milvus"
354
- Requires-Dist: pymilvus; extra == "milvus"
355
- Provides-Extra: mongodb
356
- Requires-Dist: pymongo; extra == "mongodb"
357
- Requires-Dist: tqdm; extra == "mongodb"
358
- Requires-Dist: dataclasses-json; extra == "mongodb"
359
- Requires-Dist: pydantic>=2.7; extra == "mongodb"
360
- Requires-Dist: pandas; extra == "mongodb"
361
- Requires-Dist: python-dateutil; extra == "mongodb"
362
- Requires-Dist: opentelemetry-sdk; extra == "mongodb"
363
- Requires-Dist: click; extra == "mongodb"
364
- Provides-Extra: msg
365
- Requires-Dist: unstructured[msg]; extra == "msg"
366
- Requires-Dist: tqdm; extra == "msg"
367
- Requires-Dist: dataclasses-json; extra == "msg"
368
- Requires-Dist: pydantic>=2.7; extra == "msg"
369
- Requires-Dist: pandas; extra == "msg"
370
- Requires-Dist: python-dateutil; extra == "msg"
371
- Requires-Dist: opentelemetry-sdk; extra == "msg"
372
- Requires-Dist: click; extra == "msg"
373
- Provides-Extra: notion
374
- Requires-Dist: notion-client; extra == "notion"
375
- Requires-Dist: httpx; extra == "notion"
376
- Requires-Dist: click; extra == "notion"
377
- Requires-Dist: tqdm; extra == "notion"
378
- Requires-Dist: dataclasses-json; extra == "notion"
379
- Requires-Dist: pydantic>=2.7; extra == "notion"
380
- Requires-Dist: pandas; extra == "notion"
381
- Requires-Dist: python-dateutil; extra == "notion"
382
- Requires-Dist: backoff; extra == "notion"
383
- Requires-Dist: opentelemetry-sdk; extra == "notion"
384
- Requires-Dist: htmlBuilder; extra == "notion"
385
- Provides-Extra: odt
386
- Requires-Dist: tqdm; extra == "odt"
387
- Requires-Dist: dataclasses-json; extra == "odt"
388
- Requires-Dist: pydantic>=2.7; extra == "odt"
389
- Requires-Dist: pandas; extra == "odt"
390
- Requires-Dist: python-dateutil; extra == "odt"
391
- Requires-Dist: opentelemetry-sdk; extra == "odt"
392
- Requires-Dist: click; extra == "odt"
393
- Requires-Dist: unstructured[odt]; extra == "odt"
394
- Provides-Extra: onedrive
395
- Requires-Dist: bs4; extra == "onedrive"
396
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
397
- Requires-Dist: tqdm; extra == "onedrive"
398
- Requires-Dist: dataclasses-json; extra == "onedrive"
399
- Requires-Dist: msal; extra == "onedrive"
400
- Requires-Dist: pydantic>=2.7; extra == "onedrive"
401
- Requires-Dist: pandas; extra == "onedrive"
402
- Requires-Dist: python-dateutil; extra == "onedrive"
403
- Requires-Dist: opentelemetry-sdk; extra == "onedrive"
404
- Requires-Dist: click; extra == "onedrive"
405
- Provides-Extra: openai
406
- Requires-Dist: click; extra == "openai"
407
- Requires-Dist: openai; extra == "openai"
408
- Requires-Dist: langchain-community; extra == "openai"
409
- Requires-Dist: tqdm; extra == "openai"
410
- Requires-Dist: dataclasses-json; extra == "openai"
411
- Requires-Dist: pydantic>=2.7; extra == "openai"
412
- Requires-Dist: pandas; extra == "openai"
413
- Requires-Dist: tiktoken; extra == "openai"
414
- Requires-Dist: python-dateutil; extra == "openai"
415
- Requires-Dist: opentelemetry-sdk; extra == "openai"
416
- Provides-Extra: opensearch
417
- Requires-Dist: tqdm; extra == "opensearch"
418
- Requires-Dist: dataclasses-json; extra == "opensearch"
419
- Requires-Dist: pydantic>=2.7; extra == "opensearch"
420
- Requires-Dist: opensearch-py; extra == "opensearch"
421
- Requires-Dist: pandas; extra == "opensearch"
422
- Requires-Dist: python-dateutil; extra == "opensearch"
423
- Requires-Dist: opentelemetry-sdk; extra == "opensearch"
424
- Requires-Dist: click; extra == "opensearch"
425
- Provides-Extra: org
426
- Requires-Dist: tqdm; extra == "org"
427
- Requires-Dist: dataclasses-json; extra == "org"
428
- Requires-Dist: pydantic>=2.7; extra == "org"
429
- Requires-Dist: unstructured[org]; extra == "org"
430
- Requires-Dist: pandas; extra == "org"
431
- Requires-Dist: python-dateutil; extra == "org"
432
- Requires-Dist: opentelemetry-sdk; extra == "org"
433
- Requires-Dist: click; extra == "org"
434
- Provides-Extra: outlook
435
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
436
- Requires-Dist: tqdm; extra == "outlook"
437
- Requires-Dist: dataclasses-json; extra == "outlook"
438
- Requires-Dist: msal; extra == "outlook"
439
- Requires-Dist: pydantic>=2.7; extra == "outlook"
440
- Requires-Dist: pandas; extra == "outlook"
441
- Requires-Dist: python-dateutil; extra == "outlook"
442
- Requires-Dist: opentelemetry-sdk; extra == "outlook"
443
- Requires-Dist: click; extra == "outlook"
444
- Provides-Extra: pdf
445
- Requires-Dist: tqdm; extra == "pdf"
446
- Requires-Dist: unstructured[pdf]; extra == "pdf"
447
- Requires-Dist: dataclasses-json; extra == "pdf"
448
- Requires-Dist: pydantic>=2.7; extra == "pdf"
449
- Requires-Dist: pandas; extra == "pdf"
450
- Requires-Dist: python-dateutil; extra == "pdf"
451
- Requires-Dist: opentelemetry-sdk; extra == "pdf"
452
- Requires-Dist: click; extra == "pdf"
453
- Provides-Extra: pinecone
454
- Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
455
- Requires-Dist: tqdm; extra == "pinecone"
456
- Requires-Dist: dataclasses-json; extra == "pinecone"
457
- Requires-Dist: pydantic>=2.7; extra == "pinecone"
458
- Requires-Dist: pandas; extra == "pinecone"
459
- Requires-Dist: python-dateutil; extra == "pinecone"
460
- Requires-Dist: opentelemetry-sdk; extra == "pinecone"
461
- Requires-Dist: click; extra == "pinecone"
462
- Provides-Extra: postgres
463
- Requires-Dist: click; extra == "postgres"
464
- Requires-Dist: tqdm; extra == "postgres"
465
- Requires-Dist: dataclasses-json; extra == "postgres"
466
- Requires-Dist: pydantic>=2.7; extra == "postgres"
467
- Requires-Dist: pandas; extra == "postgres"
468
- Requires-Dist: psycopg2-binary; extra == "postgres"
469
- Requires-Dist: python-dateutil; extra == "postgres"
470
- Requires-Dist: opentelemetry-sdk; extra == "postgres"
471
- Provides-Extra: ppt
472
- Requires-Dist: tqdm; extra == "ppt"
473
- Requires-Dist: dataclasses-json; extra == "ppt"
474
- Requires-Dist: unstructured[pptx]; extra == "ppt"
475
- Requires-Dist: pydantic>=2.7; extra == "ppt"
476
- Requires-Dist: pandas; extra == "ppt"
477
- Requires-Dist: python-dateutil; extra == "ppt"
478
- Requires-Dist: opentelemetry-sdk; extra == "ppt"
479
- Requires-Dist: click; extra == "ppt"
480
- Provides-Extra: pptx
481
- Requires-Dist: tqdm; extra == "pptx"
482
- Requires-Dist: dataclasses-json; extra == "pptx"
483
- Requires-Dist: unstructured[pptx]; extra == "pptx"
484
- Requires-Dist: pydantic>=2.7; extra == "pptx"
485
- Requires-Dist: pandas; extra == "pptx"
486
- Requires-Dist: python-dateutil; extra == "pptx"
487
- Requires-Dist: opentelemetry-sdk; extra == "pptx"
488
- Requires-Dist: click; extra == "pptx"
489
- Provides-Extra: qdrant
490
- Requires-Dist: tqdm; extra == "qdrant"
491
- Requires-Dist: dataclasses-json; extra == "qdrant"
492
- Requires-Dist: pydantic>=2.7; extra == "qdrant"
493
- Requires-Dist: qdrant-client; extra == "qdrant"
494
- Requires-Dist: pandas; extra == "qdrant"
495
- Requires-Dist: python-dateutil; extra == "qdrant"
496
- Requires-Dist: opentelemetry-sdk; extra == "qdrant"
497
- Requires-Dist: click; extra == "qdrant"
498
- Provides-Extra: reddit
499
- Requires-Dist: tqdm; extra == "reddit"
500
- Requires-Dist: dataclasses-json; extra == "reddit"
501
- Requires-Dist: pydantic>=2.7; extra == "reddit"
502
- Requires-Dist: praw; extra == "reddit"
503
- Requires-Dist: pandas; extra == "reddit"
504
- Requires-Dist: python-dateutil; extra == "reddit"
505
- Requires-Dist: opentelemetry-sdk; extra == "reddit"
506
- Requires-Dist: click; extra == "reddit"
507
- Provides-Extra: remote
508
- Requires-Dist: tqdm; extra == "remote"
509
- Requires-Dist: dataclasses-json; extra == "remote"
510
- Requires-Dist: pydantic>=2.7; extra == "remote"
511
- Requires-Dist: unstructured-client>=0.25.8; extra == "remote"
512
- Requires-Dist: pandas; extra == "remote"
513
- Requires-Dist: python-dateutil; extra == "remote"
514
- Requires-Dist: opentelemetry-sdk; extra == "remote"
515
- Requires-Dist: click; extra == "remote"
516
- Provides-Extra: rst
517
- Requires-Dist: unstructured[rst]; extra == "rst"
518
- Requires-Dist: tqdm; extra == "rst"
519
- Requires-Dist: dataclasses-json; extra == "rst"
520
- Requires-Dist: pydantic>=2.7; extra == "rst"
521
- Requires-Dist: pandas; extra == "rst"
522
- Requires-Dist: python-dateutil; extra == "rst"
523
- Requires-Dist: opentelemetry-sdk; extra == "rst"
524
- Requires-Dist: click; extra == "rst"
525
- Provides-Extra: rtf
526
- Requires-Dist: tqdm; extra == "rtf"
527
- Requires-Dist: dataclasses-json; extra == "rtf"
528
- Requires-Dist: pydantic>=2.7; extra == "rtf"
529
- Requires-Dist: pandas; extra == "rtf"
530
- Requires-Dist: python-dateutil; extra == "rtf"
531
- Requires-Dist: opentelemetry-sdk; extra == "rtf"
532
- Requires-Dist: click; extra == "rtf"
533
- Requires-Dist: unstructured[rtf]; extra == "rtf"
534
- Provides-Extra: s3
535
- Requires-Dist: s3fs; extra == "s3"
536
- Requires-Dist: tqdm; extra == "s3"
537
- Requires-Dist: dataclasses-json; extra == "s3"
538
- Requires-Dist: pydantic>=2.7; extra == "s3"
539
- Requires-Dist: pandas; extra == "s3"
540
- Requires-Dist: python-dateutil; extra == "s3"
541
- Requires-Dist: opentelemetry-sdk; extra == "s3"
542
- Requires-Dist: click; extra == "s3"
543
- Requires-Dist: fsspec; extra == "s3"
544
- Provides-Extra: salesforce
545
- Requires-Dist: simple-salesforce; extra == "salesforce"
546
- Requires-Dist: tqdm; extra == "salesforce"
547
- Requires-Dist: dataclasses-json; extra == "salesforce"
548
- Requires-Dist: pydantic>=2.7; extra == "salesforce"
549
- Requires-Dist: pandas; extra == "salesforce"
550
- Requires-Dist: python-dateutil; extra == "salesforce"
551
- Requires-Dist: opentelemetry-sdk; extra == "salesforce"
552
- Requires-Dist: click; extra == "salesforce"
553
- Provides-Extra: sftp
554
- Requires-Dist: paramiko; extra == "sftp"
555
- Requires-Dist: tqdm; extra == "sftp"
556
- Requires-Dist: dataclasses-json; extra == "sftp"
557
- Requires-Dist: pydantic>=2.7; extra == "sftp"
558
- Requires-Dist: pandas; extra == "sftp"
559
- Requires-Dist: python-dateutil; extra == "sftp"
560
- Requires-Dist: opentelemetry-sdk; extra == "sftp"
561
- Requires-Dist: click; extra == "sftp"
562
- Requires-Dist: fsspec; extra == "sftp"
563
- Provides-Extra: sharepoint
564
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
565
- Requires-Dist: tqdm; extra == "sharepoint"
566
- Requires-Dist: dataclasses-json; extra == "sharepoint"
567
- Requires-Dist: msal; extra == "sharepoint"
568
- Requires-Dist: pydantic>=2.7; extra == "sharepoint"
569
- Requires-Dist: pandas; extra == "sharepoint"
570
- Requires-Dist: python-dateutil; extra == "sharepoint"
571
- Requires-Dist: opentelemetry-sdk; extra == "sharepoint"
572
- Requires-Dist: click; extra == "sharepoint"
573
- Provides-Extra: singlestore
574
- Requires-Dist: tqdm; extra == "singlestore"
575
- Requires-Dist: dataclasses-json; extra == "singlestore"
576
- Requires-Dist: pydantic>=2.7; extra == "singlestore"
577
- Requires-Dist: pandas; extra == "singlestore"
578
- Requires-Dist: python-dateutil; extra == "singlestore"
579
- Requires-Dist: opentelemetry-sdk; extra == "singlestore"
580
- Requires-Dist: click; extra == "singlestore"
581
- Requires-Dist: singlestoredb; extra == "singlestore"
582
- Provides-Extra: slack
583
- Requires-Dist: tqdm; extra == "slack"
584
- Requires-Dist: dataclasses-json; extra == "slack"
585
- Requires-Dist: pydantic>=2.7; extra == "slack"
586
- Requires-Dist: pandas; extra == "slack"
587
- Requires-Dist: python-dateutil; extra == "slack"
588
- Requires-Dist: opentelemetry-sdk; extra == "slack"
589
- Requires-Dist: click; extra == "slack"
590
- Requires-Dist: slack-sdk; extra == "slack"
591
- Provides-Extra: tsv
592
- Requires-Dist: unstructured[tsv]; extra == "tsv"
593
- Requires-Dist: tqdm; extra == "tsv"
594
- Requires-Dist: dataclasses-json; extra == "tsv"
595
- Requires-Dist: pydantic>=2.7; extra == "tsv"
596
- Requires-Dist: pandas; extra == "tsv"
597
- Requires-Dist: python-dateutil; extra == "tsv"
598
- Requires-Dist: opentelemetry-sdk; extra == "tsv"
599
- Requires-Dist: click; extra == "tsv"
600
- Provides-Extra: vectara
601
- Requires-Dist: tqdm; extra == "vectara"
602
- Requires-Dist: dataclasses-json; extra == "vectara"
603
- Requires-Dist: pydantic>=2.7; extra == "vectara"
604
- Requires-Dist: requests; extra == "vectara"
605
- Requires-Dist: pandas; extra == "vectara"
606
- Requires-Dist: python-dateutil; extra == "vectara"
607
- Requires-Dist: opentelemetry-sdk; extra == "vectara"
608
- Requires-Dist: click; extra == "vectara"
609
- Provides-Extra: weaviate
610
- Requires-Dist: tqdm; extra == "weaviate"
611
- Requires-Dist: dataclasses-json; extra == "weaviate"
612
- Requires-Dist: pydantic>=2.7; extra == "weaviate"
613
- Requires-Dist: pandas; extra == "weaviate"
614
- Requires-Dist: python-dateutil; extra == "weaviate"
615
- Requires-Dist: opentelemetry-sdk; extra == "weaviate"
616
- Requires-Dist: click; extra == "weaviate"
617
- Requires-Dist: weaviate-client; extra == "weaviate"
618
- Provides-Extra: wikipedia
619
- Requires-Dist: pydantic>=2.7; extra == "wikipedia"
620
- Requires-Dist: tqdm; extra == "wikipedia"
621
- Requires-Dist: dataclasses-json; extra == "wikipedia"
622
- Requires-Dist: wikipedia; extra == "wikipedia"
623
- Requires-Dist: pandas; extra == "wikipedia"
624
- Requires-Dist: python-dateutil; extra == "wikipedia"
625
- Requires-Dist: opentelemetry-sdk; extra == "wikipedia"
626
- Requires-Dist: click; extra == "wikipedia"
627
- Provides-Extra: xlsx
628
- Requires-Dist: unstructured[xlsx]; extra == "xlsx"
629
- Requires-Dist: tqdm; extra == "xlsx"
630
- Requires-Dist: dataclasses-json; extra == "xlsx"
631
- Requires-Dist: pydantic>=2.7; extra == "xlsx"
632
- Requires-Dist: pandas; extra == "xlsx"
633
- Requires-Dist: python-dateutil; extra == "xlsx"
634
- Requires-Dist: opentelemetry-sdk; extra == "xlsx"
635
- Requires-Dist: click; extra == "xlsx"
636
-
637
- # Unstructured Ingest
638
-
639
- For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.