unstructured-ingest 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (125)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/cli.py +6 -1
  3. unstructured_ingest/cli/cmds/__init__.py +4 -4
  4. unstructured_ingest/cli/cmds/{astra.py → astradb.py} +9 -9
  5. unstructured_ingest/cli/interfaces.py +13 -6
  6. unstructured_ingest/connector/{astra.py → astradb.py} +29 -29
  7. unstructured_ingest/connector/biomed.py +12 -5
  8. unstructured_ingest/connector/confluence.py +3 -3
  9. unstructured_ingest/connector/github.py +3 -2
  10. unstructured_ingest/connector/google_drive.py +1 -2
  11. unstructured_ingest/connector/mongodb.py +1 -2
  12. unstructured_ingest/connector/notion/client.py +31 -16
  13. unstructured_ingest/connector/notion/connector.py +3 -2
  14. unstructured_ingest/connector/registry.py +2 -2
  15. unstructured_ingest/connector/vectara.py +7 -2
  16. unstructured_ingest/interfaces.py +13 -9
  17. unstructured_ingest/pipeline/interfaces.py +8 -3
  18. unstructured_ingest/pipeline/reformat/chunking.py +13 -9
  19. unstructured_ingest/pipeline/reformat/embedding.py +3 -3
  20. unstructured_ingest/runner/__init__.py +2 -2
  21. unstructured_ingest/runner/{astra.py → astradb.py} +7 -7
  22. unstructured_ingest/runner/writers/__init__.py +2 -2
  23. unstructured_ingest/runner/writers/{astra.py → astradb.py} +7 -7
  24. unstructured_ingest/utils/chunking.py +45 -0
  25. unstructured_ingest/utils/dep_check.py +1 -1
  26. unstructured_ingest/utils/google_filetype.py +9 -0
  27. unstructured_ingest/v2/cli/base/cmd.py +57 -13
  28. unstructured_ingest/v2/cli/base/dest.py +21 -12
  29. unstructured_ingest/v2/cli/base/src.py +35 -23
  30. unstructured_ingest/v2/cli/cmds.py +14 -0
  31. unstructured_ingest/v2/cli/{utils.py → utils/click.py} +36 -89
  32. unstructured_ingest/v2/cli/utils/model_conversion.py +199 -0
  33. unstructured_ingest/v2/interfaces/connector.py +5 -7
  34. unstructured_ingest/v2/interfaces/downloader.py +8 -5
  35. unstructured_ingest/v2/interfaces/file_data.py +8 -2
  36. unstructured_ingest/v2/interfaces/indexer.py +3 -4
  37. unstructured_ingest/v2/interfaces/processor.py +10 -10
  38. unstructured_ingest/v2/interfaces/upload_stager.py +3 -3
  39. unstructured_ingest/v2/interfaces/uploader.py +3 -3
  40. unstructured_ingest/v2/pipeline/pipeline.py +9 -6
  41. unstructured_ingest/v2/pipeline/steps/chunk.py +5 -11
  42. unstructured_ingest/v2/pipeline/steps/download.py +13 -11
  43. unstructured_ingest/v2/pipeline/steps/embed.py +5 -11
  44. unstructured_ingest/v2/pipeline/steps/filter.py +1 -6
  45. unstructured_ingest/v2/pipeline/steps/index.py +14 -10
  46. unstructured_ingest/v2/pipeline/steps/partition.py +5 -5
  47. unstructured_ingest/v2/pipeline/steps/stage.py +4 -7
  48. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -6
  49. unstructured_ingest/v2/pipeline/steps/upload.py +2 -9
  50. unstructured_ingest/v2/processes/__init__.py +18 -0
  51. unstructured_ingest/v2/processes/chunker.py +74 -28
  52. unstructured_ingest/v2/processes/connector_registry.py +8 -2
  53. unstructured_ingest/v2/processes/connectors/__init__.py +18 -3
  54. unstructured_ingest/v2/processes/connectors/{astra.py → astradb.py} +46 -39
  55. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +30 -27
  56. unstructured_ingest/v2/processes/connectors/chroma.py +30 -21
  57. unstructured_ingest/v2/processes/connectors/couchbase.py +333 -0
  58. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +87 -32
  59. unstructured_ingest/v2/processes/connectors/elasticsearch.py +70 -45
  60. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +39 -16
  61. unstructured_ingest/v2/processes/connectors/fsspec/box.py +15 -13
  62. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +10 -11
  63. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +20 -34
  64. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +38 -13
  65. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +31 -17
  66. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -28
  67. unstructured_ingest/v2/processes/connectors/google_drive.py +40 -34
  68. unstructured_ingest/v2/processes/connectors/kdbai.py +170 -0
  69. unstructured_ingest/v2/processes/connectors/local.py +27 -16
  70. unstructured_ingest/v2/processes/connectors/milvus.py +22 -18
  71. unstructured_ingest/v2/processes/connectors/mongodb.py +22 -18
  72. unstructured_ingest/v2/processes/connectors/onedrive.py +17 -14
  73. unstructured_ingest/v2/processes/connectors/opensearch.py +66 -56
  74. unstructured_ingest/v2/processes/connectors/pinecone.py +22 -21
  75. unstructured_ingest/v2/processes/connectors/salesforce.py +26 -18
  76. unstructured_ingest/v2/processes/connectors/sharepoint.py +51 -26
  77. unstructured_ingest/v2/processes/connectors/singlestore.py +11 -15
  78. unstructured_ingest/v2/processes/connectors/sql.py +29 -31
  79. unstructured_ingest/v2/processes/connectors/weaviate.py +22 -13
  80. unstructured_ingest/v2/processes/embedder.py +106 -47
  81. unstructured_ingest/v2/processes/filter.py +11 -5
  82. unstructured_ingest/v2/processes/partitioner.py +79 -33
  83. unstructured_ingest/v2/processes/uncompress.py +3 -3
  84. unstructured_ingest/v2/utils.py +45 -0
  85. unstructured_ingest-0.0.5.dist-info/LICENSE.md +201 -0
  86. unstructured_ingest-0.0.5.dist-info/METADATA +574 -0
  87. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.5.dist-info}/RECORD +91 -116
  88. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.5.dist-info}/WHEEL +1 -1
  89. unstructured_ingest/v2/cli/cmds/__init__.py +0 -89
  90. unstructured_ingest/v2/cli/cmds/astra.py +0 -85
  91. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +0 -72
  92. unstructured_ingest/v2/cli/cmds/chroma.py +0 -108
  93. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +0 -161
  94. unstructured_ingest/v2/cli/cmds/elasticsearch.py +0 -159
  95. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +0 -84
  96. unstructured_ingest/v2/cli/cmds/fsspec/box.py +0 -58
  97. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +0 -58
  98. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +0 -69
  99. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +0 -81
  100. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +0 -84
  101. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +0 -80
  102. unstructured_ingest/v2/cli/cmds/google_drive.py +0 -74
  103. unstructured_ingest/v2/cli/cmds/local.py +0 -52
  104. unstructured_ingest/v2/cli/cmds/milvus.py +0 -72
  105. unstructured_ingest/v2/cli/cmds/mongodb.py +0 -62
  106. unstructured_ingest/v2/cli/cmds/onedrive.py +0 -91
  107. unstructured_ingest/v2/cli/cmds/opensearch.py +0 -93
  108. unstructured_ingest/v2/cli/cmds/pinecone.py +0 -62
  109. unstructured_ingest/v2/cli/cmds/salesforce.py +0 -79
  110. unstructured_ingest/v2/cli/cmds/sharepoint.py +0 -112
  111. unstructured_ingest/v2/cli/cmds/singlestore.py +0 -96
  112. unstructured_ingest/v2/cli/cmds/sql.py +0 -84
  113. unstructured_ingest/v2/cli/cmds/weaviate.py +0 -100
  114. unstructured_ingest/v2/cli/configs/__init__.py +0 -13
  115. unstructured_ingest/v2/cli/configs/chunk.py +0 -89
  116. unstructured_ingest/v2/cli/configs/embed.py +0 -74
  117. unstructured_ingest/v2/cli/configs/filter.py +0 -28
  118. unstructured_ingest/v2/cli/configs/partition.py +0 -99
  119. unstructured_ingest/v2/cli/configs/processor.py +0 -88
  120. unstructured_ingest/v2/cli/interfaces.py +0 -27
  121. unstructured_ingest/v2/pipeline/utils.py +0 -15
  122. unstructured_ingest-0.0.3.dist-info/METADATA +0 -175
  123. /unstructured_ingest/v2/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  124. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.5.dist-info}/entry_points.txt +0 -0
  125. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,574 @@
1
+ Metadata-Version: 2.1
2
+ Name: unstructured-ingest
3
+ Version: 0.0.5
4
+ Summary: A library that prepares raw documents for downstream ML tasks.
5
+ Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
+ Author: Unstructured Technologies
7
+ Author-email: devops@unstructuredai.io
8
+ License: Apache-2.0
9
+ Keywords: NLP PDF HTML CV XML parsing preprocessing
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9.0,<3.13
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE.md
25
+ Requires-Dist: pandas
26
+ Requires-Dist: dataclasses-json
27
+ Requires-Dist: tqdm
28
+ Requires-Dist: python-dateutil
29
+ Requires-Dist: pydantic
30
+ Requires-Dist: click
31
+ Provides-Extra: airtable
32
+ Requires-Dist: pandas; extra == "airtable"
33
+ Requires-Dist: dataclasses-json; extra == "airtable"
34
+ Requires-Dist: tqdm; extra == "airtable"
35
+ Requires-Dist: pyairtable; extra == "airtable"
36
+ Requires-Dist: python-dateutil; extra == "airtable"
37
+ Requires-Dist: pydantic; extra == "airtable"
38
+ Requires-Dist: click; extra == "airtable"
39
+ Provides-Extra: astradb
40
+ Requires-Dist: pandas; extra == "astradb"
41
+ Requires-Dist: dataclasses-json; extra == "astradb"
42
+ Requires-Dist: tqdm; extra == "astradb"
43
+ Requires-Dist: python-dateutil; extra == "astradb"
44
+ Requires-Dist: pydantic; extra == "astradb"
45
+ Requires-Dist: click; extra == "astradb"
46
+ Requires-Dist: astrapy; extra == "astradb"
47
+ Provides-Extra: azure
48
+ Requires-Dist: pandas; extra == "azure"
49
+ Requires-Dist: dataclasses-json; extra == "azure"
50
+ Requires-Dist: tqdm; extra == "azure"
51
+ Requires-Dist: python-dateutil; extra == "azure"
52
+ Requires-Dist: pydantic; extra == "azure"
53
+ Requires-Dist: click; extra == "azure"
54
+ Requires-Dist: fsspec; extra == "azure"
55
+ Requires-Dist: adlfs; extra == "azure"
56
+ Provides-Extra: azure-cognitive-search
57
+ Requires-Dist: pandas; extra == "azure-cognitive-search"
58
+ Requires-Dist: dataclasses-json; extra == "azure-cognitive-search"
59
+ Requires-Dist: tqdm; extra == "azure-cognitive-search"
60
+ Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
61
+ Requires-Dist: python-dateutil; extra == "azure-cognitive-search"
62
+ Requires-Dist: pydantic; extra == "azure-cognitive-search"
63
+ Requires-Dist: click; extra == "azure-cognitive-search"
64
+ Provides-Extra: bedrock
65
+ Requires-Dist: pandas; extra == "bedrock"
66
+ Requires-Dist: dataclasses-json; extra == "bedrock"
67
+ Requires-Dist: unstructured; extra == "bedrock"
68
+ Requires-Dist: tqdm; extra == "bedrock"
69
+ Requires-Dist: boto3; extra == "bedrock"
70
+ Requires-Dist: python-dateutil; extra == "bedrock"
71
+ Requires-Dist: langchain-community; extra == "bedrock"
72
+ Requires-Dist: pydantic; extra == "bedrock"
73
+ Requires-Dist: click; extra == "bedrock"
74
+ Provides-Extra: biomed
75
+ Requires-Dist: pandas; extra == "biomed"
76
+ Requires-Dist: dataclasses-json; extra == "biomed"
77
+ Requires-Dist: bs4; extra == "biomed"
78
+ Requires-Dist: tqdm; extra == "biomed"
79
+ Requires-Dist: python-dateutil; extra == "biomed"
80
+ Requires-Dist: pydantic; extra == "biomed"
81
+ Requires-Dist: click; extra == "biomed"
82
+ Requires-Dist: requests; extra == "biomed"
83
+ Provides-Extra: box
84
+ Requires-Dist: pandas; extra == "box"
85
+ Requires-Dist: dataclasses-json; extra == "box"
86
+ Requires-Dist: boxfs; extra == "box"
87
+ Requires-Dist: tqdm; extra == "box"
88
+ Requires-Dist: python-dateutil; extra == "box"
89
+ Requires-Dist: pydantic; extra == "box"
90
+ Requires-Dist: click; extra == "box"
91
+ Requires-Dist: fsspec; extra == "box"
92
+ Provides-Extra: chroma
93
+ Requires-Dist: click; extra == "chroma"
94
+ Requires-Dist: pandas; extra == "chroma"
95
+ Requires-Dist: chromadb; extra == "chroma"
96
+ Requires-Dist: typer<=0.9.0; extra == "chroma"
97
+ Requires-Dist: dataclasses-json; extra == "chroma"
98
+ Requires-Dist: tqdm; extra == "chroma"
99
+ Requires-Dist: python-dateutil; extra == "chroma"
100
+ Requires-Dist: pydantic; extra == "chroma"
101
+ Requires-Dist: importlib-metadata>=7.1.0; extra == "chroma"
102
+ Provides-Extra: clarifai
103
+ Requires-Dist: pandas; extra == "clarifai"
104
+ Requires-Dist: dataclasses-json; extra == "clarifai"
105
+ Requires-Dist: tqdm; extra == "clarifai"
106
+ Requires-Dist: python-dateutil; extra == "clarifai"
107
+ Requires-Dist: pydantic; extra == "clarifai"
108
+ Requires-Dist: click; extra == "clarifai"
109
+ Requires-Dist: clarifai; extra == "clarifai"
110
+ Provides-Extra: confluence
111
+ Requires-Dist: pandas; extra == "confluence"
112
+ Requires-Dist: atlassian-python-api; extra == "confluence"
113
+ Requires-Dist: dataclasses-json; extra == "confluence"
114
+ Requires-Dist: tqdm; extra == "confluence"
115
+ Requires-Dist: python-dateutil; extra == "confluence"
116
+ Requires-Dist: pydantic; extra == "confluence"
117
+ Requires-Dist: click; extra == "confluence"
118
+ Requires-Dist: requests; extra == "confluence"
119
+ Provides-Extra: couchbase
120
+ Requires-Dist: pandas; extra == "couchbase"
121
+ Requires-Dist: couchbase; extra == "couchbase"
122
+ Requires-Dist: dataclasses-json; extra == "couchbase"
123
+ Requires-Dist: tqdm; extra == "couchbase"
124
+ Requires-Dist: python-dateutil; extra == "couchbase"
125
+ Requires-Dist: pydantic; extra == "couchbase"
126
+ Requires-Dist: click; extra == "couchbase"
127
+ Provides-Extra: csv
128
+ Requires-Dist: pandas; extra == "csv"
129
+ Requires-Dist: dataclasses-json; extra == "csv"
130
+ Requires-Dist: tqdm; extra == "csv"
131
+ Requires-Dist: python-dateutil; extra == "csv"
132
+ Requires-Dist: pydantic; extra == "csv"
133
+ Requires-Dist: click; extra == "csv"
134
+ Requires-Dist: unstructured[tsv]; extra == "csv"
135
+ Provides-Extra: databricks-volumes
136
+ Requires-Dist: pandas; extra == "databricks-volumes"
137
+ Requires-Dist: databricks-sdk; extra == "databricks-volumes"
138
+ Requires-Dist: dataclasses-json; extra == "databricks-volumes"
139
+ Requires-Dist: tqdm; extra == "databricks-volumes"
140
+ Requires-Dist: python-dateutil; extra == "databricks-volumes"
141
+ Requires-Dist: pydantic; extra == "databricks-volumes"
142
+ Requires-Dist: click; extra == "databricks-volumes"
143
+ Provides-Extra: delta-table
144
+ Requires-Dist: pandas; extra == "delta-table"
145
+ Requires-Dist: dataclasses-json; extra == "delta-table"
146
+ Requires-Dist: tqdm; extra == "delta-table"
147
+ Requires-Dist: python-dateutil; extra == "delta-table"
148
+ Requires-Dist: pydantic; extra == "delta-table"
149
+ Requires-Dist: click; extra == "delta-table"
150
+ Requires-Dist: fsspec; extra == "delta-table"
151
+ Requires-Dist: deltalake; extra == "delta-table"
152
+ Provides-Extra: discord
153
+ Requires-Dist: pandas; extra == "discord"
154
+ Requires-Dist: dataclasses-json; extra == "discord"
155
+ Requires-Dist: tqdm; extra == "discord"
156
+ Requires-Dist: python-dateutil; extra == "discord"
157
+ Requires-Dist: discord-py; extra == "discord"
158
+ Requires-Dist: pydantic; extra == "discord"
159
+ Requires-Dist: click; extra == "discord"
160
+ Provides-Extra: doc
161
+ Requires-Dist: pandas; extra == "doc"
162
+ Requires-Dist: dataclasses-json; extra == "doc"
163
+ Requires-Dist: tqdm; extra == "doc"
164
+ Requires-Dist: python-dateutil; extra == "doc"
165
+ Requires-Dist: unstructured[docx]; extra == "doc"
166
+ Requires-Dist: pydantic; extra == "doc"
167
+ Requires-Dist: click; extra == "doc"
168
+ Provides-Extra: docx
169
+ Requires-Dist: pandas; extra == "docx"
170
+ Requires-Dist: dataclasses-json; extra == "docx"
171
+ Requires-Dist: tqdm; extra == "docx"
172
+ Requires-Dist: python-dateutil; extra == "docx"
173
+ Requires-Dist: unstructured[docx]; extra == "docx"
174
+ Requires-Dist: pydantic; extra == "docx"
175
+ Requires-Dist: click; extra == "docx"
176
+ Provides-Extra: dropbox
177
+ Requires-Dist: pandas; extra == "dropbox"
178
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
179
+ Requires-Dist: dataclasses-json; extra == "dropbox"
180
+ Requires-Dist: tqdm; extra == "dropbox"
181
+ Requires-Dist: python-dateutil; extra == "dropbox"
182
+ Requires-Dist: pydantic; extra == "dropbox"
183
+ Requires-Dist: click; extra == "dropbox"
184
+ Requires-Dist: fsspec; extra == "dropbox"
185
+ Provides-Extra: elasticsearch
186
+ Requires-Dist: pandas; extra == "elasticsearch"
187
+ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
188
+ Requires-Dist: dataclasses-json; extra == "elasticsearch"
189
+ Requires-Dist: tqdm; extra == "elasticsearch"
190
+ Requires-Dist: python-dateutil; extra == "elasticsearch"
191
+ Requires-Dist: pydantic; extra == "elasticsearch"
192
+ Requires-Dist: click; extra == "elasticsearch"
193
+ Provides-Extra: embed-huggingface
194
+ Requires-Dist: pandas; extra == "embed-huggingface"
195
+ Requires-Dist: dataclasses-json; extra == "embed-huggingface"
196
+ Requires-Dist: unstructured; extra == "embed-huggingface"
197
+ Requires-Dist: tqdm; extra == "embed-huggingface"
198
+ Requires-Dist: python-dateutil; extra == "embed-huggingface"
199
+ Requires-Dist: pydantic; extra == "embed-huggingface"
200
+ Requires-Dist: click; extra == "embed-huggingface"
201
+ Requires-Dist: langchain-huggingface; extra == "embed-huggingface"
202
+ Provides-Extra: embed-octoai
203
+ Requires-Dist: tiktoken; extra == "embed-octoai"
204
+ Requires-Dist: pandas; extra == "embed-octoai"
205
+ Requires-Dist: openai; extra == "embed-octoai"
206
+ Requires-Dist: dataclasses-json; extra == "embed-octoai"
207
+ Requires-Dist: unstructured; extra == "embed-octoai"
208
+ Requires-Dist: tqdm; extra == "embed-octoai"
209
+ Requires-Dist: python-dateutil; extra == "embed-octoai"
210
+ Requires-Dist: pydantic; extra == "embed-octoai"
211
+ Requires-Dist: click; extra == "embed-octoai"
212
+ Provides-Extra: embed-vertexai
213
+ Requires-Dist: click; extra == "embed-vertexai"
214
+ Requires-Dist: pandas; extra == "embed-vertexai"
215
+ Requires-Dist: dataclasses-json; extra == "embed-vertexai"
216
+ Requires-Dist: unstructured; extra == "embed-vertexai"
217
+ Requires-Dist: tqdm; extra == "embed-vertexai"
218
+ Requires-Dist: langchain-google-vertexai; extra == "embed-vertexai"
219
+ Requires-Dist: python-dateutil; extra == "embed-vertexai"
220
+ Requires-Dist: langchain-community; extra == "embed-vertexai"
221
+ Requires-Dist: pydantic; extra == "embed-vertexai"
222
+ Requires-Dist: langchain; extra == "embed-vertexai"
223
+ Provides-Extra: embed-voyageai
224
+ Requires-Dist: click; extra == "embed-voyageai"
225
+ Requires-Dist: pandas; extra == "embed-voyageai"
226
+ Requires-Dist: dataclasses-json; extra == "embed-voyageai"
227
+ Requires-Dist: unstructured; extra == "embed-voyageai"
228
+ Requires-Dist: tqdm; extra == "embed-voyageai"
229
+ Requires-Dist: python-dateutil; extra == "embed-voyageai"
230
+ Requires-Dist: pydantic; extra == "embed-voyageai"
231
+ Requires-Dist: langchain; extra == "embed-voyageai"
232
+ Requires-Dist: langchain-voyageai; extra == "embed-voyageai"
233
+ Provides-Extra: epub
234
+ Requires-Dist: pandas; extra == "epub"
235
+ Requires-Dist: dataclasses-json; extra == "epub"
236
+ Requires-Dist: tqdm; extra == "epub"
237
+ Requires-Dist: unstructured[epub]; extra == "epub"
238
+ Requires-Dist: python-dateutil; extra == "epub"
239
+ Requires-Dist: pydantic; extra == "epub"
240
+ Requires-Dist: click; extra == "epub"
241
+ Provides-Extra: gcs
242
+ Requires-Dist: pandas; extra == "gcs"
243
+ Requires-Dist: dataclasses-json; extra == "gcs"
244
+ Requires-Dist: bs4; extra == "gcs"
245
+ Requires-Dist: tqdm; extra == "gcs"
246
+ Requires-Dist: python-dateutil; extra == "gcs"
247
+ Requires-Dist: pydantic; extra == "gcs"
248
+ Requires-Dist: click; extra == "gcs"
249
+ Requires-Dist: fsspec; extra == "gcs"
250
+ Requires-Dist: gcsfs; extra == "gcs"
251
+ Provides-Extra: github
252
+ Requires-Dist: pandas; extra == "github"
253
+ Requires-Dist: dataclasses-json; extra == "github"
254
+ Requires-Dist: pygithub>1.58.0; extra == "github"
255
+ Requires-Dist: tqdm; extra == "github"
256
+ Requires-Dist: python-dateutil; extra == "github"
257
+ Requires-Dist: pydantic; extra == "github"
258
+ Requires-Dist: click; extra == "github"
259
+ Requires-Dist: requests; extra == "github"
260
+ Provides-Extra: gitlab
261
+ Requires-Dist: python-gitlab; extra == "gitlab"
262
+ Requires-Dist: pandas; extra == "gitlab"
263
+ Requires-Dist: dataclasses-json; extra == "gitlab"
264
+ Requires-Dist: tqdm; extra == "gitlab"
265
+ Requires-Dist: python-dateutil; extra == "gitlab"
266
+ Requires-Dist: pydantic; extra == "gitlab"
267
+ Requires-Dist: click; extra == "gitlab"
268
+ Provides-Extra: google-drive
269
+ Requires-Dist: pandas; extra == "google-drive"
270
+ Requires-Dist: dataclasses-json; extra == "google-drive"
271
+ Requires-Dist: tqdm; extra == "google-drive"
272
+ Requires-Dist: google-api-python-client; extra == "google-drive"
273
+ Requires-Dist: python-dateutil; extra == "google-drive"
274
+ Requires-Dist: pydantic; extra == "google-drive"
275
+ Requires-Dist: click; extra == "google-drive"
276
+ Provides-Extra: hubspot
277
+ Requires-Dist: pandas; extra == "hubspot"
278
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
279
+ Requires-Dist: dataclasses-json; extra == "hubspot"
280
+ Requires-Dist: tqdm; extra == "hubspot"
281
+ Requires-Dist: urllib3; extra == "hubspot"
282
+ Requires-Dist: python-dateutil; extra == "hubspot"
283
+ Requires-Dist: pydantic; extra == "hubspot"
284
+ Requires-Dist: click; extra == "hubspot"
285
+ Provides-Extra: jira
286
+ Requires-Dist: pandas; extra == "jira"
287
+ Requires-Dist: atlassian-python-api; extra == "jira"
288
+ Requires-Dist: dataclasses-json; extra == "jira"
289
+ Requires-Dist: tqdm; extra == "jira"
290
+ Requires-Dist: python-dateutil; extra == "jira"
291
+ Requires-Dist: pydantic; extra == "jira"
292
+ Requires-Dist: click; extra == "jira"
293
+ Provides-Extra: kafka
294
+ Requires-Dist: confluent-kafka; extra == "kafka"
295
+ Requires-Dist: pandas; extra == "kafka"
296
+ Requires-Dist: dataclasses-json; extra == "kafka"
297
+ Requires-Dist: tqdm; extra == "kafka"
298
+ Requires-Dist: python-dateutil; extra == "kafka"
299
+ Requires-Dist: pydantic; extra == "kafka"
300
+ Requires-Dist: click; extra == "kafka"
301
+ Provides-Extra: kdbai
302
+ Requires-Dist: kdbai-client; extra == "kdbai"
303
+ Provides-Extra: md
304
+ Requires-Dist: pandas; extra == "md"
305
+ Requires-Dist: unstructured[md]; extra == "md"
306
+ Requires-Dist: dataclasses-json; extra == "md"
307
+ Requires-Dist: tqdm; extra == "md"
308
+ Requires-Dist: python-dateutil; extra == "md"
309
+ Requires-Dist: pydantic; extra == "md"
310
+ Requires-Dist: click; extra == "md"
311
+ Provides-Extra: milvus
312
+ Requires-Dist: pandas; extra == "milvus"
313
+ Requires-Dist: dataclasses-json; extra == "milvus"
314
+ Requires-Dist: tqdm; extra == "milvus"
315
+ Requires-Dist: pymilvus; extra == "milvus"
316
+ Requires-Dist: python-dateutil; extra == "milvus"
317
+ Requires-Dist: pydantic; extra == "milvus"
318
+ Requires-Dist: click; extra == "milvus"
319
+ Provides-Extra: mongodb
320
+ Requires-Dist: pandas; extra == "mongodb"
321
+ Requires-Dist: dataclasses-json; extra == "mongodb"
322
+ Requires-Dist: tqdm; extra == "mongodb"
323
+ Requires-Dist: pymongo; extra == "mongodb"
324
+ Requires-Dist: python-dateutil; extra == "mongodb"
325
+ Requires-Dist: pydantic; extra == "mongodb"
326
+ Requires-Dist: click; extra == "mongodb"
327
+ Provides-Extra: msg
328
+ Requires-Dist: unstructured[msg]; extra == "msg"
329
+ Requires-Dist: pandas; extra == "msg"
330
+ Requires-Dist: dataclasses-json; extra == "msg"
331
+ Requires-Dist: tqdm; extra == "msg"
332
+ Requires-Dist: python-dateutil; extra == "msg"
333
+ Requires-Dist: pydantic; extra == "msg"
334
+ Requires-Dist: click; extra == "msg"
335
+ Provides-Extra: notion
336
+ Requires-Dist: pandas; extra == "notion"
337
+ Requires-Dist: htmlBuilder; extra == "notion"
338
+ Requires-Dist: backoff; extra == "notion"
339
+ Requires-Dist: notion-client; extra == "notion"
340
+ Requires-Dist: dataclasses-json; extra == "notion"
341
+ Requires-Dist: tqdm; extra == "notion"
342
+ Requires-Dist: python-dateutil; extra == "notion"
343
+ Requires-Dist: pydantic; extra == "notion"
344
+ Requires-Dist: click; extra == "notion"
345
+ Requires-Dist: httpx; extra == "notion"
346
+ Provides-Extra: odt
347
+ Requires-Dist: pandas; extra == "odt"
348
+ Requires-Dist: dataclasses-json; extra == "odt"
349
+ Requires-Dist: tqdm; extra == "odt"
350
+ Requires-Dist: python-dateutil; extra == "odt"
351
+ Requires-Dist: pydantic; extra == "odt"
352
+ Requires-Dist: click; extra == "odt"
353
+ Requires-Dist: unstructured[odt]; extra == "odt"
354
+ Provides-Extra: onedrive
355
+ Requires-Dist: msal; extra == "onedrive"
356
+ Requires-Dist: pandas; extra == "onedrive"
357
+ Requires-Dist: dataclasses-json; extra == "onedrive"
358
+ Requires-Dist: bs4; extra == "onedrive"
359
+ Requires-Dist: tqdm; extra == "onedrive"
360
+ Requires-Dist: python-dateutil; extra == "onedrive"
361
+ Requires-Dist: pydantic; extra == "onedrive"
362
+ Requires-Dist: click; extra == "onedrive"
363
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
364
+ Provides-Extra: openai
365
+ Requires-Dist: tiktoken; extra == "openai"
366
+ Requires-Dist: pandas; extra == "openai"
367
+ Requires-Dist: openai; extra == "openai"
368
+ Requires-Dist: dataclasses-json; extra == "openai"
369
+ Requires-Dist: unstructured; extra == "openai"
370
+ Requires-Dist: tqdm; extra == "openai"
371
+ Requires-Dist: python-dateutil; extra == "openai"
372
+ Requires-Dist: langchain-community; extra == "openai"
373
+ Requires-Dist: pydantic; extra == "openai"
374
+ Requires-Dist: click; extra == "openai"
375
+ Provides-Extra: opensearch
376
+ Requires-Dist: pandas; extra == "opensearch"
377
+ Requires-Dist: dataclasses-json; extra == "opensearch"
378
+ Requires-Dist: tqdm; extra == "opensearch"
379
+ Requires-Dist: opensearch-py; extra == "opensearch"
380
+ Requires-Dist: python-dateutil; extra == "opensearch"
381
+ Requires-Dist: pydantic; extra == "opensearch"
382
+ Requires-Dist: click; extra == "opensearch"
383
+ Provides-Extra: org
384
+ Requires-Dist: pandas; extra == "org"
385
+ Requires-Dist: unstructured[org]; extra == "org"
386
+ Requires-Dist: dataclasses-json; extra == "org"
387
+ Requires-Dist: tqdm; extra == "org"
388
+ Requires-Dist: python-dateutil; extra == "org"
389
+ Requires-Dist: pydantic; extra == "org"
390
+ Requires-Dist: click; extra == "org"
391
+ Provides-Extra: outlook
392
+ Requires-Dist: msal; extra == "outlook"
393
+ Requires-Dist: pandas; extra == "outlook"
394
+ Requires-Dist: dataclasses-json; extra == "outlook"
395
+ Requires-Dist: tqdm; extra == "outlook"
396
+ Requires-Dist: python-dateutil; extra == "outlook"
397
+ Requires-Dist: pydantic; extra == "outlook"
398
+ Requires-Dist: click; extra == "outlook"
399
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
400
+ Provides-Extra: pdf
401
+ Requires-Dist: unstructured[pdf]; extra == "pdf"
402
+ Requires-Dist: pandas; extra == "pdf"
403
+ Requires-Dist: dataclasses-json; extra == "pdf"
404
+ Requires-Dist: tqdm; extra == "pdf"
405
+ Requires-Dist: python-dateutil; extra == "pdf"
406
+ Requires-Dist: pydantic; extra == "pdf"
407
+ Requires-Dist: click; extra == "pdf"
408
+ Provides-Extra: pinecone
409
+ Requires-Dist: pandas; extra == "pinecone"
410
+ Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
411
+ Requires-Dist: dataclasses-json; extra == "pinecone"
412
+ Requires-Dist: tqdm; extra == "pinecone"
413
+ Requires-Dist: python-dateutil; extra == "pinecone"
414
+ Requires-Dist: pydantic; extra == "pinecone"
415
+ Requires-Dist: click; extra == "pinecone"
416
+ Provides-Extra: postgres
417
+ Requires-Dist: pandas; extra == "postgres"
418
+ Requires-Dist: psycopg2-binary; extra == "postgres"
419
+ Requires-Dist: dataclasses-json; extra == "postgres"
420
+ Requires-Dist: tqdm; extra == "postgres"
421
+ Requires-Dist: python-dateutil; extra == "postgres"
422
+ Requires-Dist: pydantic; extra == "postgres"
423
+ Requires-Dist: click; extra == "postgres"
424
+ Provides-Extra: ppt
425
+ Requires-Dist: pandas; extra == "ppt"
426
+ Requires-Dist: dataclasses-json; extra == "ppt"
427
+ Requires-Dist: tqdm; extra == "ppt"
428
+ Requires-Dist: python-dateutil; extra == "ppt"
429
+ Requires-Dist: pydantic; extra == "ppt"
430
+ Requires-Dist: click; extra == "ppt"
431
+ Requires-Dist: unstructured[pptx]; extra == "ppt"
432
+ Provides-Extra: pptx
433
+ Requires-Dist: pandas; extra == "pptx"
434
+ Requires-Dist: dataclasses-json; extra == "pptx"
435
+ Requires-Dist: tqdm; extra == "pptx"
436
+ Requires-Dist: python-dateutil; extra == "pptx"
437
+ Requires-Dist: pydantic; extra == "pptx"
438
+ Requires-Dist: click; extra == "pptx"
439
+ Requires-Dist: unstructured[pptx]; extra == "pptx"
440
+ Provides-Extra: qdrant
441
+ Requires-Dist: pandas; extra == "qdrant"
442
+ Requires-Dist: dataclasses-json; extra == "qdrant"
443
+ Requires-Dist: tqdm; extra == "qdrant"
444
+ Requires-Dist: python-dateutil; extra == "qdrant"
445
+ Requires-Dist: qdrant-client; extra == "qdrant"
446
+ Requires-Dist: pydantic; extra == "qdrant"
447
+ Requires-Dist: click; extra == "qdrant"
448
+ Provides-Extra: reddit
449
+ Requires-Dist: pandas; extra == "reddit"
450
+ Requires-Dist: dataclasses-json; extra == "reddit"
451
+ Requires-Dist: tqdm; extra == "reddit"
452
+ Requires-Dist: python-dateutil; extra == "reddit"
453
+ Requires-Dist: pydantic; extra == "reddit"
454
+ Requires-Dist: click; extra == "reddit"
455
+ Requires-Dist: praw; extra == "reddit"
456
+ Provides-Extra: remote
457
+ Requires-Dist: pandas; extra == "remote"
458
+ Requires-Dist: dataclasses-json; extra == "remote"
459
+ Requires-Dist: tqdm; extra == "remote"
460
+ Requires-Dist: unstructured-client; extra == "remote"
461
+ Requires-Dist: python-dateutil; extra == "remote"
462
+ Requires-Dist: pydantic; extra == "remote"
463
+ Requires-Dist: click; extra == "remote"
464
+ Provides-Extra: rst
465
+ Requires-Dist: pandas; extra == "rst"
466
+ Requires-Dist: dataclasses-json; extra == "rst"
467
+ Requires-Dist: tqdm; extra == "rst"
468
+ Requires-Dist: python-dateutil; extra == "rst"
469
+ Requires-Dist: pydantic; extra == "rst"
470
+ Requires-Dist: click; extra == "rst"
471
+ Requires-Dist: unstructured[rst]; extra == "rst"
472
+ Provides-Extra: rtf
473
+ Requires-Dist: pandas; extra == "rtf"
474
+ Requires-Dist: dataclasses-json; extra == "rtf"
475
+ Requires-Dist: unstructured[rtf]; extra == "rtf"
476
+ Requires-Dist: tqdm; extra == "rtf"
477
+ Requires-Dist: python-dateutil; extra == "rtf"
478
+ Requires-Dist: pydantic; extra == "rtf"
479
+ Requires-Dist: click; extra == "rtf"
480
+ Provides-Extra: s3
481
+ Requires-Dist: pandas; extra == "s3"
482
+ Requires-Dist: dataclasses-json; extra == "s3"
483
+ Requires-Dist: tqdm; extra == "s3"
484
+ Requires-Dist: s3fs; extra == "s3"
485
+ Requires-Dist: python-dateutil; extra == "s3"
486
+ Requires-Dist: pydantic; extra == "s3"
487
+ Requires-Dist: click; extra == "s3"
488
+ Requires-Dist: fsspec; extra == "s3"
489
+ Provides-Extra: salesforce
490
+ Requires-Dist: pandas; extra == "salesforce"
491
+ Requires-Dist: dataclasses-json; extra == "salesforce"
492
+ Requires-Dist: tqdm; extra == "salesforce"
493
+ Requires-Dist: simple-salesforce; extra == "salesforce"
494
+ Requires-Dist: python-dateutil; extra == "salesforce"
495
+ Requires-Dist: pydantic; extra == "salesforce"
496
+ Requires-Dist: click; extra == "salesforce"
497
+ Provides-Extra: sftp
498
+ Requires-Dist: pandas; extra == "sftp"
499
+ Requires-Dist: dataclasses-json; extra == "sftp"
500
+ Requires-Dist: tqdm; extra == "sftp"
501
+ Requires-Dist: paramiko; extra == "sftp"
502
+ Requires-Dist: python-dateutil; extra == "sftp"
503
+ Requires-Dist: pydantic; extra == "sftp"
504
+ Requires-Dist: click; extra == "sftp"
505
+ Requires-Dist: fsspec; extra == "sftp"
506
+ Provides-Extra: sharepoint
507
+ Requires-Dist: msal; extra == "sharepoint"
508
+ Requires-Dist: pandas; extra == "sharepoint"
509
+ Requires-Dist: dataclasses-json; extra == "sharepoint"
510
+ Requires-Dist: tqdm; extra == "sharepoint"
511
+ Requires-Dist: python-dateutil; extra == "sharepoint"
512
+ Requires-Dist: pydantic; extra == "sharepoint"
513
+ Requires-Dist: click; extra == "sharepoint"
514
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
515
+ Provides-Extra: singlestore
516
+ Requires-Dist: pandas; extra == "singlestore"
517
+ Requires-Dist: dataclasses-json; extra == "singlestore"
518
+ Requires-Dist: tqdm; extra == "singlestore"
519
+ Requires-Dist: python-dateutil; extra == "singlestore"
520
+ Requires-Dist: pydantic; extra == "singlestore"
521
+ Requires-Dist: click; extra == "singlestore"
522
+ Requires-Dist: singlestoredb; extra == "singlestore"
523
+ Provides-Extra: slack
524
+ Requires-Dist: click; extra == "slack"
525
+ Requires-Dist: pandas; extra == "slack"
526
+ Requires-Dist: dataclasses-json; extra == "slack"
527
+ Requires-Dist: tqdm; extra == "slack"
528
+ Requires-Dist: python-dateutil; extra == "slack"
529
+ Requires-Dist: pydantic; extra == "slack"
530
+ Requires-Dist: slack-sdk; extra == "slack"
531
+ Provides-Extra: tsv
532
+ Requires-Dist: pandas; extra == "tsv"
533
+ Requires-Dist: dataclasses-json; extra == "tsv"
534
+ Requires-Dist: tqdm; extra == "tsv"
535
+ Requires-Dist: python-dateutil; extra == "tsv"
536
+ Requires-Dist: pydantic; extra == "tsv"
537
+ Requires-Dist: click; extra == "tsv"
538
+ Requires-Dist: unstructured[tsv]; extra == "tsv"
539
+ Provides-Extra: vectara
540
+ Requires-Dist: pandas; extra == "vectara"
541
+ Requires-Dist: dataclasses-json; extra == "vectara"
542
+ Requires-Dist: tqdm; extra == "vectara"
543
+ Requires-Dist: python-dateutil; extra == "vectara"
544
+ Requires-Dist: pydantic; extra == "vectara"
545
+ Requires-Dist: click; extra == "vectara"
546
+ Requires-Dist: requests; extra == "vectara"
547
+ Provides-Extra: weaviate
548
+ Requires-Dist: pandas; extra == "weaviate"
549
+ Requires-Dist: weaviate-client; extra == "weaviate"
550
+ Requires-Dist: dataclasses-json; extra == "weaviate"
551
+ Requires-Dist: tqdm; extra == "weaviate"
552
+ Requires-Dist: python-dateutil; extra == "weaviate"
553
+ Requires-Dist: pydantic; extra == "weaviate"
554
+ Requires-Dist: click; extra == "weaviate"
555
+ Provides-Extra: wikipedia
556
+ Requires-Dist: pandas; extra == "wikipedia"
557
+ Requires-Dist: wikipedia; extra == "wikipedia"
558
+ Requires-Dist: dataclasses-json; extra == "wikipedia"
559
+ Requires-Dist: tqdm; extra == "wikipedia"
560
+ Requires-Dist: python-dateutil; extra == "wikipedia"
561
+ Requires-Dist: pydantic; extra == "wikipedia"
562
+ Requires-Dist: click; extra == "wikipedia"
563
+ Provides-Extra: xlsx
564
+ Requires-Dist: pandas; extra == "xlsx"
565
+ Requires-Dist: dataclasses-json; extra == "xlsx"
566
+ Requires-Dist: tqdm; extra == "xlsx"
567
+ Requires-Dist: unstructured[xlsx]; extra == "xlsx"
568
+ Requires-Dist: python-dateutil; extra == "xlsx"
569
+ Requires-Dist: pydantic; extra == "xlsx"
570
+ Requires-Dist: click; extra == "xlsx"
571
+
572
+ # Unstructured Ingest
573
+
574
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.