unstructured-ingest 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (123) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/cli.py +6 -1
  3. unstructured_ingest/cli/cmds/__init__.py +4 -4
  4. unstructured_ingest/cli/cmds/{astra.py → astradb.py} +9 -9
  5. unstructured_ingest/cli/interfaces.py +13 -6
  6. unstructured_ingest/connector/{astra.py → astradb.py} +29 -29
  7. unstructured_ingest/connector/biomed.py +12 -5
  8. unstructured_ingest/connector/confluence.py +3 -3
  9. unstructured_ingest/connector/github.py +3 -2
  10. unstructured_ingest/connector/google_drive.py +1 -2
  11. unstructured_ingest/connector/mongodb.py +1 -2
  12. unstructured_ingest/connector/notion/client.py +31 -16
  13. unstructured_ingest/connector/notion/connector.py +3 -2
  14. unstructured_ingest/connector/registry.py +2 -2
  15. unstructured_ingest/connector/vectara.py +7 -2
  16. unstructured_ingest/interfaces.py +13 -9
  17. unstructured_ingest/pipeline/interfaces.py +8 -3
  18. unstructured_ingest/pipeline/reformat/chunking.py +13 -9
  19. unstructured_ingest/pipeline/reformat/embedding.py +3 -3
  20. unstructured_ingest/runner/__init__.py +2 -2
  21. unstructured_ingest/runner/{astra.py → astradb.py} +7 -7
  22. unstructured_ingest/runner/writers/__init__.py +2 -2
  23. unstructured_ingest/runner/writers/{astra.py → astradb.py} +7 -7
  24. unstructured_ingest/utils/chunking.py +45 -0
  25. unstructured_ingest/utils/dep_check.py +1 -1
  26. unstructured_ingest/utils/google_filetype.py +9 -0
  27. unstructured_ingest/v2/cli/base/cmd.py +57 -13
  28. unstructured_ingest/v2/cli/base/dest.py +21 -12
  29. unstructured_ingest/v2/cli/base/src.py +35 -23
  30. unstructured_ingest/v2/cli/cmds.py +14 -0
  31. unstructured_ingest/v2/cli/{utils.py → utils/click.py} +36 -89
  32. unstructured_ingest/v2/cli/utils/model_conversion.py +199 -0
  33. unstructured_ingest/v2/interfaces/connector.py +5 -7
  34. unstructured_ingest/v2/interfaces/downloader.py +8 -5
  35. unstructured_ingest/v2/interfaces/file_data.py +8 -2
  36. unstructured_ingest/v2/interfaces/indexer.py +3 -4
  37. unstructured_ingest/v2/interfaces/processor.py +10 -10
  38. unstructured_ingest/v2/interfaces/upload_stager.py +3 -3
  39. unstructured_ingest/v2/interfaces/uploader.py +3 -3
  40. unstructured_ingest/v2/pipeline/pipeline.py +1 -5
  41. unstructured_ingest/v2/pipeline/steps/chunk.py +5 -11
  42. unstructured_ingest/v2/pipeline/steps/download.py +13 -11
  43. unstructured_ingest/v2/pipeline/steps/embed.py +5 -11
  44. unstructured_ingest/v2/pipeline/steps/filter.py +1 -6
  45. unstructured_ingest/v2/pipeline/steps/index.py +14 -10
  46. unstructured_ingest/v2/pipeline/steps/partition.py +5 -5
  47. unstructured_ingest/v2/pipeline/steps/stage.py +4 -7
  48. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -6
  49. unstructured_ingest/v2/pipeline/steps/upload.py +2 -9
  50. unstructured_ingest/v2/processes/__init__.py +18 -0
  51. unstructured_ingest/v2/processes/chunker.py +74 -28
  52. unstructured_ingest/v2/processes/connector_registry.py +8 -2
  53. unstructured_ingest/v2/processes/connectors/__init__.py +13 -3
  54. unstructured_ingest/v2/processes/connectors/{astra.py → astradb.py} +45 -35
  55. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +30 -27
  56. unstructured_ingest/v2/processes/connectors/chroma.py +30 -21
  57. unstructured_ingest/v2/processes/connectors/couchbase.py +151 -0
  58. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +87 -32
  59. unstructured_ingest/v2/processes/connectors/elasticsearch.py +70 -45
  60. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +39 -16
  61. unstructured_ingest/v2/processes/connectors/fsspec/box.py +15 -13
  62. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +10 -11
  63. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +20 -34
  64. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +38 -13
  65. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +31 -17
  66. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -28
  67. unstructured_ingest/v2/processes/connectors/google_drive.py +40 -34
  68. unstructured_ingest/v2/processes/connectors/local.py +22 -14
  69. unstructured_ingest/v2/processes/connectors/milvus.py +22 -18
  70. unstructured_ingest/v2/processes/connectors/mongodb.py +22 -18
  71. unstructured_ingest/v2/processes/connectors/onedrive.py +17 -14
  72. unstructured_ingest/v2/processes/connectors/opensearch.py +66 -56
  73. unstructured_ingest/v2/processes/connectors/pinecone.py +23 -20
  74. unstructured_ingest/v2/processes/connectors/salesforce.py +26 -18
  75. unstructured_ingest/v2/processes/connectors/sharepoint.py +51 -26
  76. unstructured_ingest/v2/processes/connectors/singlestore.py +11 -15
  77. unstructured_ingest/v2/processes/connectors/sql.py +29 -31
  78. unstructured_ingest/v2/processes/connectors/weaviate.py +22 -13
  79. unstructured_ingest/v2/processes/embedder.py +106 -47
  80. unstructured_ingest/v2/processes/filter.py +11 -5
  81. unstructured_ingest/v2/processes/partitioner.py +79 -33
  82. unstructured_ingest/v2/processes/uncompress.py +3 -3
  83. unstructured_ingest/v2/utils.py +45 -0
  84. unstructured_ingest-0.0.4.dist-info/METADATA +571 -0
  85. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.4.dist-info}/RECORD +89 -116
  86. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.4.dist-info}/WHEEL +1 -1
  87. unstructured_ingest/v2/cli/cmds/__init__.py +0 -89
  88. unstructured_ingest/v2/cli/cmds/astra.py +0 -85
  89. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +0 -72
  90. unstructured_ingest/v2/cli/cmds/chroma.py +0 -108
  91. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +0 -161
  92. unstructured_ingest/v2/cli/cmds/elasticsearch.py +0 -159
  93. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +0 -84
  94. unstructured_ingest/v2/cli/cmds/fsspec/box.py +0 -58
  95. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +0 -58
  96. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +0 -69
  97. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +0 -81
  98. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +0 -84
  99. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +0 -80
  100. unstructured_ingest/v2/cli/cmds/google_drive.py +0 -74
  101. unstructured_ingest/v2/cli/cmds/local.py +0 -52
  102. unstructured_ingest/v2/cli/cmds/milvus.py +0 -72
  103. unstructured_ingest/v2/cli/cmds/mongodb.py +0 -62
  104. unstructured_ingest/v2/cli/cmds/onedrive.py +0 -91
  105. unstructured_ingest/v2/cli/cmds/opensearch.py +0 -93
  106. unstructured_ingest/v2/cli/cmds/pinecone.py +0 -62
  107. unstructured_ingest/v2/cli/cmds/salesforce.py +0 -79
  108. unstructured_ingest/v2/cli/cmds/sharepoint.py +0 -112
  109. unstructured_ingest/v2/cli/cmds/singlestore.py +0 -96
  110. unstructured_ingest/v2/cli/cmds/sql.py +0 -84
  111. unstructured_ingest/v2/cli/cmds/weaviate.py +0 -100
  112. unstructured_ingest/v2/cli/configs/__init__.py +0 -13
  113. unstructured_ingest/v2/cli/configs/chunk.py +0 -89
  114. unstructured_ingest/v2/cli/configs/embed.py +0 -74
  115. unstructured_ingest/v2/cli/configs/filter.py +0 -28
  116. unstructured_ingest/v2/cli/configs/partition.py +0 -99
  117. unstructured_ingest/v2/cli/configs/processor.py +0 -88
  118. unstructured_ingest/v2/cli/interfaces.py +0 -27
  119. unstructured_ingest/v2/pipeline/utils.py +0 -15
  120. unstructured_ingest-0.0.3.dist-info/METADATA +0 -175
  121. /unstructured_ingest/v2/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  122. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.4.dist-info}/entry_points.txt +0 -0
  123. {unstructured_ingest-0.0.3.dist-info → unstructured_ingest-0.0.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,571 @@
1
+ Metadata-Version: 2.1
2
+ Name: unstructured-ingest
3
+ Version: 0.0.4
4
+ Summary: A library that prepares raw documents for downstream ML tasks.
5
+ Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
+ Author: Unstructured Technologies
7
+ Author-email: devops@unstructuredai.io
8
+ License: Apache-2.0
9
+ Keywords: NLP PDF HTML CV XML parsing preprocessing
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9.0,<3.13
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: dataclasses-json
25
+ Requires-Dist: tqdm
26
+ Requires-Dist: click
27
+ Requires-Dist: pydantic
28
+ Requires-Dist: python-dateutil
29
+ Requires-Dist: pandas
30
+ Provides-Extra: airtable
31
+ Requires-Dist: dataclasses-json; extra == "airtable"
32
+ Requires-Dist: tqdm; extra == "airtable"
33
+ Requires-Dist: click; extra == "airtable"
34
+ Requires-Dist: pydantic; extra == "airtable"
35
+ Requires-Dist: pyairtable; extra == "airtable"
36
+ Requires-Dist: python-dateutil; extra == "airtable"
37
+ Requires-Dist: pandas; extra == "airtable"
38
+ Provides-Extra: astradb
39
+ Requires-Dist: dataclasses-json; extra == "astradb"
40
+ Requires-Dist: tqdm; extra == "astradb"
41
+ Requires-Dist: astrapy; extra == "astradb"
42
+ Requires-Dist: click; extra == "astradb"
43
+ Requires-Dist: pydantic; extra == "astradb"
44
+ Requires-Dist: python-dateutil; extra == "astradb"
45
+ Requires-Dist: pandas; extra == "astradb"
46
+ Provides-Extra: azure
47
+ Requires-Dist: dataclasses-json; extra == "azure"
48
+ Requires-Dist: adlfs; extra == "azure"
49
+ Requires-Dist: tqdm; extra == "azure"
50
+ Requires-Dist: click; extra == "azure"
51
+ Requires-Dist: pydantic; extra == "azure"
52
+ Requires-Dist: fsspec; extra == "azure"
53
+ Requires-Dist: python-dateutil; extra == "azure"
54
+ Requires-Dist: pandas; extra == "azure"
55
+ Provides-Extra: azure-cognitive-search
56
+ Requires-Dist: dataclasses-json; extra == "azure-cognitive-search"
57
+ Requires-Dist: pandas; extra == "azure-cognitive-search"
58
+ Requires-Dist: tqdm; extra == "azure-cognitive-search"
59
+ Requires-Dist: click; extra == "azure-cognitive-search"
60
+ Requires-Dist: pydantic; extra == "azure-cognitive-search"
61
+ Requires-Dist: python-dateutil; extra == "azure-cognitive-search"
62
+ Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
63
+ Provides-Extra: bedrock
64
+ Requires-Dist: dataclasses-json; extra == "bedrock"
65
+ Requires-Dist: boto3; extra == "bedrock"
66
+ Requires-Dist: tqdm; extra == "bedrock"
67
+ Requires-Dist: click; extra == "bedrock"
68
+ Requires-Dist: pydantic; extra == "bedrock"
69
+ Requires-Dist: unstructured; extra == "bedrock"
70
+ Requires-Dist: langchain-community; extra == "bedrock"
71
+ Requires-Dist: python-dateutil; extra == "bedrock"
72
+ Requires-Dist: pandas; extra == "bedrock"
73
+ Provides-Extra: biomed
74
+ Requires-Dist: dataclasses-json; extra == "biomed"
75
+ Requires-Dist: tqdm; extra == "biomed"
76
+ Requires-Dist: requests; extra == "biomed"
77
+ Requires-Dist: click; extra == "biomed"
78
+ Requires-Dist: pydantic; extra == "biomed"
79
+ Requires-Dist: python-dateutil; extra == "biomed"
80
+ Requires-Dist: pandas; extra == "biomed"
81
+ Requires-Dist: bs4; extra == "biomed"
82
+ Provides-Extra: box
83
+ Requires-Dist: dataclasses-json; extra == "box"
84
+ Requires-Dist: tqdm; extra == "box"
85
+ Requires-Dist: click; extra == "box"
86
+ Requires-Dist: pydantic; extra == "box"
87
+ Requires-Dist: boxfs; extra == "box"
88
+ Requires-Dist: fsspec; extra == "box"
89
+ Requires-Dist: python-dateutil; extra == "box"
90
+ Requires-Dist: pandas; extra == "box"
91
+ Provides-Extra: chroma
92
+ Requires-Dist: dataclasses-json; extra == "chroma"
93
+ Requires-Dist: chromadb; extra == "chroma"
94
+ Requires-Dist: tqdm; extra == "chroma"
95
+ Requires-Dist: click; extra == "chroma"
96
+ Requires-Dist: pydantic; extra == "chroma"
97
+ Requires-Dist: importlib-metadata>=7.1.0; extra == "chroma"
98
+ Requires-Dist: python-dateutil; extra == "chroma"
99
+ Requires-Dist: pandas; extra == "chroma"
100
+ Requires-Dist: typer<=0.9.0; extra == "chroma"
101
+ Provides-Extra: clarifai
102
+ Requires-Dist: dataclasses-json; extra == "clarifai"
103
+ Requires-Dist: tqdm; extra == "clarifai"
104
+ Requires-Dist: clarifai; extra == "clarifai"
105
+ Requires-Dist: pydantic; extra == "clarifai"
106
+ Requires-Dist: click; extra == "clarifai"
107
+ Requires-Dist: python-dateutil; extra == "clarifai"
108
+ Requires-Dist: pandas; extra == "clarifai"
109
+ Provides-Extra: confluence
110
+ Requires-Dist: dataclasses-json; extra == "confluence"
111
+ Requires-Dist: tqdm; extra == "confluence"
112
+ Requires-Dist: requests; extra == "confluence"
113
+ Requires-Dist: click; extra == "confluence"
114
+ Requires-Dist: pydantic; extra == "confluence"
115
+ Requires-Dist: atlassian-python-api; extra == "confluence"
116
+ Requires-Dist: python-dateutil; extra == "confluence"
117
+ Requires-Dist: pandas; extra == "confluence"
118
+ Provides-Extra: couchbase
119
+ Requires-Dist: dataclasses-json; extra == "couchbase"
120
+ Requires-Dist: tqdm; extra == "couchbase"
121
+ Requires-Dist: click; extra == "couchbase"
122
+ Requires-Dist: pydantic; extra == "couchbase"
123
+ Requires-Dist: couchbase; extra == "couchbase"
124
+ Requires-Dist: python-dateutil; extra == "couchbase"
125
+ Requires-Dist: pandas; extra == "couchbase"
126
+ Provides-Extra: csv
127
+ Requires-Dist: unstructured[tsv]; extra == "csv"
128
+ Requires-Dist: dataclasses-json; extra == "csv"
129
+ Requires-Dist: tqdm; extra == "csv"
130
+ Requires-Dist: click; extra == "csv"
131
+ Requires-Dist: pydantic; extra == "csv"
132
+ Requires-Dist: python-dateutil; extra == "csv"
133
+ Requires-Dist: pandas; extra == "csv"
134
+ Provides-Extra: databricks-volumes
135
+ Requires-Dist: dataclasses-json; extra == "databricks-volumes"
136
+ Requires-Dist: tqdm; extra == "databricks-volumes"
137
+ Requires-Dist: click; extra == "databricks-volumes"
138
+ Requires-Dist: pydantic; extra == "databricks-volumes"
139
+ Requires-Dist: databricks-sdk; extra == "databricks-volumes"
140
+ Requires-Dist: python-dateutil; extra == "databricks-volumes"
141
+ Requires-Dist: pandas; extra == "databricks-volumes"
142
+ Provides-Extra: delta-table
143
+ Requires-Dist: dataclasses-json; extra == "delta-table"
144
+ Requires-Dist: tqdm; extra == "delta-table"
145
+ Requires-Dist: click; extra == "delta-table"
146
+ Requires-Dist: pydantic; extra == "delta-table"
147
+ Requires-Dist: fsspec; extra == "delta-table"
148
+ Requires-Dist: python-dateutil; extra == "delta-table"
149
+ Requires-Dist: deltalake; extra == "delta-table"
150
+ Requires-Dist: pandas; extra == "delta-table"
151
+ Provides-Extra: discord
152
+ Requires-Dist: dataclasses-json; extra == "discord"
153
+ Requires-Dist: tqdm; extra == "discord"
154
+ Requires-Dist: click; extra == "discord"
155
+ Requires-Dist: pydantic; extra == "discord"
156
+ Requires-Dist: python-dateutil; extra == "discord"
157
+ Requires-Dist: pandas; extra == "discord"
158
+ Requires-Dist: discord-py; extra == "discord"
159
+ Provides-Extra: doc
160
+ Requires-Dist: dataclasses-json; extra == "doc"
161
+ Requires-Dist: tqdm; extra == "doc"
162
+ Requires-Dist: click; extra == "doc"
163
+ Requires-Dist: pydantic; extra == "doc"
164
+ Requires-Dist: unstructured[docx]; extra == "doc"
165
+ Requires-Dist: python-dateutil; extra == "doc"
166
+ Requires-Dist: pandas; extra == "doc"
167
+ Provides-Extra: docx
168
+ Requires-Dist: dataclasses-json; extra == "docx"
169
+ Requires-Dist: tqdm; extra == "docx"
170
+ Requires-Dist: click; extra == "docx"
171
+ Requires-Dist: pydantic; extra == "docx"
172
+ Requires-Dist: unstructured[docx]; extra == "docx"
173
+ Requires-Dist: python-dateutil; extra == "docx"
174
+ Requires-Dist: pandas; extra == "docx"
175
+ Provides-Extra: dropbox
176
+ Requires-Dist: dataclasses-json; extra == "dropbox"
177
+ Requires-Dist: tqdm; extra == "dropbox"
178
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
179
+ Requires-Dist: click; extra == "dropbox"
180
+ Requires-Dist: pydantic; extra == "dropbox"
181
+ Requires-Dist: fsspec; extra == "dropbox"
182
+ Requires-Dist: python-dateutil; extra == "dropbox"
183
+ Requires-Dist: pandas; extra == "dropbox"
184
+ Provides-Extra: elasticsearch
185
+ Requires-Dist: dataclasses-json; extra == "elasticsearch"
186
+ Requires-Dist: tqdm; extra == "elasticsearch"
187
+ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
188
+ Requires-Dist: pydantic; extra == "elasticsearch"
189
+ Requires-Dist: click; extra == "elasticsearch"
190
+ Requires-Dist: python-dateutil; extra == "elasticsearch"
191
+ Requires-Dist: pandas; extra == "elasticsearch"
192
+ Provides-Extra: embed-huggingface
193
+ Requires-Dist: dataclasses-json; extra == "embed-huggingface"
194
+ Requires-Dist: tqdm; extra == "embed-huggingface"
195
+ Requires-Dist: click; extra == "embed-huggingface"
196
+ Requires-Dist: pydantic; extra == "embed-huggingface"
197
+ Requires-Dist: unstructured; extra == "embed-huggingface"
198
+ Requires-Dist: langchain-huggingface; extra == "embed-huggingface"
199
+ Requires-Dist: python-dateutil; extra == "embed-huggingface"
200
+ Requires-Dist: pandas; extra == "embed-huggingface"
201
+ Provides-Extra: embed-octoai
202
+ Requires-Dist: dataclasses-json; extra == "embed-octoai"
203
+ Requires-Dist: tqdm; extra == "embed-octoai"
204
+ Requires-Dist: tiktoken; extra == "embed-octoai"
205
+ Requires-Dist: click; extra == "embed-octoai"
206
+ Requires-Dist: pydantic; extra == "embed-octoai"
207
+ Requires-Dist: unstructured; extra == "embed-octoai"
208
+ Requires-Dist: openai; extra == "embed-octoai"
209
+ Requires-Dist: python-dateutil; extra == "embed-octoai"
210
+ Requires-Dist: pandas; extra == "embed-octoai"
211
+ Provides-Extra: embed-vertexai
212
+ Requires-Dist: dataclasses-json; extra == "embed-vertexai"
213
+ Requires-Dist: tqdm; extra == "embed-vertexai"
214
+ Requires-Dist: langchain-google-vertexai; extra == "embed-vertexai"
215
+ Requires-Dist: click; extra == "embed-vertexai"
216
+ Requires-Dist: pydantic; extra == "embed-vertexai"
217
+ Requires-Dist: langchain; extra == "embed-vertexai"
218
+ Requires-Dist: unstructured; extra == "embed-vertexai"
219
+ Requires-Dist: langchain-community; extra == "embed-vertexai"
220
+ Requires-Dist: python-dateutil; extra == "embed-vertexai"
221
+ Requires-Dist: pandas; extra == "embed-vertexai"
222
+ Provides-Extra: embed-voyageai
223
+ Requires-Dist: dataclasses-json; extra == "embed-voyageai"
224
+ Requires-Dist: tqdm; extra == "embed-voyageai"
225
+ Requires-Dist: langchain-voyageai; extra == "embed-voyageai"
226
+ Requires-Dist: click; extra == "embed-voyageai"
227
+ Requires-Dist: pydantic; extra == "embed-voyageai"
228
+ Requires-Dist: langchain; extra == "embed-voyageai"
229
+ Requires-Dist: unstructured; extra == "embed-voyageai"
230
+ Requires-Dist: python-dateutil; extra == "embed-voyageai"
231
+ Requires-Dist: pandas; extra == "embed-voyageai"
232
+ Provides-Extra: epub
233
+ Requires-Dist: dataclasses-json; extra == "epub"
234
+ Requires-Dist: tqdm; extra == "epub"
235
+ Requires-Dist: click; extra == "epub"
236
+ Requires-Dist: pydantic; extra == "epub"
237
+ Requires-Dist: unstructured[epub]; extra == "epub"
238
+ Requires-Dist: python-dateutil; extra == "epub"
239
+ Requires-Dist: pandas; extra == "epub"
240
+ Provides-Extra: gcs
241
+ Requires-Dist: dataclasses-json; extra == "gcs"
242
+ Requires-Dist: tqdm; extra == "gcs"
243
+ Requires-Dist: gcsfs; extra == "gcs"
244
+ Requires-Dist: click; extra == "gcs"
245
+ Requires-Dist: pydantic; extra == "gcs"
246
+ Requires-Dist: fsspec; extra == "gcs"
247
+ Requires-Dist: python-dateutil; extra == "gcs"
248
+ Requires-Dist: pandas; extra == "gcs"
249
+ Requires-Dist: bs4; extra == "gcs"
250
+ Provides-Extra: github
251
+ Requires-Dist: dataclasses-json; extra == "github"
252
+ Requires-Dist: pygithub>1.58.0; extra == "github"
253
+ Requires-Dist: tqdm; extra == "github"
254
+ Requires-Dist: requests; extra == "github"
255
+ Requires-Dist: click; extra == "github"
256
+ Requires-Dist: pydantic; extra == "github"
257
+ Requires-Dist: python-dateutil; extra == "github"
258
+ Requires-Dist: pandas; extra == "github"
259
+ Provides-Extra: gitlab
260
+ Requires-Dist: dataclasses-json; extra == "gitlab"
261
+ Requires-Dist: tqdm; extra == "gitlab"
262
+ Requires-Dist: click; extra == "gitlab"
263
+ Requires-Dist: pydantic; extra == "gitlab"
264
+ Requires-Dist: python-gitlab; extra == "gitlab"
265
+ Requires-Dist: python-dateutil; extra == "gitlab"
266
+ Requires-Dist: pandas; extra == "gitlab"
267
+ Provides-Extra: google-drive
268
+ Requires-Dist: dataclasses-json; extra == "google-drive"
269
+ Requires-Dist: python-dateutil; extra == "google-drive"
270
+ Requires-Dist: tqdm; extra == "google-drive"
271
+ Requires-Dist: click; extra == "google-drive"
272
+ Requires-Dist: pydantic; extra == "google-drive"
273
+ Requires-Dist: google-api-python-client; extra == "google-drive"
274
+ Requires-Dist: pandas; extra == "google-drive"
275
+ Provides-Extra: hubspot
276
+ Requires-Dist: dataclasses-json; extra == "hubspot"
277
+ Requires-Dist: tqdm; extra == "hubspot"
278
+ Requires-Dist: click; extra == "hubspot"
279
+ Requires-Dist: pydantic; extra == "hubspot"
280
+ Requires-Dist: urllib3; extra == "hubspot"
281
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
282
+ Requires-Dist: python-dateutil; extra == "hubspot"
283
+ Requires-Dist: pandas; extra == "hubspot"
284
+ Provides-Extra: jira
285
+ Requires-Dist: dataclasses-json; extra == "jira"
286
+ Requires-Dist: tqdm; extra == "jira"
287
+ Requires-Dist: click; extra == "jira"
288
+ Requires-Dist: pydantic; extra == "jira"
289
+ Requires-Dist: atlassian-python-api; extra == "jira"
290
+ Requires-Dist: python-dateutil; extra == "jira"
291
+ Requires-Dist: pandas; extra == "jira"
292
+ Provides-Extra: kafka
293
+ Requires-Dist: dataclasses-json; extra == "kafka"
294
+ Requires-Dist: tqdm; extra == "kafka"
295
+ Requires-Dist: click; extra == "kafka"
296
+ Requires-Dist: pydantic; extra == "kafka"
297
+ Requires-Dist: confluent-kafka; extra == "kafka"
298
+ Requires-Dist: python-dateutil; extra == "kafka"
299
+ Requires-Dist: pandas; extra == "kafka"
300
+ Provides-Extra: md
301
+ Requires-Dist: dataclasses-json; extra == "md"
302
+ Requires-Dist: tqdm; extra == "md"
303
+ Requires-Dist: click; extra == "md"
304
+ Requires-Dist: pydantic; extra == "md"
305
+ Requires-Dist: unstructured[md]; extra == "md"
306
+ Requires-Dist: python-dateutil; extra == "md"
307
+ Requires-Dist: pandas; extra == "md"
308
+ Provides-Extra: milvus
309
+ Requires-Dist: dataclasses-json; extra == "milvus"
310
+ Requires-Dist: tqdm; extra == "milvus"
311
+ Requires-Dist: click; extra == "milvus"
312
+ Requires-Dist: pydantic; extra == "milvus"
313
+ Requires-Dist: pymilvus; extra == "milvus"
314
+ Requires-Dist: python-dateutil; extra == "milvus"
315
+ Requires-Dist: pandas; extra == "milvus"
316
+ Provides-Extra: mongodb
317
+ Requires-Dist: dataclasses-json; extra == "mongodb"
318
+ Requires-Dist: tqdm; extra == "mongodb"
319
+ Requires-Dist: click; extra == "mongodb"
320
+ Requires-Dist: pydantic; extra == "mongodb"
321
+ Requires-Dist: pymongo; extra == "mongodb"
322
+ Requires-Dist: python-dateutil; extra == "mongodb"
323
+ Requires-Dist: pandas; extra == "mongodb"
324
+ Provides-Extra: msg
325
+ Requires-Dist: dataclasses-json; extra == "msg"
326
+ Requires-Dist: tqdm; extra == "msg"
327
+ Requires-Dist: click; extra == "msg"
328
+ Requires-Dist: pydantic; extra == "msg"
329
+ Requires-Dist: python-dateutil; extra == "msg"
330
+ Requires-Dist: unstructured[msg]; extra == "msg"
331
+ Requires-Dist: pandas; extra == "msg"
332
+ Provides-Extra: notion
333
+ Requires-Dist: dataclasses-json; extra == "notion"
334
+ Requires-Dist: htmlBuilder; extra == "notion"
335
+ Requires-Dist: tqdm; extra == "notion"
336
+ Requires-Dist: click; extra == "notion"
337
+ Requires-Dist: pydantic; extra == "notion"
338
+ Requires-Dist: notion-client; extra == "notion"
339
+ Requires-Dist: httpx; extra == "notion"
340
+ Requires-Dist: python-dateutil; extra == "notion"
341
+ Requires-Dist: pandas; extra == "notion"
342
+ Requires-Dist: backoff; extra == "notion"
343
+ Provides-Extra: odt
344
+ Requires-Dist: dataclasses-json; extra == "odt"
345
+ Requires-Dist: tqdm; extra == "odt"
346
+ Requires-Dist: click; extra == "odt"
347
+ Requires-Dist: pydantic; extra == "odt"
348
+ Requires-Dist: unstructured[odt]; extra == "odt"
349
+ Requires-Dist: python-dateutil; extra == "odt"
350
+ Requires-Dist: pandas; extra == "odt"
351
+ Provides-Extra: onedrive
352
+ Requires-Dist: dataclasses-json; extra == "onedrive"
353
+ Requires-Dist: tqdm; extra == "onedrive"
354
+ Requires-Dist: click; extra == "onedrive"
355
+ Requires-Dist: pydantic; extra == "onedrive"
356
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
357
+ Requires-Dist: msal; extra == "onedrive"
358
+ Requires-Dist: python-dateutil; extra == "onedrive"
359
+ Requires-Dist: pandas; extra == "onedrive"
360
+ Requires-Dist: bs4; extra == "onedrive"
361
+ Provides-Extra: openai
362
+ Requires-Dist: dataclasses-json; extra == "openai"
363
+ Requires-Dist: tqdm; extra == "openai"
364
+ Requires-Dist: tiktoken; extra == "openai"
365
+ Requires-Dist: click; extra == "openai"
366
+ Requires-Dist: pydantic; extra == "openai"
367
+ Requires-Dist: unstructured; extra == "openai"
368
+ Requires-Dist: openai; extra == "openai"
369
+ Requires-Dist: langchain-community; extra == "openai"
370
+ Requires-Dist: python-dateutil; extra == "openai"
371
+ Requires-Dist: pandas; extra == "openai"
372
+ Provides-Extra: opensearch
373
+ Requires-Dist: dataclasses-json; extra == "opensearch"
374
+ Requires-Dist: tqdm; extra == "opensearch"
375
+ Requires-Dist: click; extra == "opensearch"
376
+ Requires-Dist: pydantic; extra == "opensearch"
377
+ Requires-Dist: opensearch-py; extra == "opensearch"
378
+ Requires-Dist: python-dateutil; extra == "opensearch"
379
+ Requires-Dist: pandas; extra == "opensearch"
380
+ Provides-Extra: org
381
+ Requires-Dist: dataclasses-json; extra == "org"
382
+ Requires-Dist: tqdm; extra == "org"
383
+ Requires-Dist: click; extra == "org"
384
+ Requires-Dist: pydantic; extra == "org"
385
+ Requires-Dist: unstructured[org]; extra == "org"
386
+ Requires-Dist: python-dateutil; extra == "org"
387
+ Requires-Dist: pandas; extra == "org"
388
+ Provides-Extra: outlook
389
+ Requires-Dist: dataclasses-json; extra == "outlook"
390
+ Requires-Dist: tqdm; extra == "outlook"
391
+ Requires-Dist: click; extra == "outlook"
392
+ Requires-Dist: pydantic; extra == "outlook"
393
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
394
+ Requires-Dist: msal; extra == "outlook"
395
+ Requires-Dist: python-dateutil; extra == "outlook"
396
+ Requires-Dist: pandas; extra == "outlook"
397
+ Provides-Extra: pdf
398
+ Requires-Dist: dataclasses-json; extra == "pdf"
399
+ Requires-Dist: tqdm; extra == "pdf"
400
+ Requires-Dist: click; extra == "pdf"
401
+ Requires-Dist: pydantic; extra == "pdf"
402
+ Requires-Dist: unstructured[pdf]; extra == "pdf"
403
+ Requires-Dist: python-dateutil; extra == "pdf"
404
+ Requires-Dist: pandas; extra == "pdf"
405
+ Provides-Extra: pinecone
406
+ Requires-Dist: dataclasses-json; extra == "pinecone"
407
+ Requires-Dist: tqdm; extra == "pinecone"
408
+ Requires-Dist: click; extra == "pinecone"
409
+ Requires-Dist: pydantic; extra == "pinecone"
410
+ Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
411
+ Requires-Dist: python-dateutil; extra == "pinecone"
412
+ Requires-Dist: pandas; extra == "pinecone"
413
+ Provides-Extra: postgres
414
+ Requires-Dist: dataclasses-json; extra == "postgres"
415
+ Requires-Dist: tqdm; extra == "postgres"
416
+ Requires-Dist: click; extra == "postgres"
417
+ Requires-Dist: pydantic; extra == "postgres"
418
+ Requires-Dist: python-dateutil; extra == "postgres"
419
+ Requires-Dist: pandas; extra == "postgres"
420
+ Requires-Dist: psycopg2-binary; extra == "postgres"
421
+ Provides-Extra: ppt
422
+ Requires-Dist: dataclasses-json; extra == "ppt"
423
+ Requires-Dist: tqdm; extra == "ppt"
424
+ Requires-Dist: unstructured[pptx]; extra == "ppt"
425
+ Requires-Dist: click; extra == "ppt"
426
+ Requires-Dist: pydantic; extra == "ppt"
427
+ Requires-Dist: python-dateutil; extra == "ppt"
428
+ Requires-Dist: pandas; extra == "ppt"
429
+ Provides-Extra: pptx
430
+ Requires-Dist: dataclasses-json; extra == "pptx"
431
+ Requires-Dist: tqdm; extra == "pptx"
432
+ Requires-Dist: unstructured[pptx]; extra == "pptx"
433
+ Requires-Dist: click; extra == "pptx"
434
+ Requires-Dist: pydantic; extra == "pptx"
435
+ Requires-Dist: python-dateutil; extra == "pptx"
436
+ Requires-Dist: pandas; extra == "pptx"
437
+ Provides-Extra: qdrant
438
+ Requires-Dist: dataclasses-json; extra == "qdrant"
439
+ Requires-Dist: tqdm; extra == "qdrant"
440
+ Requires-Dist: click; extra == "qdrant"
441
+ Requires-Dist: pydantic; extra == "qdrant"
442
+ Requires-Dist: qdrant-client; extra == "qdrant"
443
+ Requires-Dist: python-dateutil; extra == "qdrant"
444
+ Requires-Dist: pandas; extra == "qdrant"
445
+ Provides-Extra: reddit
446
+ Requires-Dist: dataclasses-json; extra == "reddit"
447
+ Requires-Dist: tqdm; extra == "reddit"
448
+ Requires-Dist: click; extra == "reddit"
449
+ Requires-Dist: pydantic; extra == "reddit"
450
+ Requires-Dist: praw; extra == "reddit"
451
+ Requires-Dist: python-dateutil; extra == "reddit"
452
+ Requires-Dist: pandas; extra == "reddit"
453
+ Provides-Extra: remote
454
+ Requires-Dist: dataclasses-json; extra == "remote"
455
+ Requires-Dist: tqdm; extra == "remote"
456
+ Requires-Dist: click; extra == "remote"
457
+ Requires-Dist: pydantic; extra == "remote"
458
+ Requires-Dist: unstructured-client; extra == "remote"
459
+ Requires-Dist: python-dateutil; extra == "remote"
460
+ Requires-Dist: pandas; extra == "remote"
461
+ Provides-Extra: rst
462
+ Requires-Dist: dataclasses-json; extra == "rst"
463
+ Requires-Dist: tqdm; extra == "rst"
464
+ Requires-Dist: unstructured[rst]; extra == "rst"
465
+ Requires-Dist: pydantic; extra == "rst"
466
+ Requires-Dist: click; extra == "rst"
467
+ Requires-Dist: python-dateutil; extra == "rst"
468
+ Requires-Dist: pandas; extra == "rst"
469
+ Provides-Extra: rtf
470
+ Requires-Dist: dataclasses-json; extra == "rtf"
471
+ Requires-Dist: tqdm; extra == "rtf"
472
+ Requires-Dist: click; extra == "rtf"
473
+ Requires-Dist: pydantic; extra == "rtf"
474
+ Requires-Dist: unstructured[rtf]; extra == "rtf"
475
+ Requires-Dist: python-dateutil; extra == "rtf"
476
+ Requires-Dist: pandas; extra == "rtf"
477
+ Provides-Extra: s3
478
+ Requires-Dist: dataclasses-json; extra == "s3"
479
+ Requires-Dist: tqdm; extra == "s3"
480
+ Requires-Dist: click; extra == "s3"
481
+ Requires-Dist: pydantic; extra == "s3"
482
+ Requires-Dist: s3fs; extra == "s3"
483
+ Requires-Dist: fsspec; extra == "s3"
484
+ Requires-Dist: python-dateutil; extra == "s3"
485
+ Requires-Dist: pandas; extra == "s3"
486
+ Provides-Extra: salesforce
487
+ Requires-Dist: dataclasses-json; extra == "salesforce"
488
+ Requires-Dist: tqdm; extra == "salesforce"
489
+ Requires-Dist: simple-salesforce; extra == "salesforce"
490
+ Requires-Dist: click; extra == "salesforce"
491
+ Requires-Dist: pydantic; extra == "salesforce"
492
+ Requires-Dist: python-dateutil; extra == "salesforce"
493
+ Requires-Dist: pandas; extra == "salesforce"
494
+ Provides-Extra: sftp
495
+ Requires-Dist: dataclasses-json; extra == "sftp"
496
+ Requires-Dist: tqdm; extra == "sftp"
497
+ Requires-Dist: click; extra == "sftp"
498
+ Requires-Dist: pydantic; extra == "sftp"
499
+ Requires-Dist: paramiko; extra == "sftp"
500
+ Requires-Dist: fsspec; extra == "sftp"
501
+ Requires-Dist: python-dateutil; extra == "sftp"
502
+ Requires-Dist: pandas; extra == "sftp"
503
+ Provides-Extra: sharepoint
504
+ Requires-Dist: dataclasses-json; extra == "sharepoint"
505
+ Requires-Dist: tqdm; extra == "sharepoint"
506
+ Requires-Dist: click; extra == "sharepoint"
507
+ Requires-Dist: pydantic; extra == "sharepoint"
508
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
509
+ Requires-Dist: msal; extra == "sharepoint"
510
+ Requires-Dist: python-dateutil; extra == "sharepoint"
511
+ Requires-Dist: pandas; extra == "sharepoint"
512
+ Provides-Extra: singlestore
513
+ Requires-Dist: dataclasses-json; extra == "singlestore"
514
+ Requires-Dist: singlestoredb; extra == "singlestore"
515
+ Requires-Dist: tqdm; extra == "singlestore"
516
+ Requires-Dist: click; extra == "singlestore"
517
+ Requires-Dist: pydantic; extra == "singlestore"
518
+ Requires-Dist: python-dateutil; extra == "singlestore"
519
+ Requires-Dist: pandas; extra == "singlestore"
520
+ Provides-Extra: slack
521
+ Requires-Dist: dataclasses-json; extra == "slack"
522
+ Requires-Dist: tqdm; extra == "slack"
523
+ Requires-Dist: click; extra == "slack"
524
+ Requires-Dist: pydantic; extra == "slack"
525
+ Requires-Dist: slack-sdk; extra == "slack"
526
+ Requires-Dist: python-dateutil; extra == "slack"
527
+ Requires-Dist: pandas; extra == "slack"
528
+ Provides-Extra: tsv
529
+ Requires-Dist: unstructured[tsv]; extra == "tsv"
530
+ Requires-Dist: dataclasses-json; extra == "tsv"
531
+ Requires-Dist: tqdm; extra == "tsv"
532
+ Requires-Dist: click; extra == "tsv"
533
+ Requires-Dist: pydantic; extra == "tsv"
534
+ Requires-Dist: python-dateutil; extra == "tsv"
535
+ Requires-Dist: pandas; extra == "tsv"
536
+ Provides-Extra: vectara
537
+ Requires-Dist: dataclasses-json; extra == "vectara"
538
+ Requires-Dist: tqdm; extra == "vectara"
539
+ Requires-Dist: requests; extra == "vectara"
540
+ Requires-Dist: click; extra == "vectara"
541
+ Requires-Dist: pydantic; extra == "vectara"
542
+ Requires-Dist: python-dateutil; extra == "vectara"
543
+ Requires-Dist: pandas; extra == "vectara"
544
+ Provides-Extra: weaviate
545
+ Requires-Dist: dataclasses-json; extra == "weaviate"
546
+ Requires-Dist: weaviate-client; extra == "weaviate"
547
+ Requires-Dist: tqdm; extra == "weaviate"
548
+ Requires-Dist: click; extra == "weaviate"
549
+ Requires-Dist: pydantic; extra == "weaviate"
550
+ Requires-Dist: python-dateutil; extra == "weaviate"
551
+ Requires-Dist: pandas; extra == "weaviate"
552
+ Provides-Extra: wikipedia
553
+ Requires-Dist: dataclasses-json; extra == "wikipedia"
554
+ Requires-Dist: python-dateutil; extra == "wikipedia"
555
+ Requires-Dist: tqdm; extra == "wikipedia"
556
+ Requires-Dist: click; extra == "wikipedia"
557
+ Requires-Dist: pydantic; extra == "wikipedia"
558
+ Requires-Dist: wikipedia; extra == "wikipedia"
559
+ Requires-Dist: pandas; extra == "wikipedia"
560
+ Provides-Extra: xlsx
561
+ Requires-Dist: dataclasses-json; extra == "xlsx"
562
+ Requires-Dist: unstructured[xlsx]; extra == "xlsx"
563
+ Requires-Dist: tqdm; extra == "xlsx"
564
+ Requires-Dist: click; extra == "xlsx"
565
+ Requires-Dist: pydantic; extra == "xlsx"
566
+ Requires-Dist: python-dateutil; extra == "xlsx"
567
+ Requires-Dist: pandas; extra == "xlsx"
568
+
569
+ # Unstructured Ingest
570
+
571
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.