unstructured-ingest 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.6.0" # pragma: no cover
1
+ __version__ = "0.6.1" # pragma: no cover
@@ -6,6 +6,7 @@ from typing import Any, Optional
6
6
  from pydantic import BaseModel, Field, SecretStr
7
7
 
8
8
  from unstructured_ingest.utils.chunking import assign_and_map_hash_ids
9
+ from unstructured_ingest.utils.data_prep import get_json_data
9
10
  from unstructured_ingest.utils.dep_check import requires_dependencies
10
11
  from unstructured_ingest.v2.interfaces.process import BaseProcess
11
12
  from unstructured_ingest.v2.logger import logger
@@ -92,9 +93,11 @@ class Chunker(BaseProcess, ABC):
92
93
  @requires_dependencies(dependencies=["unstructured"])
93
94
  def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
94
95
  from unstructured.chunking import dispatch
95
- from unstructured.staging.base import elements_from_json
96
+ from unstructured.staging.base import elements_from_dicts
96
97
 
97
- elements = elements_from_json(filename=str(elements_filepath))
98
+ element_dicts = get_json_data(elements_filepath)
99
+
100
+ elements = elements_from_dicts(element_dicts=element_dicts)
98
101
  if not elements:
99
102
  return [e.to_dict() for e in elements]
100
103
  local_chunking_strategies = ("basic", "by_title")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,348 +22,348 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
+ Requires-Dist: click
26
+ Requires-Dist: tqdm
25
27
  Requires-Dist: opentelemetry-sdk
28
+ Requires-Dist: python-dateutil
26
29
  Requires-Dist: dataclasses_json
27
30
  Requires-Dist: pydantic>=2.7
28
- Requires-Dist: python-dateutil
29
- Requires-Dist: click
30
- Requires-Dist: tqdm
31
- Requires-Dist: pandas
32
31
  Requires-Dist: numpy
32
+ Requires-Dist: pandas
33
33
  Provides-Extra: remote
34
34
  Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
35
- Requires-Dist: pandas; extra == "remote"
36
35
  Requires-Dist: numpy; extra == "remote"
36
+ Requires-Dist: pandas; extra == "remote"
37
37
  Provides-Extra: csv
38
38
  Requires-Dist: unstructured[tsv]; extra == "csv"
39
- Requires-Dist: pandas; extra == "csv"
40
39
  Requires-Dist: numpy; extra == "csv"
40
+ Requires-Dist: pandas; extra == "csv"
41
41
  Provides-Extra: doc
42
42
  Requires-Dist: unstructured[docx]; extra == "doc"
43
- Requires-Dist: pandas; extra == "doc"
44
43
  Requires-Dist: numpy; extra == "doc"
44
+ Requires-Dist: pandas; extra == "doc"
45
45
  Provides-Extra: docx
46
46
  Requires-Dist: unstructured[docx]; extra == "docx"
47
- Requires-Dist: pandas; extra == "docx"
48
47
  Requires-Dist: numpy; extra == "docx"
48
+ Requires-Dist: pandas; extra == "docx"
49
49
  Provides-Extra: epub
50
50
  Requires-Dist: unstructured[epub]; extra == "epub"
51
- Requires-Dist: pandas; extra == "epub"
52
51
  Requires-Dist: numpy; extra == "epub"
52
+ Requires-Dist: pandas; extra == "epub"
53
53
  Provides-Extra: md
54
54
  Requires-Dist: unstructured[md]; extra == "md"
55
- Requires-Dist: pandas; extra == "md"
56
55
  Requires-Dist: numpy; extra == "md"
56
+ Requires-Dist: pandas; extra == "md"
57
57
  Provides-Extra: msg
58
58
  Requires-Dist: unstructured[msg]; extra == "msg"
59
- Requires-Dist: pandas; extra == "msg"
60
59
  Requires-Dist: numpy; extra == "msg"
60
+ Requires-Dist: pandas; extra == "msg"
61
61
  Provides-Extra: odt
62
62
  Requires-Dist: unstructured[odt]; extra == "odt"
63
- Requires-Dist: pandas; extra == "odt"
64
63
  Requires-Dist: numpy; extra == "odt"
64
+ Requires-Dist: pandas; extra == "odt"
65
65
  Provides-Extra: org
66
66
  Requires-Dist: unstructured[org]; extra == "org"
67
- Requires-Dist: pandas; extra == "org"
68
67
  Requires-Dist: numpy; extra == "org"
68
+ Requires-Dist: pandas; extra == "org"
69
69
  Provides-Extra: pdf
70
70
  Requires-Dist: unstructured[pdf]; extra == "pdf"
71
- Requires-Dist: pandas; extra == "pdf"
72
71
  Requires-Dist: numpy; extra == "pdf"
72
+ Requires-Dist: pandas; extra == "pdf"
73
73
  Provides-Extra: ppt
74
74
  Requires-Dist: unstructured[pptx]; extra == "ppt"
75
- Requires-Dist: pandas; extra == "ppt"
76
75
  Requires-Dist: numpy; extra == "ppt"
76
+ Requires-Dist: pandas; extra == "ppt"
77
77
  Provides-Extra: pptx
78
78
  Requires-Dist: unstructured[pptx]; extra == "pptx"
79
- Requires-Dist: pandas; extra == "pptx"
80
79
  Requires-Dist: numpy; extra == "pptx"
80
+ Requires-Dist: pandas; extra == "pptx"
81
81
  Provides-Extra: rtf
82
82
  Requires-Dist: unstructured[rtf]; extra == "rtf"
83
- Requires-Dist: pandas; extra == "rtf"
84
83
  Requires-Dist: numpy; extra == "rtf"
84
+ Requires-Dist: pandas; extra == "rtf"
85
85
  Provides-Extra: rst
86
86
  Requires-Dist: unstructured[rst]; extra == "rst"
87
- Requires-Dist: pandas; extra == "rst"
88
87
  Requires-Dist: numpy; extra == "rst"
88
+ Requires-Dist: pandas; extra == "rst"
89
89
  Provides-Extra: tsv
90
90
  Requires-Dist: unstructured[tsv]; extra == "tsv"
91
- Requires-Dist: pandas; extra == "tsv"
92
91
  Requires-Dist: numpy; extra == "tsv"
92
+ Requires-Dist: pandas; extra == "tsv"
93
93
  Provides-Extra: xlsx
94
94
  Requires-Dist: unstructured[xlsx]; extra == "xlsx"
95
- Requires-Dist: pandas; extra == "xlsx"
96
95
  Requires-Dist: numpy; extra == "xlsx"
96
+ Requires-Dist: pandas; extra == "xlsx"
97
97
  Provides-Extra: airtable
98
98
  Requires-Dist: pyairtable; extra == "airtable"
99
- Requires-Dist: pandas; extra == "airtable"
100
99
  Requires-Dist: numpy; extra == "airtable"
100
+ Requires-Dist: pandas; extra == "airtable"
101
101
  Provides-Extra: astradb
102
102
  Requires-Dist: astrapy; extra == "astradb"
103
- Requires-Dist: pandas; extra == "astradb"
104
103
  Requires-Dist: numpy; extra == "astradb"
104
+ Requires-Dist: pandas; extra == "astradb"
105
105
  Provides-Extra: azure
106
106
  Requires-Dist: adlfs; extra == "azure"
107
107
  Requires-Dist: fsspec; extra == "azure"
108
- Requires-Dist: pandas; extra == "azure"
109
108
  Requires-Dist: numpy; extra == "azure"
109
+ Requires-Dist: pandas; extra == "azure"
110
110
  Provides-Extra: azure-ai-search
111
111
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
112
- Requires-Dist: pandas; extra == "azure-ai-search"
113
112
  Requires-Dist: numpy; extra == "azure-ai-search"
113
+ Requires-Dist: pandas; extra == "azure-ai-search"
114
114
  Provides-Extra: biomed
115
115
  Requires-Dist: requests; extra == "biomed"
116
116
  Requires-Dist: bs4; extra == "biomed"
117
- Requires-Dist: pandas; extra == "biomed"
118
117
  Requires-Dist: numpy; extra == "biomed"
118
+ Requires-Dist: pandas; extra == "biomed"
119
119
  Provides-Extra: box
120
- Requires-Dist: boxfs; extra == "box"
121
120
  Requires-Dist: fsspec; extra == "box"
122
- Requires-Dist: pandas; extra == "box"
121
+ Requires-Dist: boxfs; extra == "box"
123
122
  Requires-Dist: numpy; extra == "box"
123
+ Requires-Dist: pandas; extra == "box"
124
124
  Provides-Extra: chroma
125
125
  Requires-Dist: chromadb; extra == "chroma"
126
- Requires-Dist: pandas; extra == "chroma"
127
126
  Requires-Dist: numpy; extra == "chroma"
127
+ Requires-Dist: pandas; extra == "chroma"
128
128
  Provides-Extra: clarifai
129
129
  Requires-Dist: clarifai; extra == "clarifai"
130
- Requires-Dist: pandas; extra == "clarifai"
131
130
  Requires-Dist: numpy; extra == "clarifai"
131
+ Requires-Dist: pandas; extra == "clarifai"
132
132
  Provides-Extra: confluence
133
133
  Requires-Dist: requests; extra == "confluence"
134
134
  Requires-Dist: atlassian-python-api; extra == "confluence"
135
- Requires-Dist: pandas; extra == "confluence"
136
135
  Requires-Dist: numpy; extra == "confluence"
136
+ Requires-Dist: pandas; extra == "confluence"
137
137
  Provides-Extra: couchbase
138
138
  Requires-Dist: couchbase; extra == "couchbase"
139
- Requires-Dist: pandas; extra == "couchbase"
140
139
  Requires-Dist: numpy; extra == "couchbase"
140
+ Requires-Dist: pandas; extra == "couchbase"
141
141
  Provides-Extra: delta-table
142
142
  Requires-Dist: boto3; extra == "delta-table"
143
143
  Requires-Dist: deltalake; extra == "delta-table"
144
- Requires-Dist: pandas; extra == "delta-table"
145
144
  Requires-Dist: numpy; extra == "delta-table"
145
+ Requires-Dist: pandas; extra == "delta-table"
146
146
  Provides-Extra: discord
147
147
  Requires-Dist: discord.py; extra == "discord"
148
- Requires-Dist: pandas; extra == "discord"
149
148
  Requires-Dist: numpy; extra == "discord"
149
+ Requires-Dist: pandas; extra == "discord"
150
150
  Provides-Extra: dropbox
151
- Requires-Dist: fsspec; extra == "dropbox"
152
151
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
153
- Requires-Dist: pandas; extra == "dropbox"
152
+ Requires-Dist: fsspec; extra == "dropbox"
154
153
  Requires-Dist: numpy; extra == "dropbox"
154
+ Requires-Dist: pandas; extra == "dropbox"
155
155
  Provides-Extra: duckdb
156
156
  Requires-Dist: duckdb; extra == "duckdb"
157
- Requires-Dist: pandas; extra == "duckdb"
158
157
  Requires-Dist: numpy; extra == "duckdb"
158
+ Requires-Dist: pandas; extra == "duckdb"
159
159
  Provides-Extra: elasticsearch
160
160
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
161
- Requires-Dist: pandas; extra == "elasticsearch"
162
161
  Requires-Dist: numpy; extra == "elasticsearch"
162
+ Requires-Dist: pandas; extra == "elasticsearch"
163
163
  Provides-Extra: gcs
164
+ Requires-Dist: bs4; extra == "gcs"
164
165
  Requires-Dist: gcsfs; extra == "gcs"
165
166
  Requires-Dist: fsspec; extra == "gcs"
166
- Requires-Dist: bs4; extra == "gcs"
167
- Requires-Dist: pandas; extra == "gcs"
168
167
  Requires-Dist: numpy; extra == "gcs"
168
+ Requires-Dist: pandas; extra == "gcs"
169
169
  Provides-Extra: github
170
- Requires-Dist: pygithub>1.58.0; extra == "github"
171
170
  Requires-Dist: requests; extra == "github"
172
- Requires-Dist: pandas; extra == "github"
171
+ Requires-Dist: pygithub>1.58.0; extra == "github"
173
172
  Requires-Dist: numpy; extra == "github"
173
+ Requires-Dist: pandas; extra == "github"
174
174
  Provides-Extra: gitlab
175
175
  Requires-Dist: python-gitlab; extra == "gitlab"
176
- Requires-Dist: pandas; extra == "gitlab"
177
176
  Requires-Dist: numpy; extra == "gitlab"
177
+ Requires-Dist: pandas; extra == "gitlab"
178
178
  Provides-Extra: google-drive
179
179
  Requires-Dist: google-api-python-client; extra == "google-drive"
180
- Requires-Dist: pandas; extra == "google-drive"
181
180
  Requires-Dist: numpy; extra == "google-drive"
181
+ Requires-Dist: pandas; extra == "google-drive"
182
182
  Provides-Extra: hubspot
183
183
  Requires-Dist: hubspot-api-client; extra == "hubspot"
184
184
  Requires-Dist: urllib3; extra == "hubspot"
185
- Requires-Dist: pandas; extra == "hubspot"
186
185
  Requires-Dist: numpy; extra == "hubspot"
186
+ Requires-Dist: pandas; extra == "hubspot"
187
187
  Provides-Extra: ibm-watsonx-s3
188
188
  Requires-Dist: httpx; extra == "ibm-watsonx-s3"
189
- Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
190
189
  Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
191
190
  Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
192
- Requires-Dist: pandas; extra == "ibm-watsonx-s3"
191
+ Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
193
192
  Requires-Dist: numpy; extra == "ibm-watsonx-s3"
193
+ Requires-Dist: pandas; extra == "ibm-watsonx-s3"
194
194
  Provides-Extra: jira
195
195
  Requires-Dist: atlassian-python-api; extra == "jira"
196
- Requires-Dist: pandas; extra == "jira"
197
196
  Requires-Dist: numpy; extra == "jira"
197
+ Requires-Dist: pandas; extra == "jira"
198
198
  Provides-Extra: kafka
199
199
  Requires-Dist: confluent-kafka; extra == "kafka"
200
- Requires-Dist: pandas; extra == "kafka"
201
200
  Requires-Dist: numpy; extra == "kafka"
201
+ Requires-Dist: pandas; extra == "kafka"
202
202
  Provides-Extra: kdbai
203
203
  Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
204
- Requires-Dist: pandas; extra == "kdbai"
205
204
  Requires-Dist: numpy; extra == "kdbai"
205
+ Requires-Dist: pandas; extra == "kdbai"
206
206
  Provides-Extra: lancedb
207
207
  Requires-Dist: lancedb; extra == "lancedb"
208
- Requires-Dist: pandas; extra == "lancedb"
209
208
  Requires-Dist: numpy; extra == "lancedb"
209
+ Requires-Dist: pandas; extra == "lancedb"
210
210
  Provides-Extra: milvus
211
211
  Requires-Dist: pymilvus; extra == "milvus"
212
- Requires-Dist: pandas; extra == "milvus"
213
212
  Requires-Dist: numpy; extra == "milvus"
213
+ Requires-Dist: pandas; extra == "milvus"
214
214
  Provides-Extra: mongodb
215
215
  Requires-Dist: pymongo; extra == "mongodb"
216
- Requires-Dist: pandas; extra == "mongodb"
217
216
  Requires-Dist: numpy; extra == "mongodb"
217
+ Requires-Dist: pandas; extra == "mongodb"
218
218
  Provides-Extra: neo4j
219
219
  Requires-Dist: networkx; extra == "neo4j"
220
220
  Requires-Dist: cymple; extra == "neo4j"
221
221
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
222
- Requires-Dist: pandas; extra == "neo4j"
223
222
  Requires-Dist: numpy; extra == "neo4j"
223
+ Requires-Dist: pandas; extra == "neo4j"
224
224
  Provides-Extra: notion
225
- Requires-Dist: backoff; extra == "notion"
226
225
  Requires-Dist: httpx; extra == "notion"
227
226
  Requires-Dist: notion-client; extra == "notion"
228
227
  Requires-Dist: htmlBuilder; extra == "notion"
229
- Requires-Dist: pandas; extra == "notion"
228
+ Requires-Dist: backoff; extra == "notion"
230
229
  Requires-Dist: numpy; extra == "notion"
230
+ Requires-Dist: pandas; extra == "notion"
231
231
  Provides-Extra: onedrive
232
- Requires-Dist: msal; extra == "onedrive"
233
232
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
234
233
  Requires-Dist: bs4; extra == "onedrive"
235
- Requires-Dist: pandas; extra == "onedrive"
234
+ Requires-Dist: msal; extra == "onedrive"
236
235
  Requires-Dist: numpy; extra == "onedrive"
236
+ Requires-Dist: pandas; extra == "onedrive"
237
237
  Provides-Extra: opensearch
238
238
  Requires-Dist: opensearch-py; extra == "opensearch"
239
- Requires-Dist: pandas; extra == "opensearch"
240
239
  Requires-Dist: numpy; extra == "opensearch"
240
+ Requires-Dist: pandas; extra == "opensearch"
241
241
  Provides-Extra: outlook
242
- Requires-Dist: msal; extra == "outlook"
243
242
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
244
- Requires-Dist: pandas; extra == "outlook"
243
+ Requires-Dist: msal; extra == "outlook"
245
244
  Requires-Dist: numpy; extra == "outlook"
245
+ Requires-Dist: pandas; extra == "outlook"
246
246
  Provides-Extra: pinecone
247
247
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
248
- Requires-Dist: pandas; extra == "pinecone"
249
248
  Requires-Dist: numpy; extra == "pinecone"
249
+ Requires-Dist: pandas; extra == "pinecone"
250
250
  Provides-Extra: postgres
251
251
  Requires-Dist: psycopg2-binary; extra == "postgres"
252
- Requires-Dist: pandas; extra == "postgres"
253
252
  Requires-Dist: numpy; extra == "postgres"
253
+ Requires-Dist: pandas; extra == "postgres"
254
254
  Provides-Extra: qdrant
255
255
  Requires-Dist: qdrant-client; extra == "qdrant"
256
- Requires-Dist: pandas; extra == "qdrant"
257
256
  Requires-Dist: numpy; extra == "qdrant"
257
+ Requires-Dist: pandas; extra == "qdrant"
258
258
  Provides-Extra: reddit
259
259
  Requires-Dist: praw; extra == "reddit"
260
- Requires-Dist: pandas; extra == "reddit"
261
260
  Requires-Dist: numpy; extra == "reddit"
261
+ Requires-Dist: pandas; extra == "reddit"
262
262
  Provides-Extra: redis
263
263
  Requires-Dist: redis; extra == "redis"
264
- Requires-Dist: pandas; extra == "redis"
265
264
  Requires-Dist: numpy; extra == "redis"
265
+ Requires-Dist: pandas; extra == "redis"
266
266
  Provides-Extra: s3
267
- Requires-Dist: fsspec; extra == "s3"
268
267
  Requires-Dist: s3fs; extra == "s3"
269
- Requires-Dist: pandas; extra == "s3"
268
+ Requires-Dist: fsspec; extra == "s3"
270
269
  Requires-Dist: numpy; extra == "s3"
270
+ Requires-Dist: pandas; extra == "s3"
271
271
  Provides-Extra: sharepoint
272
- Requires-Dist: msal; extra == "sharepoint"
273
272
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
274
- Requires-Dist: pandas; extra == "sharepoint"
273
+ Requires-Dist: msal; extra == "sharepoint"
275
274
  Requires-Dist: numpy; extra == "sharepoint"
275
+ Requires-Dist: pandas; extra == "sharepoint"
276
276
  Provides-Extra: salesforce
277
277
  Requires-Dist: simple-salesforce; extra == "salesforce"
278
- Requires-Dist: pandas; extra == "salesforce"
279
278
  Requires-Dist: numpy; extra == "salesforce"
279
+ Requires-Dist: pandas; extra == "salesforce"
280
280
  Provides-Extra: sftp
281
- Requires-Dist: fsspec; extra == "sftp"
282
281
  Requires-Dist: paramiko; extra == "sftp"
283
- Requires-Dist: pandas; extra == "sftp"
282
+ Requires-Dist: fsspec; extra == "sftp"
284
283
  Requires-Dist: numpy; extra == "sftp"
284
+ Requires-Dist: pandas; extra == "sftp"
285
285
  Provides-Extra: slack
286
286
  Requires-Dist: slack_sdk[optional]; extra == "slack"
287
- Requires-Dist: pandas; extra == "slack"
288
287
  Requires-Dist: numpy; extra == "slack"
288
+ Requires-Dist: pandas; extra == "slack"
289
289
  Provides-Extra: snowflake
290
- Requires-Dist: psycopg2-binary; extra == "snowflake"
291
290
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
292
- Requires-Dist: pandas; extra == "snowflake"
291
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
293
292
  Requires-Dist: numpy; extra == "snowflake"
293
+ Requires-Dist: pandas; extra == "snowflake"
294
294
  Provides-Extra: wikipedia
295
295
  Requires-Dist: wikipedia; extra == "wikipedia"
296
- Requires-Dist: pandas; extra == "wikipedia"
297
296
  Requires-Dist: numpy; extra == "wikipedia"
297
+ Requires-Dist: pandas; extra == "wikipedia"
298
298
  Provides-Extra: weaviate
299
299
  Requires-Dist: weaviate-client; extra == "weaviate"
300
- Requires-Dist: pandas; extra == "weaviate"
301
300
  Requires-Dist: numpy; extra == "weaviate"
301
+ Requires-Dist: pandas; extra == "weaviate"
302
302
  Provides-Extra: databricks-volumes
303
303
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
304
- Requires-Dist: pandas; extra == "databricks-volumes"
305
304
  Requires-Dist: numpy; extra == "databricks-volumes"
305
+ Requires-Dist: pandas; extra == "databricks-volumes"
306
306
  Provides-Extra: databricks-delta-tables
307
307
  Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
308
- Requires-Dist: pandas; extra == "databricks-delta-tables"
309
308
  Requires-Dist: numpy; extra == "databricks-delta-tables"
309
+ Requires-Dist: pandas; extra == "databricks-delta-tables"
310
310
  Provides-Extra: singlestore
311
311
  Requires-Dist: singlestoredb; extra == "singlestore"
312
- Requires-Dist: pandas; extra == "singlestore"
313
312
  Requires-Dist: numpy; extra == "singlestore"
313
+ Requires-Dist: pandas; extra == "singlestore"
314
314
  Provides-Extra: vectara
315
- Requires-Dist: httpx; extra == "vectara"
316
315
  Requires-Dist: requests; extra == "vectara"
317
316
  Requires-Dist: aiofiles; extra == "vectara"
318
- Requires-Dist: pandas; extra == "vectara"
317
+ Requires-Dist: httpx; extra == "vectara"
319
318
  Requires-Dist: numpy; extra == "vectara"
319
+ Requires-Dist: pandas; extra == "vectara"
320
320
  Provides-Extra: vastdb
321
+ Requires-Dist: vastdb; extra == "vastdb"
321
322
  Requires-Dist: ibis; extra == "vastdb"
322
323
  Requires-Dist: pyarrow; extra == "vastdb"
323
- Requires-Dist: vastdb; extra == "vastdb"
324
- Requires-Dist: pandas; extra == "vastdb"
325
324
  Requires-Dist: numpy; extra == "vastdb"
325
+ Requires-Dist: pandas; extra == "vastdb"
326
326
  Provides-Extra: zendesk
327
327
  Requires-Dist: httpx; extra == "zendesk"
328
328
  Requires-Dist: aiofiles; extra == "zendesk"
329
329
  Requires-Dist: bs4; extra == "zendesk"
330
- Requires-Dist: pandas; extra == "zendesk"
331
330
  Requires-Dist: numpy; extra == "zendesk"
331
+ Requires-Dist: pandas; extra == "zendesk"
332
332
  Provides-Extra: embed-huggingface
333
333
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
334
- Requires-Dist: pandas; extra == "embed-huggingface"
335
334
  Requires-Dist: numpy; extra == "embed-huggingface"
335
+ Requires-Dist: pandas; extra == "embed-huggingface"
336
336
  Provides-Extra: embed-octoai
337
337
  Requires-Dist: openai; extra == "embed-octoai"
338
338
  Requires-Dist: tiktoken; extra == "embed-octoai"
339
- Requires-Dist: pandas; extra == "embed-octoai"
340
339
  Requires-Dist: numpy; extra == "embed-octoai"
340
+ Requires-Dist: pandas; extra == "embed-octoai"
341
341
  Provides-Extra: embed-vertexai
342
342
  Requires-Dist: vertexai; extra == "embed-vertexai"
343
- Requires-Dist: pandas; extra == "embed-vertexai"
344
343
  Requires-Dist: numpy; extra == "embed-vertexai"
344
+ Requires-Dist: pandas; extra == "embed-vertexai"
345
345
  Provides-Extra: embed-voyageai
346
346
  Requires-Dist: voyageai; extra == "embed-voyageai"
347
- Requires-Dist: pandas; extra == "embed-voyageai"
348
347
  Requires-Dist: numpy; extra == "embed-voyageai"
348
+ Requires-Dist: pandas; extra == "embed-voyageai"
349
349
  Provides-Extra: embed-mixedbreadai
350
350
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
351
- Requires-Dist: pandas; extra == "embed-mixedbreadai"
352
351
  Requires-Dist: numpy; extra == "embed-mixedbreadai"
352
+ Requires-Dist: pandas; extra == "embed-mixedbreadai"
353
353
  Provides-Extra: openai
354
354
  Requires-Dist: openai; extra == "openai"
355
355
  Requires-Dist: tiktoken; extra == "openai"
356
- Requires-Dist: pandas; extra == "openai"
357
356
  Requires-Dist: numpy; extra == "openai"
357
+ Requires-Dist: pandas; extra == "openai"
358
358
  Provides-Extra: bedrock
359
- Requires-Dist: aioboto3; extra == "bedrock"
360
359
  Requires-Dist: boto3; extra == "bedrock"
361
- Requires-Dist: pandas; extra == "bedrock"
360
+ Requires-Dist: aioboto3; extra == "bedrock"
362
361
  Requires-Dist: numpy; extra == "bedrock"
362
+ Requires-Dist: pandas; extra == "bedrock"
363
363
  Provides-Extra: togetherai
364
364
  Requires-Dist: together; extra == "togetherai"
365
- Requires-Dist: pandas; extra == "togetherai"
366
365
  Requires-Dist: numpy; extra == "togetherai"
366
+ Requires-Dist: pandas; extra == "togetherai"
367
367
  Dynamic: author
368
368
  Dynamic: author-email
369
369
  Dynamic: classifier
@@ -113,7 +113,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
113
113
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
115
115
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
116
- unstructured_ingest/__version__.py,sha256=i3sYvJ7iKJXkLBzcGwrQbcRp0S3NVhrYt6MAMPjU-ss,42
116
+ unstructured_ingest/__version__.py,sha256=vYkj5wI9darc7y1Fll8uAtxzlI0lqsa5gGerwBBkeIQ,42
117
117
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
118
118
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
119
119
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -421,7 +421,7 @@ unstructured_ingest/v2/pipeline/steps/stage.py,sha256=_0BN2i273y_fZyvSUPOOeXv4kL
421
421
  unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=I9TyqMCUSxlf2kdPADjeH4TrUTSe0FMTlARp9QD6TsE,1763
422
422
  unstructured_ingest/v2/pipeline/steps/upload.py,sha256=6x8SUdnydR76K6cR3nUVupOACIx-XsRV3vXRlebolqg,1996
423
423
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
424
- unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
424
+ unstructured_ingest/v2/processes/chunker.py,sha256=O5FN8KWym79H0dtKZvW7ABgn4bwKtaeUO8meGdjM2Yo,5609
425
425
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
426
426
  unstructured_ingest/v2/processes/embedder.py,sha256=gvlCQDsbQVgcp-2f0Qq4RiFbcr8gJwIS-imgloE-UOc,7887
427
427
  unstructured_ingest/v2/processes/filter.py,sha256=E1MLxk-XeCm3mZIuM49lJToVcSgOivmTFIZApqOEFs8,2150
@@ -582,9 +582,9 @@ unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
582
582
  unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=_I3OMdpUElQdIwVs7W9ORU1kncNaZ_nr6lbxeKE8uaU,1014
583
583
  unstructured_ingest/v2/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
584
584
  unstructured_ingest/v2/types/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
585
- unstructured_ingest-0.6.0.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
586
- unstructured_ingest-0.6.0.dist-info/METADATA,sha256=NWCm69UvgcHT7J8owHmQCYkXmdfkfyCJNcW6slNzANA,14998
587
- unstructured_ingest-0.6.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
588
- unstructured_ingest-0.6.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
589
- unstructured_ingest-0.6.0.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
590
- unstructured_ingest-0.6.0.dist-info/RECORD,,
585
+ unstructured_ingest-0.6.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
586
+ unstructured_ingest-0.6.1.dist-info/METADATA,sha256=Babhsu1h1L0nvRFeImk9Jn-jPjnaW-jdz6mhB3jkmbI,14998
587
+ unstructured_ingest-0.6.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
588
+ unstructured_ingest-0.6.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
589
+ unstructured_ingest-0.6.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
590
+ unstructured_ingest-0.6.1.dist-info/RECORD,,