unstructured-ingest 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.0.12" # pragma: no cover
1
+ __version__ = "0.0.13" # pragma: no cover
@@ -15,14 +15,19 @@ def batch_generator(iterable, batch_size=100):
15
15
  chunk = tuple(itertools.islice(it, batch_size))
16
16
 
17
17
 
18
- def generator_batching_wbytes(iterable, batch_size_limit_bytes=15_000_000):
18
+ def generator_batching_wbytes(
19
+ iterable, batch_size_limit_bytes=15_000_000, max_batch_size: int = 1000
20
+ ):
19
21
  """A helper function to break an iterable into chunks of specified bytes."""
20
22
  current_batch, current_batch_size = [], 0
21
23
 
22
24
  for item in iterable:
23
25
  item_size_bytes = len(json.dumps(item).encode("utf-8"))
24
26
 
25
- if current_batch_size + item_size_bytes <= batch_size_limit_bytes:
27
+ if (
28
+ current_batch_size + item_size_bytes <= batch_size_limit_bytes
29
+ or len(current_batch) == 0 # prevent infinite yielding of empty batch
30
+ ) and len(current_batch) < max_batch_size:
26
31
  current_batch.append(item)
27
32
  current_batch_size += item_size_bytes
28
33
  else:
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
7
7
  from pydantic import Field, Secret
8
8
 
9
9
  from unstructured_ingest.error import DestinationConnectionError
10
- from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
10
+ from unstructured_ingest.utils.data_prep import flatten_dict, generator_batching_wbytes
11
11
  from unstructured_ingest.utils.dep_check import requires_dependencies
12
12
  from unstructured_ingest.v2.interfaces import (
13
13
  AccessConfig,
@@ -19,15 +19,14 @@ from unstructured_ingest.v2.interfaces import (
19
19
  UploadStagerConfig,
20
20
  )
21
21
  from unstructured_ingest.v2.logger import logger
22
- from unstructured_ingest.v2.processes.connector_registry import (
23
- DestinationRegistryEntry,
24
- )
22
+ from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
25
23
 
26
24
  if TYPE_CHECKING:
27
25
  from pinecone import Index as PineconeIndex
28
26
 
29
27
 
30
28
  CONNECTOR_TYPE = "pinecone"
29
+ MAX_PAYLOAD_SIZE = 2 * 1024 * 1024 # 2MB
31
30
 
32
31
 
33
32
  class PineconeAccessConfig(AccessConfig):
@@ -69,6 +68,23 @@ class PineconeUploaderConfig(UploaderConfig):
69
68
  batch_size: int = Field(default=100, description="Number of records per batch")
70
69
 
71
70
 
71
+ ALLOWED_FIELDS = (
72
+ "element_id",
73
+ "text",
74
+ "parent_id",
75
+ "category_depth",
76
+ "emphasized_text_tags",
77
+ "emphasized_text_contents",
78
+ "coordinates",
79
+ "last_modified",
80
+ "page_number",
81
+ "filename",
82
+ "is_continuation",
83
+ "link_urls",
84
+ "link_texts",
85
+ )
86
+
87
+
72
88
  @dataclass
73
89
  class PineconeUploadStager(UploadStager):
74
90
  upload_stager_config: PineconeUploadStagerConfig = field(
@@ -77,22 +93,24 @@ class PineconeUploadStager(UploadStager):
77
93
 
78
94
  @staticmethod
79
95
  def conform_dict(element_dict: dict) -> dict:
80
- # While flatten_dict enables indexing on various fields,
81
- # element_serialized enables easily reloading the element object to memory.
82
- # element_serialized is formed without text/embeddings to avoid data bloating.
96
+ embeddings = element_dict.pop("embeddings", None)
97
+ metadata: dict[str, Any] = element_dict.pop("metadata", {})
98
+ data_source = metadata.pop("data_source", {})
99
+ coordinates = metadata.pop("coordinates", {})
100
+
101
+ element_dict.update(metadata)
102
+ element_dict.update(data_source)
103
+ element_dict.update(coordinates)
104
+
83
105
  return {
84
106
  "id": str(uuid.uuid4()),
85
- "values": element_dict.pop("embeddings", None),
86
- "metadata": {
87
- "text": element_dict.pop("text", None),
88
- "element_serialized": json.dumps(element_dict),
89
- **flatten_dict(
90
- element_dict,
91
- separator="-",
92
- flatten_lists=True,
93
- remove_none=True,
94
- ),
95
- },
107
+ "values": embeddings,
108
+ "metadata": flatten_dict(
109
+ {k: v for k, v in element_dict.items() if k in ALLOWED_FIELDS},
110
+ separator="-",
111
+ flatten_lists=True,
112
+ remove_none=True,
113
+ ),
96
114
  }
97
115
 
98
116
  def run(
@@ -150,9 +168,10 @@ class PineconeUploader(Uploader):
150
168
  f" with batch size {self.upload_config.batch_size}"
151
169
  )
152
170
 
153
- pinecone_batch_size = self.upload_config.batch_size
154
- for pinecone_batch in batch_generator(elements_dict, pinecone_batch_size):
155
- self.upsert_batch(batch=pinecone_batch)
171
+ for batch in generator_batching_wbytes(
172
+ elements_dict, MAX_PAYLOAD_SIZE - 100, self.upload_config.batch_size
173
+ ):
174
+ self.upsert_batch(batch=batch)
156
175
 
157
176
 
158
177
  pinecone_destination_entry = DestinationRegistryEntry(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.12
3
+ Version: 0.0.13
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,616 +22,616 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: tqdm
26
- Requires-Dist: pydantic
27
- Requires-Dist: pandas
28
25
  Requires-Dist: opentelemetry-sdk
26
+ Requires-Dist: pandas
29
27
  Requires-Dist: python-dateutil
28
+ Requires-Dist: tqdm
30
29
  Requires-Dist: click
31
30
  Requires-Dist: dataclasses-json
31
+ Requires-Dist: pydantic
32
32
  Provides-Extra: airtable
33
- Requires-Dist: tqdm; extra == "airtable"
34
- Requires-Dist: pydantic; extra == "airtable"
33
+ Requires-Dist: pyairtable; extra == "airtable"
35
34
  Requires-Dist: opentelemetry-sdk; extra == "airtable"
36
35
  Requires-Dist: pandas; extra == "airtable"
37
36
  Requires-Dist: python-dateutil; extra == "airtable"
37
+ Requires-Dist: tqdm; extra == "airtable"
38
38
  Requires-Dist: click; extra == "airtable"
39
- Requires-Dist: pyairtable; extra == "airtable"
40
39
  Requires-Dist: dataclasses-json; extra == "airtable"
40
+ Requires-Dist: pydantic; extra == "airtable"
41
41
  Provides-Extra: astradb
42
- Requires-Dist: tqdm; extra == "astradb"
43
42
  Requires-Dist: astrapy; extra == "astradb"
44
- Requires-Dist: pydantic; extra == "astradb"
45
43
  Requires-Dist: opentelemetry-sdk; extra == "astradb"
46
44
  Requires-Dist: pandas; extra == "astradb"
47
45
  Requires-Dist: python-dateutil; extra == "astradb"
46
+ Requires-Dist: tqdm; extra == "astradb"
48
47
  Requires-Dist: click; extra == "astradb"
49
48
  Requires-Dist: dataclasses-json; extra == "astradb"
49
+ Requires-Dist: pydantic; extra == "astradb"
50
50
  Provides-Extra: azure
51
+ Requires-Dist: adlfs; extra == "azure"
51
52
  Requires-Dist: fsspec; extra == "azure"
52
- Requires-Dist: tqdm; extra == "azure"
53
- Requires-Dist: pydantic; extra == "azure"
54
- Requires-Dist: pandas; extra == "azure"
55
53
  Requires-Dist: opentelemetry-sdk; extra == "azure"
54
+ Requires-Dist: pandas; extra == "azure"
56
55
  Requires-Dist: python-dateutil; extra == "azure"
57
- Requires-Dist: adlfs; extra == "azure"
56
+ Requires-Dist: tqdm; extra == "azure"
58
57
  Requires-Dist: click; extra == "azure"
59
58
  Requires-Dist: dataclasses-json; extra == "azure"
59
+ Requires-Dist: pydantic; extra == "azure"
60
60
  Provides-Extra: azure-cognitive-search
61
- Requires-Dist: tqdm; extra == "azure-cognitive-search"
62
- Requires-Dist: pydantic; extra == "azure-cognitive-search"
63
61
  Requires-Dist: opentelemetry-sdk; extra == "azure-cognitive-search"
64
62
  Requires-Dist: pandas; extra == "azure-cognitive-search"
65
63
  Requires-Dist: python-dateutil; extra == "azure-cognitive-search"
64
+ Requires-Dist: tqdm; extra == "azure-cognitive-search"
66
65
  Requires-Dist: click; extra == "azure-cognitive-search"
67
66
  Requires-Dist: dataclasses-json; extra == "azure-cognitive-search"
68
67
  Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
68
+ Requires-Dist: pydantic; extra == "azure-cognitive-search"
69
69
  Provides-Extra: bedrock
70
- Requires-Dist: tqdm; extra == "bedrock"
71
- Requires-Dist: unstructured; extra == "bedrock"
72
- Requires-Dist: pydantic; extra == "bedrock"
73
- Requires-Dist: pandas; extra == "bedrock"
74
70
  Requires-Dist: opentelemetry-sdk; extra == "bedrock"
71
+ Requires-Dist: pandas; extra == "bedrock"
75
72
  Requires-Dist: python-dateutil; extra == "bedrock"
76
- Requires-Dist: langchain-community; extra == "bedrock"
73
+ Requires-Dist: boto3; extra == "bedrock"
74
+ Requires-Dist: tqdm; extra == "bedrock"
77
75
  Requires-Dist: click; extra == "bedrock"
78
76
  Requires-Dist: dataclasses-json; extra == "bedrock"
79
- Requires-Dist: boto3; extra == "bedrock"
77
+ Requires-Dist: unstructured; extra == "bedrock"
78
+ Requires-Dist: langchain-community; extra == "bedrock"
79
+ Requires-Dist: pydantic; extra == "bedrock"
80
80
  Provides-Extra: biomed
81
- Requires-Dist: tqdm; extra == "biomed"
82
- Requires-Dist: pydantic; extra == "biomed"
83
- Requires-Dist: pandas; extra == "biomed"
81
+ Requires-Dist: bs4; extra == "biomed"
84
82
  Requires-Dist: opentelemetry-sdk; extra == "biomed"
83
+ Requires-Dist: requests; extra == "biomed"
84
+ Requires-Dist: pandas; extra == "biomed"
85
85
  Requires-Dist: python-dateutil; extra == "biomed"
86
- Requires-Dist: bs4; extra == "biomed"
86
+ Requires-Dist: tqdm; extra == "biomed"
87
87
  Requires-Dist: click; extra == "biomed"
88
88
  Requires-Dist: dataclasses-json; extra == "biomed"
89
- Requires-Dist: requests; extra == "biomed"
89
+ Requires-Dist: pydantic; extra == "biomed"
90
90
  Provides-Extra: box
91
91
  Requires-Dist: fsspec; extra == "box"
92
- Requires-Dist: tqdm; extra == "box"
93
- Requires-Dist: pydantic; extra == "box"
94
- Requires-Dist: pandas; extra == "box"
92
+ Requires-Dist: boxfs; extra == "box"
95
93
  Requires-Dist: opentelemetry-sdk; extra == "box"
94
+ Requires-Dist: pandas; extra == "box"
96
95
  Requires-Dist: python-dateutil; extra == "box"
96
+ Requires-Dist: tqdm; extra == "box"
97
97
  Requires-Dist: click; extra == "box"
98
- Requires-Dist: boxfs; extra == "box"
99
98
  Requires-Dist: dataclasses-json; extra == "box"
99
+ Requires-Dist: pydantic; extra == "box"
100
100
  Provides-Extra: chroma
101
- Requires-Dist: importlib-metadata>=7.1.0; extra == "chroma"
102
- Requires-Dist: tqdm; extra == "chroma"
103
- Requires-Dist: pydantic; extra == "chroma"
104
- Requires-Dist: pandas; extra == "chroma"
101
+ Requires-Dist: typer<=0.9.0; extra == "chroma"
105
102
  Requires-Dist: opentelemetry-sdk; extra == "chroma"
103
+ Requires-Dist: pandas; extra == "chroma"
104
+ Requires-Dist: chromadb; extra == "chroma"
106
105
  Requires-Dist: python-dateutil; extra == "chroma"
106
+ Requires-Dist: tqdm; extra == "chroma"
107
107
  Requires-Dist: click; extra == "chroma"
108
+ Requires-Dist: importlib-metadata>=7.1.0; extra == "chroma"
108
109
  Requires-Dist: dataclasses-json; extra == "chroma"
109
- Requires-Dist: chromadb; extra == "chroma"
110
- Requires-Dist: typer<=0.9.0; extra == "chroma"
110
+ Requires-Dist: pydantic; extra == "chroma"
111
111
  Provides-Extra: clarifai
112
- Requires-Dist: tqdm; extra == "clarifai"
113
- Requires-Dist: pydantic; extra == "clarifai"
112
+ Requires-Dist: clarifai; extra == "clarifai"
114
113
  Requires-Dist: opentelemetry-sdk; extra == "clarifai"
115
114
  Requires-Dist: pandas; extra == "clarifai"
116
115
  Requires-Dist: python-dateutil; extra == "clarifai"
116
+ Requires-Dist: tqdm; extra == "clarifai"
117
117
  Requires-Dist: click; extra == "clarifai"
118
- Requires-Dist: clarifai; extra == "clarifai"
119
118
  Requires-Dist: dataclasses-json; extra == "clarifai"
119
+ Requires-Dist: pydantic; extra == "clarifai"
120
120
  Provides-Extra: confluence
121
- Requires-Dist: tqdm; extra == "confluence"
122
- Requires-Dist: pydantic; extra == "confluence"
123
- Requires-Dist: pandas; extra == "confluence"
124
121
  Requires-Dist: opentelemetry-sdk; extra == "confluence"
122
+ Requires-Dist: requests; extra == "confluence"
123
+ Requires-Dist: pandas; extra == "confluence"
125
124
  Requires-Dist: python-dateutil; extra == "confluence"
126
- Requires-Dist: click; extra == "confluence"
127
125
  Requires-Dist: atlassian-python-api; extra == "confluence"
126
+ Requires-Dist: tqdm; extra == "confluence"
127
+ Requires-Dist: click; extra == "confluence"
128
128
  Requires-Dist: dataclasses-json; extra == "confluence"
129
- Requires-Dist: requests; extra == "confluence"
129
+ Requires-Dist: pydantic; extra == "confluence"
130
130
  Provides-Extra: couchbase
131
- Requires-Dist: tqdm; extra == "couchbase"
132
- Requires-Dist: pydantic; extra == "couchbase"
131
+ Requires-Dist: couchbase; extra == "couchbase"
133
132
  Requires-Dist: opentelemetry-sdk; extra == "couchbase"
134
133
  Requires-Dist: pandas; extra == "couchbase"
135
134
  Requires-Dist: python-dateutil; extra == "couchbase"
135
+ Requires-Dist: tqdm; extra == "couchbase"
136
136
  Requires-Dist: click; extra == "couchbase"
137
- Requires-Dist: couchbase; extra == "couchbase"
138
137
  Requires-Dist: dataclasses-json; extra == "couchbase"
138
+ Requires-Dist: pydantic; extra == "couchbase"
139
139
  Provides-Extra: csv
140
- Requires-Dist: unstructured[tsv]; extra == "csv"
141
- Requires-Dist: tqdm; extra == "csv"
142
- Requires-Dist: pydantic; extra == "csv"
143
140
  Requires-Dist: opentelemetry-sdk; extra == "csv"
144
141
  Requires-Dist: pandas; extra == "csv"
145
142
  Requires-Dist: python-dateutil; extra == "csv"
143
+ Requires-Dist: tqdm; extra == "csv"
144
+ Requires-Dist: unstructured[tsv]; extra == "csv"
146
145
  Requires-Dist: click; extra == "csv"
147
146
  Requires-Dist: dataclasses-json; extra == "csv"
147
+ Requires-Dist: pydantic; extra == "csv"
148
148
  Provides-Extra: databricks-volumes
149
- Requires-Dist: tqdm; extra == "databricks-volumes"
150
- Requires-Dist: pydantic; extra == "databricks-volumes"
151
149
  Requires-Dist: opentelemetry-sdk; extra == "databricks-volumes"
152
150
  Requires-Dist: pandas; extra == "databricks-volumes"
153
- Requires-Dist: databricks-sdk; extra == "databricks-volumes"
154
151
  Requires-Dist: python-dateutil; extra == "databricks-volumes"
152
+ Requires-Dist: tqdm; extra == "databricks-volumes"
155
153
  Requires-Dist: click; extra == "databricks-volumes"
154
+ Requires-Dist: databricks-sdk; extra == "databricks-volumes"
156
155
  Requires-Dist: dataclasses-json; extra == "databricks-volumes"
156
+ Requires-Dist: pydantic; extra == "databricks-volumes"
157
157
  Provides-Extra: delta-table
158
158
  Requires-Dist: fsspec; extra == "delta-table"
159
- Requires-Dist: tqdm; extra == "delta-table"
160
- Requires-Dist: pydantic; extra == "delta-table"
161
- Requires-Dist: pandas; extra == "delta-table"
162
159
  Requires-Dist: opentelemetry-sdk; extra == "delta-table"
160
+ Requires-Dist: pandas; extra == "delta-table"
163
161
  Requires-Dist: python-dateutil; extra == "delta-table"
162
+ Requires-Dist: tqdm; extra == "delta-table"
164
163
  Requires-Dist: click; extra == "delta-table"
165
164
  Requires-Dist: deltalake; extra == "delta-table"
166
165
  Requires-Dist: dataclasses-json; extra == "delta-table"
166
+ Requires-Dist: pydantic; extra == "delta-table"
167
167
  Provides-Extra: discord
168
- Requires-Dist: discord-py; extra == "discord"
169
- Requires-Dist: tqdm; extra == "discord"
170
- Requires-Dist: pydantic; extra == "discord"
171
168
  Requires-Dist: opentelemetry-sdk; extra == "discord"
172
169
  Requires-Dist: pandas; extra == "discord"
173
170
  Requires-Dist: python-dateutil; extra == "discord"
171
+ Requires-Dist: tqdm; extra == "discord"
174
172
  Requires-Dist: click; extra == "discord"
173
+ Requires-Dist: discord-py; extra == "discord"
175
174
  Requires-Dist: dataclasses-json; extra == "discord"
175
+ Requires-Dist: pydantic; extra == "discord"
176
176
  Provides-Extra: doc
177
- Requires-Dist: tqdm; extra == "doc"
178
- Requires-Dist: pydantic; extra == "doc"
177
+ Requires-Dist: unstructured[docx]; extra == "doc"
179
178
  Requires-Dist: opentelemetry-sdk; extra == "doc"
180
179
  Requires-Dist: pandas; extra == "doc"
181
180
  Requires-Dist: python-dateutil; extra == "doc"
181
+ Requires-Dist: tqdm; extra == "doc"
182
182
  Requires-Dist: click; extra == "doc"
183
- Requires-Dist: unstructured[docx]; extra == "doc"
184
183
  Requires-Dist: dataclasses-json; extra == "doc"
184
+ Requires-Dist: pydantic; extra == "doc"
185
185
  Provides-Extra: docx
186
- Requires-Dist: tqdm; extra == "docx"
187
- Requires-Dist: pydantic; extra == "docx"
186
+ Requires-Dist: unstructured[docx]; extra == "docx"
188
187
  Requires-Dist: opentelemetry-sdk; extra == "docx"
189
188
  Requires-Dist: pandas; extra == "docx"
190
189
  Requires-Dist: python-dateutil; extra == "docx"
190
+ Requires-Dist: tqdm; extra == "docx"
191
191
  Requires-Dist: click; extra == "docx"
192
- Requires-Dist: unstructured[docx]; extra == "docx"
193
192
  Requires-Dist: dataclasses-json; extra == "docx"
193
+ Requires-Dist: pydantic; extra == "docx"
194
194
  Provides-Extra: dropbox
195
195
  Requires-Dist: fsspec; extra == "dropbox"
196
- Requires-Dist: tqdm; extra == "dropbox"
197
- Requires-Dist: pydantic; extra == "dropbox"
198
- Requires-Dist: pandas; extra == "dropbox"
199
196
  Requires-Dist: opentelemetry-sdk; extra == "dropbox"
197
+ Requires-Dist: pandas; extra == "dropbox"
200
198
  Requires-Dist: python-dateutil; extra == "dropbox"
199
+ Requires-Dist: tqdm; extra == "dropbox"
201
200
  Requires-Dist: click; extra == "dropbox"
202
201
  Requires-Dist: dataclasses-json; extra == "dropbox"
203
202
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
203
+ Requires-Dist: pydantic; extra == "dropbox"
204
204
  Provides-Extra: elasticsearch
205
- Requires-Dist: tqdm; extra == "elasticsearch"
206
205
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
207
- Requires-Dist: pydantic; extra == "elasticsearch"
208
206
  Requires-Dist: opentelemetry-sdk; extra == "elasticsearch"
209
207
  Requires-Dist: pandas; extra == "elasticsearch"
210
208
  Requires-Dist: python-dateutil; extra == "elasticsearch"
209
+ Requires-Dist: tqdm; extra == "elasticsearch"
211
210
  Requires-Dist: click; extra == "elasticsearch"
212
211
  Requires-Dist: dataclasses-json; extra == "elasticsearch"
212
+ Requires-Dist: pydantic; extra == "elasticsearch"
213
213
  Provides-Extra: embed-huggingface
214
- Requires-Dist: tqdm; extra == "embed-huggingface"
215
- Requires-Dist: unstructured; extra == "embed-huggingface"
216
- Requires-Dist: pydantic; extra == "embed-huggingface"
217
214
  Requires-Dist: opentelemetry-sdk; extra == "embed-huggingface"
218
215
  Requires-Dist: pandas; extra == "embed-huggingface"
219
216
  Requires-Dist: python-dateutil; extra == "embed-huggingface"
217
+ Requires-Dist: tqdm; extra == "embed-huggingface"
220
218
  Requires-Dist: click; extra == "embed-huggingface"
221
- Requires-Dist: langchain-huggingface; extra == "embed-huggingface"
222
219
  Requires-Dist: dataclasses-json; extra == "embed-huggingface"
220
+ Requires-Dist: unstructured; extra == "embed-huggingface"
221
+ Requires-Dist: langchain-huggingface; extra == "embed-huggingface"
222
+ Requires-Dist: pydantic; extra == "embed-huggingface"
223
223
  Provides-Extra: embed-octoai
224
- Requires-Dist: tqdm; extra == "embed-octoai"
225
- Requires-Dist: tiktoken; extra == "embed-octoai"
226
- Requires-Dist: pydantic; extra == "embed-octoai"
227
- Requires-Dist: openai; extra == "embed-octoai"
228
- Requires-Dist: pandas; extra == "embed-octoai"
229
224
  Requires-Dist: opentelemetry-sdk; extra == "embed-octoai"
225
+ Requires-Dist: pandas; extra == "embed-octoai"
226
+ Requires-Dist: tiktoken; extra == "embed-octoai"
230
227
  Requires-Dist: python-dateutil; extra == "embed-octoai"
231
- Requires-Dist: unstructured; extra == "embed-octoai"
228
+ Requires-Dist: tqdm; extra == "embed-octoai"
232
229
  Requires-Dist: click; extra == "embed-octoai"
233
230
  Requires-Dist: dataclasses-json; extra == "embed-octoai"
231
+ Requires-Dist: unstructured; extra == "embed-octoai"
232
+ Requires-Dist: openai; extra == "embed-octoai"
233
+ Requires-Dist: pydantic; extra == "embed-octoai"
234
234
  Provides-Extra: embed-vertexai
235
- Requires-Dist: langchain-google-vertexai; extra == "embed-vertexai"
236
- Requires-Dist: tqdm; extra == "embed-vertexai"
237
- Requires-Dist: unstructured; extra == "embed-vertexai"
238
- Requires-Dist: pydantic; extra == "embed-vertexai"
239
- Requires-Dist: pandas; extra == "embed-vertexai"
235
+ Requires-Dist: langchain; extra == "embed-vertexai"
240
236
  Requires-Dist: opentelemetry-sdk; extra == "embed-vertexai"
237
+ Requires-Dist: pandas; extra == "embed-vertexai"
238
+ Requires-Dist: langchain-google-vertexai; extra == "embed-vertexai"
241
239
  Requires-Dist: python-dateutil; extra == "embed-vertexai"
242
- Requires-Dist: langchain-community; extra == "embed-vertexai"
240
+ Requires-Dist: tqdm; extra == "embed-vertexai"
243
241
  Requires-Dist: click; extra == "embed-vertexai"
244
- Requires-Dist: langchain; extra == "embed-vertexai"
245
242
  Requires-Dist: dataclasses-json; extra == "embed-vertexai"
243
+ Requires-Dist: unstructured; extra == "embed-vertexai"
244
+ Requires-Dist: langchain-community; extra == "embed-vertexai"
245
+ Requires-Dist: pydantic; extra == "embed-vertexai"
246
246
  Provides-Extra: embed-voyageai
247
- Requires-Dist: tqdm; extra == "embed-voyageai"
248
- Requires-Dist: unstructured; extra == "embed-voyageai"
249
- Requires-Dist: pydantic; extra == "embed-voyageai"
250
- Requires-Dist: pandas; extra == "embed-voyageai"
251
- Requires-Dist: langchain-voyageai; extra == "embed-voyageai"
247
+ Requires-Dist: langchain; extra == "embed-voyageai"
252
248
  Requires-Dist: opentelemetry-sdk; extra == "embed-voyageai"
249
+ Requires-Dist: pandas; extra == "embed-voyageai"
253
250
  Requires-Dist: python-dateutil; extra == "embed-voyageai"
251
+ Requires-Dist: tqdm; extra == "embed-voyageai"
254
252
  Requires-Dist: click; extra == "embed-voyageai"
255
- Requires-Dist: langchain; extra == "embed-voyageai"
253
+ Requires-Dist: langchain-voyageai; extra == "embed-voyageai"
256
254
  Requires-Dist: dataclasses-json; extra == "embed-voyageai"
255
+ Requires-Dist: unstructured; extra == "embed-voyageai"
256
+ Requires-Dist: pydantic; extra == "embed-voyageai"
257
257
  Provides-Extra: epub
258
- Requires-Dist: tqdm; extra == "epub"
259
- Requires-Dist: pydantic; extra == "epub"
260
258
  Requires-Dist: opentelemetry-sdk; extra == "epub"
261
259
  Requires-Dist: pandas; extra == "epub"
262
260
  Requires-Dist: python-dateutil; extra == "epub"
261
+ Requires-Dist: tqdm; extra == "epub"
263
262
  Requires-Dist: click; extra == "epub"
264
263
  Requires-Dist: unstructured[epub]; extra == "epub"
265
264
  Requires-Dist: dataclasses-json; extra == "epub"
265
+ Requires-Dist: pydantic; extra == "epub"
266
266
  Provides-Extra: gcs
267
267
  Requires-Dist: fsspec; extra == "gcs"
268
- Requires-Dist: tqdm; extra == "gcs"
269
- Requires-Dist: gcsfs; extra == "gcs"
270
- Requires-Dist: pydantic; extra == "gcs"
271
- Requires-Dist: pandas; extra == "gcs"
268
+ Requires-Dist: bs4; extra == "gcs"
272
269
  Requires-Dist: opentelemetry-sdk; extra == "gcs"
270
+ Requires-Dist: pandas; extra == "gcs"
273
271
  Requires-Dist: python-dateutil; extra == "gcs"
274
- Requires-Dist: bs4; extra == "gcs"
272
+ Requires-Dist: tqdm; extra == "gcs"
275
273
  Requires-Dist: click; extra == "gcs"
276
274
  Requires-Dist: dataclasses-json; extra == "gcs"
275
+ Requires-Dist: gcsfs; extra == "gcs"
276
+ Requires-Dist: pydantic; extra == "gcs"
277
277
  Provides-Extra: github
278
- Requires-Dist: tqdm; extra == "github"
279
278
  Requires-Dist: pygithub>1.58.0; extra == "github"
280
- Requires-Dist: pydantic; extra == "github"
281
- Requires-Dist: pandas; extra == "github"
282
279
  Requires-Dist: opentelemetry-sdk; extra == "github"
280
+ Requires-Dist: requests; extra == "github"
281
+ Requires-Dist: pandas; extra == "github"
283
282
  Requires-Dist: python-dateutil; extra == "github"
283
+ Requires-Dist: tqdm; extra == "github"
284
284
  Requires-Dist: click; extra == "github"
285
285
  Requires-Dist: dataclasses-json; extra == "github"
286
- Requires-Dist: requests; extra == "github"
286
+ Requires-Dist: pydantic; extra == "github"
287
287
  Provides-Extra: gitlab
288
- Requires-Dist: tqdm; extra == "gitlab"
289
- Requires-Dist: python-gitlab; extra == "gitlab"
290
- Requires-Dist: pydantic; extra == "gitlab"
291
- Requires-Dist: pandas; extra == "gitlab"
292
288
  Requires-Dist: opentelemetry-sdk; extra == "gitlab"
289
+ Requires-Dist: pandas; extra == "gitlab"
293
290
  Requires-Dist: python-dateutil; extra == "gitlab"
291
+ Requires-Dist: tqdm; extra == "gitlab"
294
292
  Requires-Dist: click; extra == "gitlab"
295
293
  Requires-Dist: dataclasses-json; extra == "gitlab"
294
+ Requires-Dist: python-gitlab; extra == "gitlab"
295
+ Requires-Dist: pydantic; extra == "gitlab"
296
296
  Provides-Extra: google-drive
297
- Requires-Dist: tqdm; extra == "google-drive"
298
- Requires-Dist: google-api-python-client; extra == "google-drive"
299
- Requires-Dist: pydantic; extra == "google-drive"
300
297
  Requires-Dist: opentelemetry-sdk; extra == "google-drive"
301
298
  Requires-Dist: pandas; extra == "google-drive"
302
299
  Requires-Dist: python-dateutil; extra == "google-drive"
300
+ Requires-Dist: tqdm; extra == "google-drive"
301
+ Requires-Dist: google-api-python-client; extra == "google-drive"
303
302
  Requires-Dist: click; extra == "google-drive"
304
303
  Requires-Dist: dataclasses-json; extra == "google-drive"
304
+ Requires-Dist: pydantic; extra == "google-drive"
305
305
  Provides-Extra: hubspot
306
- Requires-Dist: hubspot-api-client; extra == "hubspot"
307
- Requires-Dist: tqdm; extra == "hubspot"
308
- Requires-Dist: urllib3; extra == "hubspot"
309
- Requires-Dist: pydantic; extra == "hubspot"
310
- Requires-Dist: pandas; extra == "hubspot"
311
306
  Requires-Dist: opentelemetry-sdk; extra == "hubspot"
307
+ Requires-Dist: pandas; extra == "hubspot"
312
308
  Requires-Dist: python-dateutil; extra == "hubspot"
309
+ Requires-Dist: urllib3; extra == "hubspot"
310
+ Requires-Dist: tqdm; extra == "hubspot"
313
311
  Requires-Dist: click; extra == "hubspot"
312
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
314
313
  Requires-Dist: dataclasses-json; extra == "hubspot"
314
+ Requires-Dist: pydantic; extra == "hubspot"
315
315
  Provides-Extra: jira
316
- Requires-Dist: tqdm; extra == "jira"
317
- Requires-Dist: pydantic; extra == "jira"
318
316
  Requires-Dist: opentelemetry-sdk; extra == "jira"
319
317
  Requires-Dist: pandas; extra == "jira"
320
318
  Requires-Dist: python-dateutil; extra == "jira"
321
- Requires-Dist: click; extra == "jira"
319
+ Requires-Dist: tqdm; extra == "jira"
322
320
  Requires-Dist: atlassian-python-api; extra == "jira"
321
+ Requires-Dist: click; extra == "jira"
323
322
  Requires-Dist: dataclasses-json; extra == "jira"
323
+ Requires-Dist: pydantic; extra == "jira"
324
324
  Provides-Extra: kafka
325
- Requires-Dist: tqdm; extra == "kafka"
326
- Requires-Dist: pydantic; extra == "kafka"
325
+ Requires-Dist: confluent-kafka; extra == "kafka"
327
326
  Requires-Dist: opentelemetry-sdk; extra == "kafka"
328
327
  Requires-Dist: pandas; extra == "kafka"
329
328
  Requires-Dist: python-dateutil; extra == "kafka"
329
+ Requires-Dist: tqdm; extra == "kafka"
330
330
  Requires-Dist: click; extra == "kafka"
331
- Requires-Dist: confluent-kafka; extra == "kafka"
332
331
  Requires-Dist: dataclasses-json; extra == "kafka"
332
+ Requires-Dist: pydantic; extra == "kafka"
333
333
  Provides-Extra: kdbai
334
334
  Requires-Dist: kdbai-client; extra == "kdbai"
335
335
  Provides-Extra: md
336
- Requires-Dist: unstructured[md]; extra == "md"
337
- Requires-Dist: tqdm; extra == "md"
338
- Requires-Dist: pydantic; extra == "md"
339
336
  Requires-Dist: opentelemetry-sdk; extra == "md"
340
337
  Requires-Dist: pandas; extra == "md"
341
338
  Requires-Dist: python-dateutil; extra == "md"
339
+ Requires-Dist: tqdm; extra == "md"
342
340
  Requires-Dist: click; extra == "md"
343
341
  Requires-Dist: dataclasses-json; extra == "md"
342
+ Requires-Dist: unstructured[md]; extra == "md"
343
+ Requires-Dist: pydantic; extra == "md"
344
344
  Provides-Extra: milvus
345
- Requires-Dist: tqdm; extra == "milvus"
346
- Requires-Dist: pydantic; extra == "milvus"
347
345
  Requires-Dist: opentelemetry-sdk; extra == "milvus"
348
346
  Requires-Dist: pandas; extra == "milvus"
349
- Requires-Dist: pymilvus; extra == "milvus"
350
347
  Requires-Dist: python-dateutil; extra == "milvus"
348
+ Requires-Dist: tqdm; extra == "milvus"
351
349
  Requires-Dist: click; extra == "milvus"
352
350
  Requires-Dist: dataclasses-json; extra == "milvus"
351
+ Requires-Dist: pymilvus; extra == "milvus"
352
+ Requires-Dist: pydantic; extra == "milvus"
353
353
  Provides-Extra: mongodb
354
- Requires-Dist: pymongo; extra == "mongodb"
355
- Requires-Dist: tqdm; extra == "mongodb"
356
- Requires-Dist: pydantic; extra == "mongodb"
357
354
  Requires-Dist: opentelemetry-sdk; extra == "mongodb"
358
355
  Requires-Dist: pandas; extra == "mongodb"
359
356
  Requires-Dist: python-dateutil; extra == "mongodb"
357
+ Requires-Dist: tqdm; extra == "mongodb"
358
+ Requires-Dist: pymongo; extra == "mongodb"
360
359
  Requires-Dist: click; extra == "mongodb"
361
360
  Requires-Dist: dataclasses-json; extra == "mongodb"
361
+ Requires-Dist: pydantic; extra == "mongodb"
362
362
  Provides-Extra: msg
363
- Requires-Dist: unstructured[msg]; extra == "msg"
364
- Requires-Dist: tqdm; extra == "msg"
365
- Requires-Dist: pydantic; extra == "msg"
366
363
  Requires-Dist: opentelemetry-sdk; extra == "msg"
364
+ Requires-Dist: unstructured[msg]; extra == "msg"
367
365
  Requires-Dist: pandas; extra == "msg"
368
366
  Requires-Dist: python-dateutil; extra == "msg"
367
+ Requires-Dist: tqdm; extra == "msg"
369
368
  Requires-Dist: click; extra == "msg"
370
369
  Requires-Dist: dataclasses-json; extra == "msg"
370
+ Requires-Dist: pydantic; extra == "msg"
371
371
  Provides-Extra: notion
372
372
  Requires-Dist: notion-client; extra == "notion"
373
- Requires-Dist: backoff; extra == "notion"
374
- Requires-Dist: tqdm; extra == "notion"
375
- Requires-Dist: pydantic; extra == "notion"
376
- Requires-Dist: pandas; extra == "notion"
377
373
  Requires-Dist: opentelemetry-sdk; extra == "notion"
378
- Requires-Dist: httpx; extra == "notion"
374
+ Requires-Dist: htmlBuilder; extra == "notion"
375
+ Requires-Dist: pandas; extra == "notion"
379
376
  Requires-Dist: python-dateutil; extra == "notion"
377
+ Requires-Dist: tqdm; extra == "notion"
378
+ Requires-Dist: backoff; extra == "notion"
380
379
  Requires-Dist: click; extra == "notion"
381
- Requires-Dist: htmlBuilder; extra == "notion"
382
380
  Requires-Dist: dataclasses-json; extra == "notion"
381
+ Requires-Dist: httpx; extra == "notion"
382
+ Requires-Dist: pydantic; extra == "notion"
383
383
  Provides-Extra: odt
384
- Requires-Dist: unstructured[odt]; extra == "odt"
385
- Requires-Dist: tqdm; extra == "odt"
386
- Requires-Dist: pydantic; extra == "odt"
387
384
  Requires-Dist: opentelemetry-sdk; extra == "odt"
388
385
  Requires-Dist: pandas; extra == "odt"
389
386
  Requires-Dist: python-dateutil; extra == "odt"
387
+ Requires-Dist: tqdm; extra == "odt"
390
388
  Requires-Dist: click; extra == "odt"
391
389
  Requires-Dist: dataclasses-json; extra == "odt"
390
+ Requires-Dist: unstructured[odt]; extra == "odt"
391
+ Requires-Dist: pydantic; extra == "odt"
392
392
  Provides-Extra: onedrive
393
- Requires-Dist: tqdm; extra == "onedrive"
394
393
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
395
- Requires-Dist: pydantic; extra == "onedrive"
396
- Requires-Dist: pandas; extra == "onedrive"
394
+ Requires-Dist: bs4; extra == "onedrive"
397
395
  Requires-Dist: opentelemetry-sdk; extra == "onedrive"
396
+ Requires-Dist: pandas; extra == "onedrive"
398
397
  Requires-Dist: python-dateutil; extra == "onedrive"
399
- Requires-Dist: bs4; extra == "onedrive"
398
+ Requires-Dist: tqdm; extra == "onedrive"
400
399
  Requires-Dist: click; extra == "onedrive"
401
- Requires-Dist: msal; extra == "onedrive"
402
400
  Requires-Dist: dataclasses-json; extra == "onedrive"
401
+ Requires-Dist: msal; extra == "onedrive"
402
+ Requires-Dist: pydantic; extra == "onedrive"
403
403
  Provides-Extra: openai
404
- Requires-Dist: tqdm; extra == "openai"
405
- Requires-Dist: tiktoken; extra == "openai"
406
- Requires-Dist: pydantic; extra == "openai"
407
- Requires-Dist: openai; extra == "openai"
408
- Requires-Dist: pandas; extra == "openai"
409
404
  Requires-Dist: opentelemetry-sdk; extra == "openai"
410
- Requires-Dist: langchain-community; extra == "openai"
405
+ Requires-Dist: pandas; extra == "openai"
406
+ Requires-Dist: tiktoken; extra == "openai"
411
407
  Requires-Dist: python-dateutil; extra == "openai"
408
+ Requires-Dist: tqdm; extra == "openai"
412
409
  Requires-Dist: click; extra == "openai"
413
- Requires-Dist: unstructured; extra == "openai"
414
410
  Requires-Dist: dataclasses-json; extra == "openai"
411
+ Requires-Dist: unstructured; extra == "openai"
412
+ Requires-Dist: langchain-community; extra == "openai"
413
+ Requires-Dist: openai; extra == "openai"
414
+ Requires-Dist: pydantic; extra == "openai"
415
415
  Provides-Extra: opensearch
416
- Requires-Dist: tqdm; extra == "opensearch"
417
- Requires-Dist: pydantic; extra == "opensearch"
418
416
  Requires-Dist: opentelemetry-sdk; extra == "opensearch"
419
417
  Requires-Dist: pandas; extra == "opensearch"
420
418
  Requires-Dist: python-dateutil; extra == "opensearch"
419
+ Requires-Dist: tqdm; extra == "opensearch"
420
+ Requires-Dist: opensearch-py; extra == "opensearch"
421
421
  Requires-Dist: click; extra == "opensearch"
422
422
  Requires-Dist: dataclasses-json; extra == "opensearch"
423
- Requires-Dist: opensearch-py; extra == "opensearch"
423
+ Requires-Dist: pydantic; extra == "opensearch"
424
424
  Provides-Extra: org
425
- Requires-Dist: tqdm; extra == "org"
426
- Requires-Dist: pydantic; extra == "org"
427
425
  Requires-Dist: opentelemetry-sdk; extra == "org"
428
426
  Requires-Dist: pandas; extra == "org"
429
- Requires-Dist: unstructured[org]; extra == "org"
430
427
  Requires-Dist: python-dateutil; extra == "org"
428
+ Requires-Dist: tqdm; extra == "org"
431
429
  Requires-Dist: click; extra == "org"
430
+ Requires-Dist: unstructured[org]; extra == "org"
432
431
  Requires-Dist: dataclasses-json; extra == "org"
432
+ Requires-Dist: pydantic; extra == "org"
433
433
  Provides-Extra: outlook
434
- Requires-Dist: tqdm; extra == "outlook"
435
434
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
436
- Requires-Dist: pydantic; extra == "outlook"
437
- Requires-Dist: pandas; extra == "outlook"
438
435
  Requires-Dist: opentelemetry-sdk; extra == "outlook"
436
+ Requires-Dist: pandas; extra == "outlook"
439
437
  Requires-Dist: python-dateutil; extra == "outlook"
438
+ Requires-Dist: tqdm; extra == "outlook"
440
439
  Requires-Dist: click; extra == "outlook"
441
- Requires-Dist: msal; extra == "outlook"
442
440
  Requires-Dist: dataclasses-json; extra == "outlook"
441
+ Requires-Dist: msal; extra == "outlook"
442
+ Requires-Dist: pydantic; extra == "outlook"
443
443
  Provides-Extra: pdf
444
- Requires-Dist: tqdm; extra == "pdf"
445
- Requires-Dist: pydantic; extra == "pdf"
446
444
  Requires-Dist: opentelemetry-sdk; extra == "pdf"
447
445
  Requires-Dist: pandas; extra == "pdf"
448
- Requires-Dist: unstructured[pdf]; extra == "pdf"
449
446
  Requires-Dist: python-dateutil; extra == "pdf"
447
+ Requires-Dist: tqdm; extra == "pdf"
450
448
  Requires-Dist: click; extra == "pdf"
451
449
  Requires-Dist: dataclasses-json; extra == "pdf"
450
+ Requires-Dist: unstructured[pdf]; extra == "pdf"
451
+ Requires-Dist: pydantic; extra == "pdf"
452
452
  Provides-Extra: pinecone
453
- Requires-Dist: tqdm; extra == "pinecone"
454
- Requires-Dist: pydantic; extra == "pinecone"
455
453
  Requires-Dist: opentelemetry-sdk; extra == "pinecone"
456
454
  Requires-Dist: pandas; extra == "pinecone"
457
455
  Requires-Dist: python-dateutil; extra == "pinecone"
456
+ Requires-Dist: tqdm; extra == "pinecone"
458
457
  Requires-Dist: click; extra == "pinecone"
459
- Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
460
458
  Requires-Dist: dataclasses-json; extra == "pinecone"
459
+ Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
460
+ Requires-Dist: pydantic; extra == "pinecone"
461
461
  Provides-Extra: postgres
462
- Requires-Dist: tqdm; extra == "postgres"
463
- Requires-Dist: pydantic; extra == "postgres"
462
+ Requires-Dist: psycopg2-binary; extra == "postgres"
464
463
  Requires-Dist: opentelemetry-sdk; extra == "postgres"
465
464
  Requires-Dist: pandas; extra == "postgres"
466
465
  Requires-Dist: python-dateutil; extra == "postgres"
466
+ Requires-Dist: tqdm; extra == "postgres"
467
467
  Requires-Dist: click; extra == "postgres"
468
- Requires-Dist: psycopg2-binary; extra == "postgres"
469
468
  Requires-Dist: dataclasses-json; extra == "postgres"
469
+ Requires-Dist: pydantic; extra == "postgres"
470
470
  Provides-Extra: ppt
471
- Requires-Dist: tqdm; extra == "ppt"
472
- Requires-Dist: pydantic; extra == "ppt"
473
471
  Requires-Dist: opentelemetry-sdk; extra == "ppt"
474
472
  Requires-Dist: pandas; extra == "ppt"
475
- Requires-Dist: unstructured[pptx]; extra == "ppt"
476
473
  Requires-Dist: python-dateutil; extra == "ppt"
474
+ Requires-Dist: tqdm; extra == "ppt"
477
475
  Requires-Dist: click; extra == "ppt"
476
+ Requires-Dist: unstructured[pptx]; extra == "ppt"
478
477
  Requires-Dist: dataclasses-json; extra == "ppt"
478
+ Requires-Dist: pydantic; extra == "ppt"
479
479
  Provides-Extra: pptx
480
- Requires-Dist: tqdm; extra == "pptx"
481
- Requires-Dist: pydantic; extra == "pptx"
482
480
  Requires-Dist: opentelemetry-sdk; extra == "pptx"
483
481
  Requires-Dist: pandas; extra == "pptx"
484
- Requires-Dist: unstructured[pptx]; extra == "pptx"
485
482
  Requires-Dist: python-dateutil; extra == "pptx"
483
+ Requires-Dist: tqdm; extra == "pptx"
486
484
  Requires-Dist: click; extra == "pptx"
485
+ Requires-Dist: unstructured[pptx]; extra == "pptx"
487
486
  Requires-Dist: dataclasses-json; extra == "pptx"
487
+ Requires-Dist: pydantic; extra == "pptx"
488
488
  Provides-Extra: qdrant
489
- Requires-Dist: tqdm; extra == "qdrant"
490
- Requires-Dist: pydantic; extra == "qdrant"
491
489
  Requires-Dist: opentelemetry-sdk; extra == "qdrant"
490
+ Requires-Dist: qdrant-client; extra == "qdrant"
492
491
  Requires-Dist: pandas; extra == "qdrant"
493
492
  Requires-Dist: python-dateutil; extra == "qdrant"
493
+ Requires-Dist: tqdm; extra == "qdrant"
494
494
  Requires-Dist: click; extra == "qdrant"
495
- Requires-Dist: qdrant-client; extra == "qdrant"
496
495
  Requires-Dist: dataclasses-json; extra == "qdrant"
496
+ Requires-Dist: pydantic; extra == "qdrant"
497
497
  Provides-Extra: reddit
498
- Requires-Dist: tqdm; extra == "reddit"
499
- Requires-Dist: pydantic; extra == "reddit"
500
498
  Requires-Dist: opentelemetry-sdk; extra == "reddit"
501
499
  Requires-Dist: pandas; extra == "reddit"
502
500
  Requires-Dist: python-dateutil; extra == "reddit"
501
+ Requires-Dist: praw; extra == "reddit"
502
+ Requires-Dist: tqdm; extra == "reddit"
503
503
  Requires-Dist: click; extra == "reddit"
504
504
  Requires-Dist: dataclasses-json; extra == "reddit"
505
- Requires-Dist: praw; extra == "reddit"
505
+ Requires-Dist: pydantic; extra == "reddit"
506
506
  Provides-Extra: remote
507
- Requires-Dist: tqdm; extra == "remote"
508
- Requires-Dist: pydantic; extra == "remote"
509
507
  Requires-Dist: opentelemetry-sdk; extra == "remote"
510
508
  Requires-Dist: pandas; extra == "remote"
511
509
  Requires-Dist: python-dateutil; extra == "remote"
510
+ Requires-Dist: tqdm; extra == "remote"
512
511
  Requires-Dist: click; extra == "remote"
513
512
  Requires-Dist: dataclasses-json; extra == "remote"
514
513
  Requires-Dist: unstructured-client>=0.23.0; extra == "remote"
514
+ Requires-Dist: pydantic; extra == "remote"
515
515
  Provides-Extra: rst
516
- Requires-Dist: tqdm; extra == "rst"
517
- Requires-Dist: pydantic; extra == "rst"
516
+ Requires-Dist: unstructured[rst]; extra == "rst"
518
517
  Requires-Dist: opentelemetry-sdk; extra == "rst"
519
518
  Requires-Dist: pandas; extra == "rst"
520
519
  Requires-Dist: python-dateutil; extra == "rst"
520
+ Requires-Dist: tqdm; extra == "rst"
521
521
  Requires-Dist: click; extra == "rst"
522
522
  Requires-Dist: dataclasses-json; extra == "rst"
523
- Requires-Dist: unstructured[rst]; extra == "rst"
523
+ Requires-Dist: pydantic; extra == "rst"
524
524
  Provides-Extra: rtf
525
- Requires-Dist: tqdm; extra == "rtf"
526
- Requires-Dist: pydantic; extra == "rtf"
527
525
  Requires-Dist: opentelemetry-sdk; extra == "rtf"
528
526
  Requires-Dist: pandas; extra == "rtf"
529
527
  Requires-Dist: python-dateutil; extra == "rtf"
530
- Requires-Dist: unstructured[rtf]; extra == "rtf"
528
+ Requires-Dist: tqdm; extra == "rtf"
531
529
  Requires-Dist: click; extra == "rtf"
532
530
  Requires-Dist: dataclasses-json; extra == "rtf"
531
+ Requires-Dist: unstructured[rtf]; extra == "rtf"
532
+ Requires-Dist: pydantic; extra == "rtf"
533
533
  Provides-Extra: s3
534
534
  Requires-Dist: fsspec; extra == "s3"
535
- Requires-Dist: tqdm; extra == "s3"
536
- Requires-Dist: pydantic; extra == "s3"
537
- Requires-Dist: pandas; extra == "s3"
538
535
  Requires-Dist: opentelemetry-sdk; extra == "s3"
536
+ Requires-Dist: pandas; extra == "s3"
539
537
  Requires-Dist: python-dateutil; extra == "s3"
538
+ Requires-Dist: tqdm; extra == "s3"
540
539
  Requires-Dist: click; extra == "s3"
541
- Requires-Dist: s3fs; extra == "s3"
542
540
  Requires-Dist: dataclasses-json; extra == "s3"
541
+ Requires-Dist: pydantic; extra == "s3"
542
+ Requires-Dist: s3fs; extra == "s3"
543
543
  Provides-Extra: salesforce
544
544
  Requires-Dist: simple-salesforce; extra == "salesforce"
545
- Requires-Dist: tqdm; extra == "salesforce"
546
- Requires-Dist: pydantic; extra == "salesforce"
547
545
  Requires-Dist: opentelemetry-sdk; extra == "salesforce"
548
546
  Requires-Dist: pandas; extra == "salesforce"
549
547
  Requires-Dist: python-dateutil; extra == "salesforce"
548
+ Requires-Dist: tqdm; extra == "salesforce"
550
549
  Requires-Dist: click; extra == "salesforce"
551
550
  Requires-Dist: dataclasses-json; extra == "salesforce"
551
+ Requires-Dist: pydantic; extra == "salesforce"
552
552
  Provides-Extra: sftp
553
553
  Requires-Dist: fsspec; extra == "sftp"
554
- Requires-Dist: tqdm; extra == "sftp"
555
- Requires-Dist: pydantic; extra == "sftp"
556
- Requires-Dist: pandas; extra == "sftp"
557
554
  Requires-Dist: opentelemetry-sdk; extra == "sftp"
555
+ Requires-Dist: pandas; extra == "sftp"
558
556
  Requires-Dist: python-dateutil; extra == "sftp"
557
+ Requires-Dist: tqdm; extra == "sftp"
559
558
  Requires-Dist: click; extra == "sftp"
560
- Requires-Dist: paramiko; extra == "sftp"
561
559
  Requires-Dist: dataclasses-json; extra == "sftp"
560
+ Requires-Dist: paramiko; extra == "sftp"
561
+ Requires-Dist: pydantic; extra == "sftp"
562
562
  Provides-Extra: sharepoint
563
- Requires-Dist: tqdm; extra == "sharepoint"
564
563
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
565
- Requires-Dist: pydantic; extra == "sharepoint"
566
- Requires-Dist: pandas; extra == "sharepoint"
567
564
  Requires-Dist: opentelemetry-sdk; extra == "sharepoint"
565
+ Requires-Dist: pandas; extra == "sharepoint"
568
566
  Requires-Dist: python-dateutil; extra == "sharepoint"
567
+ Requires-Dist: tqdm; extra == "sharepoint"
569
568
  Requires-Dist: click; extra == "sharepoint"
570
- Requires-Dist: msal; extra == "sharepoint"
571
569
  Requires-Dist: dataclasses-json; extra == "sharepoint"
570
+ Requires-Dist: msal; extra == "sharepoint"
571
+ Requires-Dist: pydantic; extra == "sharepoint"
572
572
  Provides-Extra: singlestore
573
573
  Requires-Dist: singlestoredb; extra == "singlestore"
574
- Requires-Dist: tqdm; extra == "singlestore"
575
- Requires-Dist: pydantic; extra == "singlestore"
576
- Requires-Dist: pandas; extra == "singlestore"
577
574
  Requires-Dist: opentelemetry-sdk; extra == "singlestore"
575
+ Requires-Dist: pandas; extra == "singlestore"
578
576
  Requires-Dist: python-dateutil; extra == "singlestore"
577
+ Requires-Dist: tqdm; extra == "singlestore"
579
578
  Requires-Dist: click; extra == "singlestore"
580
579
  Requires-Dist: dataclasses-json; extra == "singlestore"
580
+ Requires-Dist: pydantic; extra == "singlestore"
581
581
  Provides-Extra: slack
582
- Requires-Dist: tqdm; extra == "slack"
583
- Requires-Dist: pydantic; extra == "slack"
584
582
  Requires-Dist: opentelemetry-sdk; extra == "slack"
583
+ Requires-Dist: slack-sdk; extra == "slack"
585
584
  Requires-Dist: pandas; extra == "slack"
586
585
  Requires-Dist: python-dateutil; extra == "slack"
586
+ Requires-Dist: tqdm; extra == "slack"
587
587
  Requires-Dist: click; extra == "slack"
588
588
  Requires-Dist: dataclasses-json; extra == "slack"
589
- Requires-Dist: slack-sdk; extra == "slack"
589
+ Requires-Dist: pydantic; extra == "slack"
590
590
  Provides-Extra: tsv
591
- Requires-Dist: unstructured[tsv]; extra == "tsv"
592
- Requires-Dist: tqdm; extra == "tsv"
593
- Requires-Dist: pydantic; extra == "tsv"
594
591
  Requires-Dist: opentelemetry-sdk; extra == "tsv"
595
592
  Requires-Dist: pandas; extra == "tsv"
596
593
  Requires-Dist: python-dateutil; extra == "tsv"
594
+ Requires-Dist: tqdm; extra == "tsv"
595
+ Requires-Dist: unstructured[tsv]; extra == "tsv"
597
596
  Requires-Dist: click; extra == "tsv"
598
597
  Requires-Dist: dataclasses-json; extra == "tsv"
598
+ Requires-Dist: pydantic; extra == "tsv"
599
599
  Provides-Extra: vectara
600
- Requires-Dist: tqdm; extra == "vectara"
601
- Requires-Dist: pydantic; extra == "vectara"
602
600
  Requires-Dist: opentelemetry-sdk; extra == "vectara"
601
+ Requires-Dist: requests; extra == "vectara"
603
602
  Requires-Dist: pandas; extra == "vectara"
604
603
  Requires-Dist: python-dateutil; extra == "vectara"
604
+ Requires-Dist: tqdm; extra == "vectara"
605
605
  Requires-Dist: click; extra == "vectara"
606
606
  Requires-Dist: dataclasses-json; extra == "vectara"
607
- Requires-Dist: requests; extra == "vectara"
607
+ Requires-Dist: pydantic; extra == "vectara"
608
608
  Provides-Extra: weaviate
609
- Requires-Dist: weaviate-client; extra == "weaviate"
610
- Requires-Dist: tqdm; extra == "weaviate"
611
- Requires-Dist: pydantic; extra == "weaviate"
612
609
  Requires-Dist: opentelemetry-sdk; extra == "weaviate"
613
610
  Requires-Dist: pandas; extra == "weaviate"
614
611
  Requires-Dist: python-dateutil; extra == "weaviate"
612
+ Requires-Dist: weaviate-client; extra == "weaviate"
613
+ Requires-Dist: tqdm; extra == "weaviate"
615
614
  Requires-Dist: click; extra == "weaviate"
616
615
  Requires-Dist: dataclasses-json; extra == "weaviate"
616
+ Requires-Dist: pydantic; extra == "weaviate"
617
617
  Provides-Extra: wikipedia
618
- Requires-Dist: tqdm; extra == "wikipedia"
619
- Requires-Dist: pydantic; extra == "wikipedia"
620
618
  Requires-Dist: opentelemetry-sdk; extra == "wikipedia"
621
619
  Requires-Dist: pandas; extra == "wikipedia"
622
620
  Requires-Dist: python-dateutil; extra == "wikipedia"
621
+ Requires-Dist: tqdm; extra == "wikipedia"
623
622
  Requires-Dist: click; extra == "wikipedia"
624
- Requires-Dist: wikipedia; extra == "wikipedia"
625
623
  Requires-Dist: dataclasses-json; extra == "wikipedia"
624
+ Requires-Dist: wikipedia; extra == "wikipedia"
625
+ Requires-Dist: pydantic; extra == "wikipedia"
626
626
  Provides-Extra: xlsx
627
- Requires-Dist: tqdm; extra == "xlsx"
628
- Requires-Dist: pydantic; extra == "xlsx"
629
627
  Requires-Dist: opentelemetry-sdk; extra == "xlsx"
630
628
  Requires-Dist: pandas; extra == "xlsx"
631
629
  Requires-Dist: python-dateutil; extra == "xlsx"
630
+ Requires-Dist: tqdm; extra == "xlsx"
632
631
  Requires-Dist: click; extra == "xlsx"
633
- Requires-Dist: unstructured[xlsx]; extra == "xlsx"
634
632
  Requires-Dist: dataclasses-json; extra == "xlsx"
633
+ Requires-Dist: unstructured[xlsx]; extra == "xlsx"
634
+ Requires-Dist: pydantic; extra == "xlsx"
635
635
 
636
636
  # Unstructured Ingest
637
637
 
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=LmJ1WyvltOnn1Z2gO4OuQElGKiK5a9LJhdYstc5Jec0,43
2
+ unstructured_ingest/__version__.py,sha256=hVnhWghkqnr7x2fn9tKrlVtVMYcocdwt-lq1-dsXu4A,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
5
  unstructured_ingest/interfaces.py,sha256=AeEywcSKCMA5AiEdENLpu_yPcXp_c6wpvESePfC00yo,31214
@@ -248,7 +248,7 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
248
248
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
249
  unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
250
250
  unstructured_ingest/utils/compression.py,sha256=mgndeNULGH7stoC51hhT4B9HwqMUSL0jhphcia6F_bw,4433
251
- unstructured_ingest/utils/data_prep.py,sha256=oizt8xZ15hExZ_8xacMG0j1LSH4ILreQpXJUYm2Bb_M,3917
251
+ unstructured_ingest/utils/data_prep.py,sha256=SqhRlkzqFH1Sl8YSxgOQvP1tyhcAFcTOepDTVEv11FY,4097
252
252
  unstructured_ingest/utils/dep_check.py,sha256=cVEqZtMwji8BIt7pjtUOMtEmN7KaNXRXwelEKFpOdW8,1914
253
253
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
254
254
  unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
@@ -313,7 +313,7 @@ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=hNMtjdNF6Nv8E_0n5uD
313
313
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=q_GRuG2RQ5-8ajefifKuhFO52wCVhtU9j4ZIEf5hNas,4948
314
314
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=-B8ELr0rnspzrTy6HBvgbvuiF1eEKRQyCT1ocwmET5Q,9145
315
315
  unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=z4UTenXy-iqy9Xlqlf1UTiGdOhIDPowiMg8juWnCh9M,6755
316
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=_RwrV7qVbMOsvD8LuGYKQO3UrSFJvgjTGzx304Jj4l0,5459
316
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=w7bY9s61c9_a-k3NjAmGjHXJQks-9KpRfpXKW9B6q9E,5744
317
317
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
318
318
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=_ZTjtFNcKcJ0z4cvEZml18TdOMm-Kbwlz8nxTTjp9nc,19500
319
319
  unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
@@ -329,9 +329,9 @@ unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=O1j0hIYWI4lPpTQ
329
329
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=cOMvMh0C9rtyEPJ0X59Fn-qb11LFUMRfeUgsi3QRWUk,6390
330
330
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=NkcU4U2DQWWuM8UHkez67C3SEOZpVyRtmtNS-z-F0Fw,6056
331
331
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
332
- unstructured_ingest-0.0.12.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
333
- unstructured_ingest-0.0.12.dist-info/METADATA,sha256=H_nAi8x37TLMam7ByQOKFl_2tBUg_jIPa1Dn4VHpPKk,28110
334
- unstructured_ingest-0.0.12.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
335
- unstructured_ingest-0.0.12.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
336
- unstructured_ingest-0.0.12.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
337
- unstructured_ingest-0.0.12.dist-info/RECORD,,
332
+ unstructured_ingest-0.0.13.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
333
+ unstructured_ingest-0.0.13.dist-info/METADATA,sha256=xIJhdAI3i4HtbkN5yFE5BwhBh4pYW6FWD9m_iPjV1TM,28110
334
+ unstructured_ingest-0.0.13.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
335
+ unstructured_ingest-0.0.13.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
336
+ unstructured_ingest-0.0.13.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
337
+ unstructured_ingest-0.0.13.dist-info/RECORD,,