unstructured-ingest 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/pipeline/pipeline.py +8 -1
- unstructured_ingest/v2/processes/connectors/onedrive.py +2 -2
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/METADATA +17 -17
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/RECORD +9 -9
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.4.4" # pragma: no cover
|
|
1
|
+
__version__ = "0.4.5" # pragma: no cover
|
|
@@ -203,7 +203,14 @@ class Pipeline:
|
|
|
203
203
|
|
|
204
204
|
def get_indices(self) -> list[dict]:
|
|
205
205
|
if self.indexer_step.process.is_async():
|
|
206
|
-
|
|
206
|
+
|
|
207
|
+
async def run_async():
|
|
208
|
+
output = []
|
|
209
|
+
async for i in self.indexer_step.run_async():
|
|
210
|
+
output.append(i)
|
|
211
|
+
return output
|
|
212
|
+
|
|
213
|
+
indices = asyncio.run(run_async())
|
|
207
214
|
else:
|
|
208
215
|
indices = self.indexer_step.run()
|
|
209
216
|
indices_inputs = [{"file_data_path": i} for i in indices]
|
|
@@ -223,7 +223,7 @@ class OnedriveDownloader(Downloader):
|
|
|
223
223
|
download_config: OnedriveDownloaderConfig
|
|
224
224
|
|
|
225
225
|
@SourceConnectionNetworkError.wrap
|
|
226
|
-
def _fetch_file(self, file_data: FileData):
|
|
226
|
+
def _fetch_file(self, file_data: FileData) -> DriveItem:
|
|
227
227
|
if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath:
|
|
228
228
|
raise ValueError(
|
|
229
229
|
f"file data doesn't have enough information to get "
|
|
@@ -257,7 +257,7 @@ class OnedriveDownloader(Downloader):
|
|
|
257
257
|
file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
|
|
258
258
|
else:
|
|
259
259
|
with download_path.open(mode="wb") as f:
|
|
260
|
-
file.
|
|
260
|
+
file.download_session(f).execute_query()
|
|
261
261
|
return self.generate_download_response(file_data=file_data, download_path=download_path)
|
|
262
262
|
except Exception as e:
|
|
263
263
|
logger.error(f"[{CONNECTOR_TYPE}] Exception during downloading: {e}", exc_info=True)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.4.4
|
|
3
|
+
Version: 0.4.5
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -23,12 +23,12 @@ Requires-Python: >=3.9.0,<3.14
|
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
25
|
Requires-Dist: pydantic>=2.7
|
|
26
|
-
Requires-Dist: click
|
|
27
26
|
Requires-Dist: pandas
|
|
28
|
-
Requires-Dist: dataclasses-json
|
|
29
27
|
Requires-Dist: tqdm
|
|
30
|
-
Requires-Dist: opentelemetry-sdk
|
|
31
28
|
Requires-Dist: python-dateutil
|
|
29
|
+
Requires-Dist: click
|
|
30
|
+
Requires-Dist: dataclasses-json
|
|
31
|
+
Requires-Dist: opentelemetry-sdk
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
@@ -45,8 +45,8 @@ Provides-Extra: biomed
|
|
|
45
45
|
Requires-Dist: bs4; extra == "biomed"
|
|
46
46
|
Requires-Dist: requests; extra == "biomed"
|
|
47
47
|
Provides-Extra: box
|
|
48
|
-
Requires-Dist: boxfs; extra == "box"
|
|
49
48
|
Requires-Dist: fsspec; extra == "box"
|
|
49
|
+
Requires-Dist: boxfs; extra == "box"
|
|
50
50
|
Provides-Extra: chroma
|
|
51
51
|
Requires-Dist: chromadb; extra == "chroma"
|
|
52
52
|
Provides-Extra: clarifai
|
|
@@ -63,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
63
63
|
Provides-Extra: databricks-volumes
|
|
64
64
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
65
65
|
Provides-Extra: delta-table
|
|
66
|
-
Requires-Dist: deltalake; extra == "delta-table"
|
|
67
66
|
Requires-Dist: boto3; extra == "delta-table"
|
|
67
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
68
68
|
Provides-Extra: discord
|
|
69
69
|
Requires-Dist: discord.py; extra == "discord"
|
|
70
70
|
Provides-Extra: doc
|
|
@@ -72,8 +72,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
72
72
|
Provides-Extra: docx
|
|
73
73
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
74
74
|
Provides-Extra: dropbox
|
|
75
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
76
75
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
76
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
77
77
|
Provides-Extra: duckdb
|
|
78
78
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
79
79
|
Provides-Extra: elasticsearch
|
|
@@ -83,8 +83,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
|
83
83
|
Provides-Extra: embed-mixedbreadai
|
|
84
84
|
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
85
85
|
Provides-Extra: embed-octoai
|
|
86
|
-
Requires-Dist: openai; extra == "embed-octoai"
|
|
87
86
|
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
87
|
+
Requires-Dist: openai; extra == "embed-octoai"
|
|
88
88
|
Provides-Extra: embed-vertexai
|
|
89
89
|
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
90
90
|
Provides-Extra: embed-voyageai
|
|
@@ -103,8 +103,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
|
|
|
103
103
|
Provides-Extra: google-drive
|
|
104
104
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
105
105
|
Provides-Extra: hubspot
|
|
106
|
-
Requires-Dist: urllib3; extra == "hubspot"
|
|
107
106
|
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
107
|
+
Requires-Dist: urllib3; extra == "hubspot"
|
|
108
108
|
Provides-Extra: jira
|
|
109
109
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
110
110
|
Provides-Extra: kafka
|
|
@@ -122,23 +122,23 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
122
122
|
Provides-Extra: msg
|
|
123
123
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
124
124
|
Provides-Extra: neo4j
|
|
125
|
-
Requires-Dist: networkx; extra == "neo4j"
|
|
126
125
|
Requires-Dist: cymple; extra == "neo4j"
|
|
127
126
|
Requires-Dist: neo4j; extra == "neo4j"
|
|
127
|
+
Requires-Dist: networkx; extra == "neo4j"
|
|
128
128
|
Provides-Extra: notion
|
|
129
129
|
Requires-Dist: backoff; extra == "notion"
|
|
130
|
-
Requires-Dist: httpx; extra == "notion"
|
|
131
130
|
Requires-Dist: notion-client; extra == "notion"
|
|
131
|
+
Requires-Dist: httpx; extra == "notion"
|
|
132
132
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
133
133
|
Provides-Extra: odt
|
|
134
134
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
137
136
|
Requires-Dist: bs4; extra == "onedrive"
|
|
137
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
138
138
|
Requires-Dist: msal; extra == "onedrive"
|
|
139
139
|
Provides-Extra: openai
|
|
140
|
-
Requires-Dist: openai; extra == "openai"
|
|
141
140
|
Requires-Dist: tiktoken; extra == "openai"
|
|
141
|
+
Requires-Dist: openai; extra == "openai"
|
|
142
142
|
Provides-Extra: opensearch
|
|
143
143
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
144
144
|
Provides-Extra: org
|
|
@@ -169,8 +169,8 @@ Requires-Dist: unstructured[rst]; extra == "rst"
|
|
|
169
169
|
Provides-Extra: rtf
|
|
170
170
|
Requires-Dist: unstructured[rtf]; extra == "rtf"
|
|
171
171
|
Provides-Extra: s3
|
|
172
|
-
Requires-Dist: fsspec; extra == "s3"
|
|
173
172
|
Requires-Dist: s3fs; extra == "s3"
|
|
173
|
+
Requires-Dist: fsspec; extra == "s3"
|
|
174
174
|
Provides-Extra: salesforce
|
|
175
175
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
176
176
|
Provides-Extra: sftp
|
|
@@ -184,20 +184,20 @@ Requires-Dist: singlestoredb; extra == "singlestore"
|
|
|
184
184
|
Provides-Extra: slack
|
|
185
185
|
Requires-Dist: slack-sdk[optional]; extra == "slack"
|
|
186
186
|
Provides-Extra: snowflake
|
|
187
|
-
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
188
187
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
188
|
+
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
189
189
|
Provides-Extra: togetherai
|
|
190
190
|
Requires-Dist: together; extra == "togetherai"
|
|
191
191
|
Provides-Extra: tsv
|
|
192
192
|
Requires-Dist: unstructured[tsv]; extra == "tsv"
|
|
193
193
|
Provides-Extra: vastdb
|
|
194
194
|
Requires-Dist: ibis; extra == "vastdb"
|
|
195
|
-
Requires-Dist: pyarrow; extra == "vastdb"
|
|
196
195
|
Requires-Dist: vastdb; extra == "vastdb"
|
|
196
|
+
Requires-Dist: pyarrow; extra == "vastdb"
|
|
197
197
|
Provides-Extra: vectara
|
|
198
|
+
Requires-Dist: aiofiles; extra == "vectara"
|
|
198
199
|
Requires-Dist: httpx; extra == "vectara"
|
|
199
200
|
Requires-Dist: requests; extra == "vectara"
|
|
200
|
-
Requires-Dist: aiofiles; extra == "vectara"
|
|
201
201
|
Provides-Extra: weaviate
|
|
202
202
|
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
203
203
|
Provides-Extra: wikipedia
|
|
@@ -102,7 +102,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
102
102
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
103
103
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
104
104
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
105
|
-
unstructured_ingest/__version__.py,sha256=
|
|
105
|
+
unstructured_ingest/__version__.py,sha256=LZI8wLYHcTzImgX-mBT2GEDfuLfBbZawJ40Z_jZShYc,42
|
|
106
106
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
107
107
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
108
108
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -399,7 +399,7 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-
|
|
|
399
399
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
400
400
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
401
401
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
402
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256
|
|
402
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=-1TlqG33x_GGjGMk4Y8Psx1z6Prbuj11MMAR2WAuhBc,16520
|
|
403
403
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
404
404
|
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
|
|
405
405
|
unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
|
|
@@ -433,7 +433,7 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
|
|
|
433
433
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
|
|
434
434
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
435
435
|
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
|
|
436
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=9UK5nILtrAXSwpp_aeANgqvQf_UbH3J3czN7y-DL9d0,17386
|
|
437
437
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
438
438
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
|
|
439
439
|
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
|
|
@@ -561,9 +561,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
|
|
|
561
561
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
562
562
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
563
563
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
|
|
564
|
-
unstructured_ingest-0.4.
|
|
565
|
-
unstructured_ingest-0.4.
|
|
566
|
-
unstructured_ingest-0.4.
|
|
567
|
-
unstructured_ingest-0.4.
|
|
568
|
-
unstructured_ingest-0.4.
|
|
569
|
-
unstructured_ingest-0.4.
|
|
564
|
+
unstructured_ingest-0.4.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
565
|
+
unstructured_ingest-0.4.5.dist-info/METADATA,sha256=cetNdLOmsQvHFt7j2m2utZKATnaMx9BIAV2i386aoTc,8051
|
|
566
|
+
unstructured_ingest-0.4.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
567
|
+
unstructured_ingest-0.4.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
568
|
+
unstructured_ingest-0.4.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
569
|
+
unstructured_ingest-0.4.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.4.4.dist-info → unstructured_ingest-0.4.5.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|