unstructured-ingest 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.4.4" # pragma: no cover
1
+ __version__ = "0.4.5" # pragma: no cover
@@ -203,7 +203,14 @@ class Pipeline:
203
203
 
204
204
  def get_indices(self) -> list[dict]:
205
205
  if self.indexer_step.process.is_async():
206
- indices = asyncio.run(self.indexer_step.run_async())
206
+
207
+ async def run_async():
208
+ output = []
209
+ async for i in self.indexer_step.run_async():
210
+ output.append(i)
211
+ return output
212
+
213
+ indices = asyncio.run(run_async())
207
214
  else:
208
215
  indices = self.indexer_step.run()
209
216
  indices_inputs = [{"file_data_path": i} for i in indices]
@@ -223,7 +223,7 @@ class OnedriveDownloader(Downloader):
223
223
  download_config: OnedriveDownloaderConfig
224
224
 
225
225
  @SourceConnectionNetworkError.wrap
226
- def _fetch_file(self, file_data: FileData):
226
+ def _fetch_file(self, file_data: FileData) -> DriveItem:
227
227
  if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath:
228
228
  raise ValueError(
229
229
  f"file data doesn't have enough information to get "
@@ -257,7 +257,7 @@ class OnedriveDownloader(Downloader):
257
257
  file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
258
258
  else:
259
259
  with download_path.open(mode="wb") as f:
260
- file.download(f).execute_query()
260
+ file.download_session(f).execute_query()
261
261
  return self.generate_download_response(file_data=file_data, download_path=download_path)
262
262
  except Exception as e:
263
263
  logger.error(f"[{CONNECTOR_TYPE}] Exception during downloading: {e}", exc_info=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -23,12 +23,12 @@ Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
25
  Requires-Dist: pydantic>=2.7
26
- Requires-Dist: click
27
26
  Requires-Dist: pandas
28
- Requires-Dist: dataclasses-json
29
27
  Requires-Dist: tqdm
30
- Requires-Dist: opentelemetry-sdk
31
28
  Requires-Dist: python-dateutil
29
+ Requires-Dist: click
30
+ Requires-Dist: dataclasses-json
31
+ Requires-Dist: opentelemetry-sdk
32
32
  Provides-Extra: airtable
33
33
  Requires-Dist: pyairtable; extra == "airtable"
34
34
  Provides-Extra: astradb
@@ -45,8 +45,8 @@ Provides-Extra: biomed
45
45
  Requires-Dist: bs4; extra == "biomed"
46
46
  Requires-Dist: requests; extra == "biomed"
47
47
  Provides-Extra: box
48
- Requires-Dist: boxfs; extra == "box"
49
48
  Requires-Dist: fsspec; extra == "box"
49
+ Requires-Dist: boxfs; extra == "box"
50
50
  Provides-Extra: chroma
51
51
  Requires-Dist: chromadb; extra == "chroma"
52
52
  Provides-Extra: clarifai
@@ -63,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
63
63
  Provides-Extra: databricks-volumes
64
64
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
65
65
  Provides-Extra: delta-table
66
- Requires-Dist: deltalake; extra == "delta-table"
67
66
  Requires-Dist: boto3; extra == "delta-table"
67
+ Requires-Dist: deltalake; extra == "delta-table"
68
68
  Provides-Extra: discord
69
69
  Requires-Dist: discord.py; extra == "discord"
70
70
  Provides-Extra: doc
@@ -72,8 +72,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
72
72
  Provides-Extra: docx
73
73
  Requires-Dist: unstructured[docx]; extra == "docx"
74
74
  Provides-Extra: dropbox
75
- Requires-Dist: dropboxdrivefs; extra == "dropbox"
76
75
  Requires-Dist: fsspec; extra == "dropbox"
76
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
77
77
  Provides-Extra: duckdb
78
78
  Requires-Dist: duckdb; extra == "duckdb"
79
79
  Provides-Extra: elasticsearch
@@ -83,8 +83,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
83
83
  Provides-Extra: embed-mixedbreadai
84
84
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
85
85
  Provides-Extra: embed-octoai
86
- Requires-Dist: openai; extra == "embed-octoai"
87
86
  Requires-Dist: tiktoken; extra == "embed-octoai"
87
+ Requires-Dist: openai; extra == "embed-octoai"
88
88
  Provides-Extra: embed-vertexai
89
89
  Requires-Dist: vertexai; extra == "embed-vertexai"
90
90
  Provides-Extra: embed-voyageai
@@ -103,8 +103,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
103
103
  Provides-Extra: google-drive
104
104
  Requires-Dist: google-api-python-client; extra == "google-drive"
105
105
  Provides-Extra: hubspot
106
- Requires-Dist: urllib3; extra == "hubspot"
107
106
  Requires-Dist: hubspot-api-client; extra == "hubspot"
107
+ Requires-Dist: urllib3; extra == "hubspot"
108
108
  Provides-Extra: jira
109
109
  Requires-Dist: atlassian-python-api; extra == "jira"
110
110
  Provides-Extra: kafka
@@ -122,23 +122,23 @@ Requires-Dist: pymongo; extra == "mongodb"
122
122
  Provides-Extra: msg
123
123
  Requires-Dist: unstructured[msg]; extra == "msg"
124
124
  Provides-Extra: neo4j
125
- Requires-Dist: networkx; extra == "neo4j"
126
125
  Requires-Dist: cymple; extra == "neo4j"
127
126
  Requires-Dist: neo4j; extra == "neo4j"
127
+ Requires-Dist: networkx; extra == "neo4j"
128
128
  Provides-Extra: notion
129
129
  Requires-Dist: backoff; extra == "notion"
130
- Requires-Dist: httpx; extra == "notion"
131
130
  Requires-Dist: notion-client; extra == "notion"
131
+ Requires-Dist: httpx; extra == "notion"
132
132
  Requires-Dist: htmlBuilder; extra == "notion"
133
133
  Provides-Extra: odt
134
134
  Requires-Dist: unstructured[odt]; extra == "odt"
135
135
  Provides-Extra: onedrive
136
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
137
136
  Requires-Dist: bs4; extra == "onedrive"
137
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
138
  Requires-Dist: msal; extra == "onedrive"
139
139
  Provides-Extra: openai
140
- Requires-Dist: openai; extra == "openai"
141
140
  Requires-Dist: tiktoken; extra == "openai"
141
+ Requires-Dist: openai; extra == "openai"
142
142
  Provides-Extra: opensearch
143
143
  Requires-Dist: opensearch-py; extra == "opensearch"
144
144
  Provides-Extra: org
@@ -169,8 +169,8 @@ Requires-Dist: unstructured[rst]; extra == "rst"
169
169
  Provides-Extra: rtf
170
170
  Requires-Dist: unstructured[rtf]; extra == "rtf"
171
171
  Provides-Extra: s3
172
- Requires-Dist: fsspec; extra == "s3"
173
172
  Requires-Dist: s3fs; extra == "s3"
173
+ Requires-Dist: fsspec; extra == "s3"
174
174
  Provides-Extra: salesforce
175
175
  Requires-Dist: simple-salesforce; extra == "salesforce"
176
176
  Provides-Extra: sftp
@@ -184,20 +184,20 @@ Requires-Dist: singlestoredb; extra == "singlestore"
184
184
  Provides-Extra: slack
185
185
  Requires-Dist: slack-sdk[optional]; extra == "slack"
186
186
  Provides-Extra: snowflake
187
- Requires-Dist: psycopg2-binary; extra == "snowflake"
188
187
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
188
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
189
189
  Provides-Extra: togetherai
190
190
  Requires-Dist: together; extra == "togetherai"
191
191
  Provides-Extra: tsv
192
192
  Requires-Dist: unstructured[tsv]; extra == "tsv"
193
193
  Provides-Extra: vastdb
194
194
  Requires-Dist: ibis; extra == "vastdb"
195
- Requires-Dist: pyarrow; extra == "vastdb"
196
195
  Requires-Dist: vastdb; extra == "vastdb"
196
+ Requires-Dist: pyarrow; extra == "vastdb"
197
197
  Provides-Extra: vectara
198
+ Requires-Dist: aiofiles; extra == "vectara"
198
199
  Requires-Dist: httpx; extra == "vectara"
199
200
  Requires-Dist: requests; extra == "vectara"
200
- Requires-Dist: aiofiles; extra == "vectara"
201
201
  Provides-Extra: weaviate
202
202
  Requires-Dist: weaviate-client; extra == "weaviate"
203
203
  Provides-Extra: wikipedia
@@ -102,7 +102,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
102
102
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
103
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
104
104
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
105
- unstructured_ingest/__version__.py,sha256=k5K6WAWnRkNeRW39AQyaFiSCUwHRsxlNOpkoF4MqU3c,42
105
+ unstructured_ingest/__version__.py,sha256=LZI8wLYHcTzImgX-mBT2GEDfuLfBbZawJ40Z_jZShYc,42
106
106
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
107
107
  unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
108
108
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -399,7 +399,7 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-
399
399
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
400
400
  unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
401
401
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
402
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=y6AkUBUL2r3t4OO0jWKomtN3v8U7EDtMPrJ8VYRo7VM,16344
402
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=-1TlqG33x_GGjGMk4Y8Psx1z6Prbuj11MMAR2WAuhBc,16520
403
403
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
404
404
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
405
405
  unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
@@ -433,7 +433,7 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
433
433
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
434
434
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
435
435
  unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
436
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=sVRk1LodwVS9do3kmetO8kvSdEzfR-oATXa6covC64Y,17365
436
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=9UK5nILtrAXSwpp_aeANgqvQf_UbH3J3czN7y-DL9d0,17386
437
437
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
438
438
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
439
439
  unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
@@ -561,9 +561,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
561
561
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
562
562
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
563
563
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
564
- unstructured_ingest-0.4.4.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
565
- unstructured_ingest-0.4.4.dist-info/METADATA,sha256=h_Yeg9jJuyJmsipS3juMfEozK8U6sNyA-PotmiuuBsE,8051
566
- unstructured_ingest-0.4.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
567
- unstructured_ingest-0.4.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
568
- unstructured_ingest-0.4.4.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
569
- unstructured_ingest-0.4.4.dist-info/RECORD,,
564
+ unstructured_ingest-0.4.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
565
+ unstructured_ingest-0.4.5.dist-info/METADATA,sha256=cetNdLOmsQvHFt7j2m2utZKATnaMx9BIAV2i386aoTc,8051
566
+ unstructured_ingest-0.4.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
567
+ unstructured_ingest-0.4.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
568
+ unstructured_ingest-0.4.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
569
+ unstructured_ingest-0.4.5.dist-info/RECORD,,