unstructured-ingest 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (28) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/__init__.py +17 -0
  3. unstructured_ingest/embed/bedrock.py +70 -0
  4. unstructured_ingest/embed/huggingface.py +73 -0
  5. unstructured_ingest/embed/interfaces.py +36 -0
  6. unstructured_ingest/embed/mixedbreadai.py +177 -0
  7. unstructured_ingest/embed/octoai.py +63 -0
  8. unstructured_ingest/embed/openai.py +61 -0
  9. unstructured_ingest/embed/vertexai.py +88 -0
  10. unstructured_ingest/embed/voyageai.py +69 -0
  11. unstructured_ingest/interfaces.py +17 -7
  12. unstructured_ingest/pipeline/reformat/embedding.py +3 -5
  13. unstructured_ingest/utils/data_prep.py +20 -12
  14. unstructured_ingest/v2/cli/base/src.py +2 -1
  15. unstructured_ingest/v2/pipeline/interfaces.py +3 -1
  16. unstructured_ingest/v2/pipeline/pipeline.py +25 -23
  17. unstructured_ingest/v2/processes/connectors/chroma.py +6 -1
  18. unstructured_ingest/v2/processes/connectors/google_drive.py +1 -2
  19. unstructured_ingest/v2/processes/connectors/onedrive.py +6 -4
  20. unstructured_ingest/v2/processes/connectors/pinecone.py +37 -15
  21. unstructured_ingest/v2/processes/connectors/sharepoint.py +1 -1
  22. unstructured_ingest/v2/processes/embedder.py +41 -24
  23. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/METADATA +214 -211
  24. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/RECORD +28 -19
  25. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/LICENSE.md +0 -0
  26. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/WHEEL +0 -0
  27. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/entry_points.txt +0 -0
  28. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.14.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=hVnhWghkqnr7x2fn9tKrlVtVMYcocdwt-lq1-dsXu4A,43
2
+ unstructured_ingest/__version__.py,sha256=FvuQAMXs2twVKSQLT39ab4eOk1k80Ve3pTtytKSWyZI,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
- unstructured_ingest/interfaces.py,sha256=AeEywcSKCMA5AiEdENLpu_yPcXp_c6wpvESePfC00yo,31214
5
+ unstructured_ingest/interfaces.py,sha256=OCXhP6PYUE_vtTmZjwEidA5jvHT50Rj_a5sOS7M6IxI,31411
6
6
  unstructured_ingest/logger.py,sha256=TrhyH7VbCWO5VVuhvL0yUyXxuem3b4pzbqj2uQHUwZk,4480
7
7
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
8
8
  unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
@@ -167,6 +167,15 @@ unstructured_ingest/connector/notion/types/database_properties/title.py,sha256=O
167
167
  unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha256=H9lKi8rCDPtKmuu7j9CnJoTUr6YmzIF4oXbv_OxuN9k,1162
168
168
  unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
169
169
  unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
170
+ unstructured_ingest/embed/__init__.py,sha256=whnTiGsSbNqaLObr058CKG5iGxk5OkN_41IBEtHQYW8,900
171
+ unstructured_ingest/embed/bedrock.py,sha256=5uq1S9-7uKaaHiniohm1HXNLhudIYN9TEcctUe2JIpM,2514
172
+ unstructured_ingest/embed/huggingface.py,sha256=fHgZ865I2Efs3QT43n57gmccF9sBzI6T4yhcu_r7zwM,2727
173
+ unstructured_ingest/embed/interfaces.py,sha256=O_USsEcVHRzE2dpHCJEJWKq04NLfdAnm55ZHHTQ3GO0,900
174
+ unstructured_ingest/embed/mixedbreadai.py,sha256=wMdY1a4PyynguIZQ4fPFImKGk9ryqHv0NRL3e3iSPEI,5491
175
+ unstructured_ingest/embed/octoai.py,sha256=ERJby6VdqcIO6NLTqLXVHmX7LNIM0Fsmhf3dn10Z4is,2347
176
+ unstructured_ingest/embed/openai.py,sha256=zOh3GHg1sPVUd3YzZLS5JIV21emYyrCHGYzqH5MIAiY,2250
177
+ unstructured_ingest/embed/vertexai.py,sha256=6IJlNFMW5GKb5r28aM97YUokHMXIlefjpplJxzgpOZc,3332
178
+ unstructured_ingest/embed/voyageai.py,sha256=PwyW-_dXZT1AGSkeF3c2heM-pdBxciUxdtP7PluCauY,2430
170
179
  unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
171
180
  unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
172
181
  unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
@@ -186,7 +195,7 @@ unstructured_ingest/pipeline/utils.py,sha256=RNx4bv2FhKOhaK_YTiRubta7n9wmJwqzznF
186
195
  unstructured_ingest/pipeline/write.py,sha256=xmDjmbieGRrcI342he7PkgxWaMoSJ5nWPmP5AM2xloU,669
187
196
  unstructured_ingest/pipeline/reformat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
197
  unstructured_ingest/pipeline/reformat/chunking.py,sha256=10LOAU6b2b-S6mzks8VpI1bP2pY1viNDM8dQSPJ8F_s,6035
189
- unstructured_ingest/pipeline/reformat/embedding.py,sha256=PLjVYHgEMnrHAnBW34uYPyhOzvVMhefIRpRTPd4Bnl8,2644
198
+ unstructured_ingest/pipeline/reformat/embedding.py,sha256=YVB8-GK-nAOZ0EaEbNWGrkSQIRyOHyEiQA3H4SEcqng,2522
190
199
  unstructured_ingest/runner/__init__.py,sha256=FO0X_jBIMilXdyjBajyFmzHoC3eVypNMGlhdOW4mcCM,2859
191
200
  unstructured_ingest/runner/airtable.py,sha256=1ndJ6PKT63E0gZN3KYFBj4Yo94zQYsIvSjC6ro2nIPE,1115
192
201
  unstructured_ingest/runner/astradb.py,sha256=FSBtQrsdC9E3eHUcAuQ0apcCnWolz-9tkvy-Uf7QeKg,1102
@@ -248,7 +257,7 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
248
257
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
258
  unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
250
259
  unstructured_ingest/utils/compression.py,sha256=mgndeNULGH7stoC51hhT4B9HwqMUSL0jhphcia6F_bw,4433
251
- unstructured_ingest/utils/data_prep.py,sha256=SqhRlkzqFH1Sl8YSxgOQvP1tyhcAFcTOepDTVEv11FY,4097
260
+ unstructured_ingest/utils/data_prep.py,sha256=9UKewDHB8-cMlQ8POvokhjVsy-ksiSqAAW2ibqPYAfk,4400
252
261
  unstructured_ingest/utils/dep_check.py,sha256=cVEqZtMwji8BIt7pjtUOMtEmN7KaNXRXwelEKFpOdW8,1914
253
262
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
254
263
  unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
@@ -265,7 +274,7 @@ unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X
265
274
  unstructured_ingest/v2/cli/base/cmd.py,sha256=JJ4ON8IrtfK1ub38er81EPOo3urZDdGL829k-JHcZ7A,11481
266
275
  unstructured_ingest/v2/cli/base/dest.py,sha256=_m5rUTHusHkXxzKUfcMtX9_xitbqyxajvIxuyev25vg,3197
267
276
  unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
268
- unstructured_ingest/v2/cli/base/src.py,sha256=kcBmARpZmH6HFL2GOCAnABkLaRwc85DLa4oiKwfQlpw,2832
277
+ unstructured_ingest/v2/cli/base/src.py,sha256=2oqOjsrXANHZ0PJYDc2NV9Dg15nUgWw_B0ouOJvme7I,2871
269
278
  unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
279
  unstructured_ingest/v2/cli/utils/click.py,sha256=SmUAiKiFXVCZ4_bhjrFKvYoLhcVEm5z7zJQw_M0Ad2w,6340
271
280
  unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=73DKHQQ6Tm0Lz5NCRduDlyfOhY2KH-MZN1n6jUgrsuU,7480
@@ -279,9 +288,9 @@ unstructured_ingest/v2/interfaces/processor.py,sha256=1taXZVAHKnWH420N1v-JNXfRGq
279
288
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
280
289
  unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84ixSSJ_NYRjflYsbY,1168
281
290
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
282
- unstructured_ingest/v2/pipeline/interfaces.py,sha256=V9zlEWI5OqTXnV4epfc_Y2TDZyPZCEB4qPam7NlUMEI,8354
291
+ unstructured_ingest/v2/pipeline/interfaces.py,sha256=xvs4AaT92UFdvrg6BNurIUsUOFcyPqvh80j3L8RcrzA,8397
283
292
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
284
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=D8AlVCflOjytyqhNwhpzyVJs-pHJ-FdPt9o1OJXAe-A,15010
293
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=eVxZWzMf9oLHVCSEyglJX2YK-xAs0jsV3tiHY8HVJLo,15074
285
294
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
286
295
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=I5JQn9rVJu9zEnsAZsJzDnxuASp2hdkF8ZRW4dOtgb0,3124
287
296
  unstructured_ingest/v2/pipeline/steps/download.py,sha256=uT2IoUEI8j5F0YUalYXEpjWXlpsI-TBOUfo-8JMGNLI,7649
@@ -295,27 +304,27 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=G9z8QQe9b_WokI5qyr4UOOqae
295
304
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
296
305
  unstructured_ingest/v2/processes/chunker.py,sha256=W2qPIddT-QEhHTKpA1krUhBrR0UFOq4nuko2eBjlG_I,6675
297
306
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
298
- unstructured_ingest/v2/processes/embedder.py,sha256=1AYF0o41tYtQv-ArGCc1PKGnlmNFDiFmhhpgEuG2d4I,5939
307
+ unstructured_ingest/v2/processes/embedder.py,sha256=ZBCIm0oHxWmtUEQYyAjXACqTYPt3LnvXLtoFhu6mu8A,6077
299
308
  unstructured_ingest/v2/processes/filter.py,sha256=eiAxdYiX8wd4vmD4J40x5t5wwJNmoGa5z33Z9Q-knK8,2145
300
309
  unstructured_ingest/v2/processes/partitioner.py,sha256=s7R7KVR-w7EtmqyieC-z-ZFv8H5bPn7IvXgqZddyoF8,10040
301
310
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
302
311
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=4zKMtzTqSzBKiHG92oE5jZUWw7Dc_RZ8c1VvwePrpjo,4801
303
312
  unstructured_ingest/v2/processes/connectors/astradb.py,sha256=bjlzJVNANnpTxRm8Ba8ZS0KetJ_yxmEyEoPJDwUkcOw,5774
304
313
  unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
305
- unstructured_ingest/v2/processes/connectors/chroma.py,sha256=_KaAtxN_8k9vNxAi0C9lSfeLZWcimtijy5Zb5yyyyAg,7954
314
+ unstructured_ingest/v2/processes/connectors/chroma.py,sha256=HRIHZSflSIRpVlLhXl_RLrmskESbAYait3TDBLS1fgU,8099
306
315
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
307
316
  unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=K0Sjt57vsVxL2eImqHzu7LnAONPUVTcDw2-hdLcWjV0,5984
308
317
  unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=0O7l5LTIKw5bEiA0Nnm1umRjMaUhjJr7XihYSzn1a9g,16750
309
- unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=z2LhIXSj2J7MtbYR7gDRX36_FSLcNxy_Z4CxD3xMAvg,13046
318
+ unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=6uoHmiR8Hn2GQ0YJVSDpBEHkkDjNvpZgFynlEeKkM88,13088
310
319
  unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=vF-Owg2ZDI4nC7sk-Ied-5o_qkfwJzDr3uztOeS8kC0,5653
311
320
  unstructured_ingest/v2/processes/connectors/local.py,sha256=sXO-t_HZbq3rE3RzPUiWPnlrCHDixcSxz6epg4XgyYo,6786
312
321
  unstructured_ingest/v2/processes/connectors/milvus.py,sha256=hNMtjdNF6Nv8E_0n5uDpki1kAFdtPplq_5N0W92rrVs,7761
313
322
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=q_GRuG2RQ5-8ajefifKuhFO52wCVhtU9j4ZIEf5hNas,4948
314
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=-B8ELr0rnspzrTy6HBvgbvuiF1eEKRQyCT1ocwmET5Q,9145
323
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=xUh-z0JltJ4iua2EUVUXXHuqjfQKlTmAkH0oam5gBhI,9207
315
324
  unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=z4UTenXy-iqy9Xlqlf1UTiGdOhIDPowiMg8juWnCh9M,6755
316
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=w7bY9s61c9_a-k3NjAmGjHXJQks-9KpRfpXKW9B6q9E,5744
325
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=Tz01LN_RssGx9k6aYcgA3vmp1OLYCIdJd8c1I8Bn7hQ,6840
317
326
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
318
- unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=_ZTjtFNcKcJ0z4cvEZml18TdOMm-Kbwlz8nxTTjp9nc,19500
327
+ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=JrnTwhp7vP0HHcZTXopPVA-GvE4pvfosw0S1VQzBz5Y,19501
319
328
  unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
320
329
  unstructured_ingest/v2/processes/connectors/sql.py,sha256=tDWL3YqL8MQuLsjW8A-KUkpSLh1iOn934OWfzPkqils,9298
321
330
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
@@ -329,9 +338,9 @@ unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=O1j0hIYWI4lPpTQ
329
338
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=cOMvMh0C9rtyEPJ0X59Fn-qb11LFUMRfeUgsi3QRWUk,6390
330
339
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=NkcU4U2DQWWuM8UHkez67C3SEOZpVyRtmtNS-z-F0Fw,6056
331
340
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
332
- unstructured_ingest-0.0.13.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
333
- unstructured_ingest-0.0.13.dist-info/METADATA,sha256=xIJhdAI3i4HtbkN5yFE5BwhBh4pYW6FWD9m_iPjV1TM,28110
334
- unstructured_ingest-0.0.13.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
335
- unstructured_ingest-0.0.13.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
336
- unstructured_ingest-0.0.13.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
337
- unstructured_ingest-0.0.13.dist-info/RECORD,,
341
+ unstructured_ingest-0.0.14.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
342
+ unstructured_ingest-0.0.14.dist-info/METADATA,sha256=Tz-j7IRCn1plW1J7ysFY3rRV2ckgj5H3gdkpWu4LfRI,28289
343
+ unstructured_ingest-0.0.14.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
344
+ unstructured_ingest-0.0.14.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
345
+ unstructured_ingest-0.0.14.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
346
+ unstructured_ingest-0.0.14.dist-info/RECORD,,