unstructured-ingest 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (39) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/v2/interfaces/processor.py +6 -1
  3. unstructured_ingest/v2/interfaces/uploader.py +9 -4
  4. unstructured_ingest/v2/otel.py +111 -0
  5. unstructured_ingest/v2/pipeline/interfaces.py +61 -28
  6. unstructured_ingest/v2/pipeline/otel.py +32 -0
  7. unstructured_ingest/v2/pipeline/pipeline.py +11 -7
  8. unstructured_ingest/v2/pipeline/steps/index.py +2 -0
  9. unstructured_ingest/v2/pipeline/steps/upload.py +7 -19
  10. unstructured_ingest/v2/processes/chunker.py +3 -1
  11. unstructured_ingest/v2/processes/connectors/astradb.py +3 -8
  12. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +4 -9
  13. unstructured_ingest/v2/processes/connectors/chroma.py +3 -8
  14. unstructured_ingest/v2/processes/connectors/couchbase.py +5 -9
  15. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +9 -10
  16. unstructured_ingest/v2/processes/connectors/elasticsearch.py +4 -7
  17. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +3 -3
  18. unstructured_ingest/v2/processes/connectors/fsspec/box.py +3 -3
  19. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +3 -3
  20. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +4 -6
  21. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +3 -3
  22. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +2 -3
  23. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +3 -3
  24. unstructured_ingest/v2/processes/connectors/kdbai.py +7 -8
  25. unstructured_ingest/v2/processes/connectors/local.py +15 -22
  26. unstructured_ingest/v2/processes/connectors/milvus.py +2 -14
  27. unstructured_ingest/v2/processes/connectors/mongodb.py +3 -8
  28. unstructured_ingest/v2/processes/connectors/pinecone.py +6 -24
  29. unstructured_ingest/v2/processes/connectors/singlestore.py +6 -6
  30. unstructured_ingest/v2/processes/connectors/sql.py +5 -7
  31. unstructured_ingest/v2/processes/connectors/weaviate.py +4 -11
  32. unstructured_ingest/v2/processes/partitioner.py +13 -3
  33. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/METADATA +275 -211
  34. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/RECORD +38 -37
  35. unstructured_ingest/v2/example.py +0 -37
  36. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/LICENSE.md +0 -0
  37. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/WHEEL +0 -0
  38. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/entry_points.txt +0 -0
  39. {unstructured_ingest-0.0.5.dist-info → unstructured_ingest-0.0.7.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=fyerMGKtoCxDg7Tqkq8qutVLtnLzvBaskhJnyThlAJk,42
2
+ unstructured_ingest/__version__.py,sha256=9_yNWUxyxaaKHXO3q2QY_s0mmWikO7bg69Xm4jGu4hQ,41
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
5
  unstructured_ingest/interfaces.py,sha256=AeEywcSKCMA5AiEdENLpu_yPcXp_c6wpvESePfC00yo,31214
@@ -254,9 +254,9 @@ unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmiz
254
254
  unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
255
255
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
256
256
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
257
- unstructured_ingest/v2/example.py,sha256=qkwmpMxUlaJXdDNKQ4LlUt3XGxgTUU3CXGGO57eW5Gs,1644
258
257
  unstructured_ingest/v2/logger.py,sha256=akcghdHwpKM3CfoeFzir0zmc7R9Hk7zjquU-X-gwUIw,4324
259
258
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
259
+ unstructured_ingest/v2/otel.py,sha256=jD-zuezaU5BHQEZfPSEusXNmesEvtrcfNjVPlQp-cmE,4130
260
260
  unstructured_ingest/v2/utils.py,sha256=ykmyvmRMHGahkpKbkFbJfEHwNjZccKqbYsixUtUtrFw,1478
261
261
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
262
262
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
@@ -275,62 +275,63 @@ unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK
275
275
  unstructured_ingest/v2/interfaces/file_data.py,sha256=w6sBMCDH1va6XbvVaZyb7EJendXRCa_mo1Qv8uTWzCU,1898
276
276
  unstructured_ingest/v2/interfaces/indexer.py,sha256=Bd1S-gTLsxhJBLEh1lYm_gXqwQLaEZMoqPq9yGxtN_E,713
277
277
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
278
- unstructured_ingest/v2/interfaces/processor.py,sha256=t1LIrkubfbqt7RMZ9bABrxd0Z9TJxG6zqozBC5Pi4Yc,1615
278
+ unstructured_ingest/v2/interfaces/processor.py,sha256=1taXZVAHKnWH420N1v-JNXfRGq5roTaYvxqcO1EzpnQ,1772
279
279
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
280
- unstructured_ingest/v2/interfaces/uploader.py,sha256=ymEC-0JFTvjuAFsz9QLRF_6rFNFlDURkAgZ1tBUb2ec,1009
280
+ unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84ixSSJ_NYRjflYsbY,1168
281
281
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
282
- unstructured_ingest/v2/pipeline/interfaces.py,sha256=Z50-6XFZNajfmJbLKunLxw3RuYMzCYiUp6F0jhQwERE,6441
283
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=rOiTGLW6e1kifSA_0bCzFktDVKN7eXG_BzEKtLBTjCA,14736
282
+ unstructured_ingest/v2/pipeline/interfaces.py,sha256=V9zlEWI5OqTXnV4epfc_Y2TDZyPZCEB4qPam7NlUMEI,8354
283
+ unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
284
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=D8AlVCflOjytyqhNwhpzyVJs-pHJ-FdPt9o1OJXAe-A,15010
284
285
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
286
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=I5JQn9rVJu9zEnsAZsJzDnxuASp2hdkF8ZRW4dOtgb0,3124
286
287
  unstructured_ingest/v2/pipeline/steps/download.py,sha256=IwysS1_YZRuraIifBW94LWhPNDYU-oaeFkEbgPNpBag,7690
287
288
  unstructured_ingest/v2/pipeline/steps/embed.py,sha256=5wONbMvT_hZRZtHPgquok1ryC66dajCU5iifVfIaP9Y,3102
288
289
  unstructured_ingest/v2/pipeline/steps/filter.py,sha256=1HM6aBZ5YI0wHQjMXx4KISsiueRlLXVn0mYyiXLMgy4,1188
289
- unstructured_ingest/v2/pipeline/steps/index.py,sha256=oyYFtDgWg-CJoBM75YTln1t6DjGoLooz2stEeiaB08Q,2537
290
+ unstructured_ingest/v2/pipeline/steps/index.py,sha256=0h5sc5mlnMuyxPKmbm4sY6LytqZiAWcP_FJvsYQF4WA,2632
290
291
  unstructured_ingest/v2/pipeline/steps/partition.py,sha256=2NuXpDQ9brf7D4vPhbalCGpjw80XRGYZAAO-Ist1yKs,3182
291
292
  unstructured_ingest/v2/pipeline/steps/stage.py,sha256=6gAPzp46DrsOtL914hqgATRDCMvBRI7VtvlsFuMWc4I,2211
292
293
  unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=fEjHPdXnMKpmN5vhqbrNOrVsQGKcoKxRSKE5PrViE9I,2389
293
- unstructured_ingest/v2/pipeline/steps/upload.py,sha256=5Y6oZV5IyfOvXr8RPYjEfy8_yp4XGatoY0tsznlH_wA,2278
294
+ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=G9z8QQe9b_WokI5qyr4UOOqaepEVgwFqMn9pWcta9gI,1917
294
295
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
295
- unstructured_ingest/v2/processes/chunker.py,sha256=4fAMoFEC4kFkdLsJrpkdtQpexX3WlC1BDR7PQ9eICo0,6524
296
+ unstructured_ingest/v2/processes/chunker.py,sha256=W2qPIddT-QEhHTKpA1krUhBrR0UFOq4nuko2eBjlG_I,6675
296
297
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
297
298
  unstructured_ingest/v2/processes/embedder.py,sha256=1AYF0o41tYtQv-ArGCc1PKGnlmNFDiFmhhpgEuG2d4I,5939
298
299
  unstructured_ingest/v2/processes/filter.py,sha256=eiAxdYiX8wd4vmD4J40x5t5wwJNmoGa5z33Z9Q-knK8,2145
299
- unstructured_ingest/v2/processes/partitioner.py,sha256=4lOwHiuCs2ZdQ0a3Eu7ila-UeDYmaQooryM3CBCOnJg,9575
300
+ unstructured_ingest/v2/processes/partitioner.py,sha256=s7R7KVR-w7EtmqyieC-z-ZFv8H5bPn7IvXgqZddyoF8,10040
300
301
  unstructured_ingest/v2/processes/uncompress.py,sha256=LL68WLq4EfZGORvv5vaQx03EXfiA7k62sbzElPEN8AM,1557
301
302
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=4zKMtzTqSzBKiHG92oE5jZUWw7Dc_RZ8c1VvwePrpjo,4801
302
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=dexaljYJ08V4URm971hhmZE9hKRCFHO-JNstXNeh4GI,5924
303
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=Tx2ux-w7CgEZ4bn0r0dtAUzAy4HB9PvpQRoh52oU0ac,8603
304
- unstructured_ingest/v2/processes/connectors/chroma.py,sha256=a5Ebd5Rj7OF0bTpj88aIRh6HsKJJYFDhVe-7RQEuzvs,7916
305
- unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=5-yKJDnT_iKTDsmMC-HZvUXiSpuNxmvhWQ-MiaZlLY8,12240
306
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=kn5xBdcB2UfnuYxNj0URvBD4WUNIUn0Roi_9mpYOMII,6120
307
- unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=9ZNNiX69-5beVIQrrYB5uU8x9F4KAogZsPiRshhEuso,16898
303
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=bjlzJVNANnpTxRm8Ba8ZS0KetJ_yxmEyEoPJDwUkcOw,5774
304
+ unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
305
+ unstructured_ingest/v2/processes/connectors/chroma.py,sha256=-ZZLwNS10hyW1-tb_Wl0YBswMc-_xPAp0nRCcmDqLas,7766
306
+ unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
307
+ unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=K0Sjt57vsVxL2eImqHzu7LnAONPUVTcDw2-hdLcWjV0,5984
308
+ unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=0O7l5LTIKw5bEiA0Nnm1umRjMaUhjJr7XihYSzn1a9g,16750
308
309
  unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=zRhhBCDFF4QzBpT2Ij1uXd5jdKTc_JR9WwfSLV9ynQc,12890
309
- unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=oTLbRsapX9nDvUM-rjSGW0lrCuGyJERi13ycPS00lgU,5698
310
- unstructured_ingest/v2/processes/connectors/local.py,sha256=5sY8su9nI3PoV-Y7o0jaM2lNtTL1CYnwaeBI1puddBI,7074
311
- unstructured_ingest/v2/processes/connectors/milvus.py,sha256=J3Zjg7dqhqQt35AYWiUt812bzfwHa9hVapWiWxswhHQ,7265
312
- unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=3FavmgKjtQADSyuH3EMIkfUgmRjIQfc0wVDlvLpd7Hs,5098
310
+ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=vF-Owg2ZDI4nC7sk-Ied-5o_qkfwJzDr3uztOeS8kC0,5653
311
+ unstructured_ingest/v2/processes/connectors/local.py,sha256=sXO-t_HZbq3rE3RzPUiWPnlrCHDixcSxz6epg4XgyYo,6786
312
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=9vCe-U_-wvmUohS56nLyeyHop7b4gVGuAakQtGTmdQk,6871
313
+ unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=q_GRuG2RQ5-8ajefifKuhFO52wCVhtU9j4ZIEf5hNas,4948
313
314
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=-B8ELr0rnspzrTy6HBvgbvuiF1eEKRQyCT1ocwmET5Q,9145
314
315
  unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=dEj4yYCgdhCD6376kuhKH7NVO5v-vsrN8dbULjEXfss,6811
315
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=cfegFH1t7cSjjfebPnbnaOXgOMFlIET8I73YGfR4uco,6115
316
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=_RwrV7qVbMOsvD8LuGYKQO3UrSFJvgjTGzx304Jj4l0,5459
316
317
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
317
318
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=_ZTjtFNcKcJ0z4cvEZml18TdOMm-Kbwlz8nxTTjp9nc,19500
318
- unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=Pkax_54XGhbp-BeC_jsLxRHEJlJeYMYsH5r0H1tVi8w,5526
319
- unstructured_ingest/v2/processes/connectors/sql.py,sha256=UU4EoDMy--QyHnY-Bw96QXcGFmnQAyHO_QhkdjYZi30,9386
319
+ unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
320
+ unstructured_ingest/v2/processes/connectors/sql.py,sha256=tDWL3YqL8MQuLsjW8A-KUkpSLh1iOn934OWfzPkqils,9298
320
321
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=nmpZZCeX0O7rGrwHSWM_heBgpZK9tKT6EV1Moer-z40,576
321
- unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=5tQbo0jfqhl61Uc6DWgLo4sb5tFL_4BO00H0tq0WoqU,9207
322
+ unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=4hBQ9GWbBv6ti9futVJCShNugDC6Vh7Hy9ZhEC4XDpM,8958
322
323
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
323
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=ovlU5s_s5vfjRqECXOlOTNONlaZ-hWqcjWJ2eZv8iIY,6214
324
- unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=MerISu_HxJ3n4-4VVnz9tBAbkCbhQmt0k_iHSSZvZYI,4435
325
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=_I_GXnjrzrfJO0zoXgaLQX3foIvXYl_-Cm3yfHOCkUs,4775
326
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=Nn9rOkQKH9aq575R-h-l9xZCFsLfrYiry8L5xbHP_DQ,12556
327
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=HoExrmUHigyWRBgEMneKigjcROc-GH_Jp2aUyWkHBH0,6006
328
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=tKxH6h80i7BDzEoMsWZZBLwJ1wpJ-WPxUwT2ezWRVLI,6394
329
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dpLFv5vGtw8aiLDgrf6E7elg8Pm2dKF0MQnnl84rIqM,6056
324
+ unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=GrC44PnA8PLHUJQ4aH3gETxL8v8UvknbKptxiXweqdc,6214
325
+ unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=dVKVCdbKdNSkkZBYJL14-u7aXOr50mzKCmhCVAneuqI,4435
326
+ unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=ZnoSGAZ4wtOhyg8G3PLYFMpbMVsBffvW-qp5jWwEDuA,4775
327
+ unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=AAP7NNxO5smWYwzsAVbfs91FzpAteZeFI8vZj34ytgg,12441
328
+ unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=O1j0hIYWI4lPpTQ5hsEKV8usDCrUm-t1qVcSNKsJQd0,6006
329
+ unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=cOMvMh0C9rtyEPJ0X59Fn-qb11LFUMRfeUgsi3QRWUk,6390
330
+ unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=NkcU4U2DQWWuM8UHkez67C3SEOZpVyRtmtNS-z-F0Fw,6056
330
331
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
331
- unstructured_ingest-0.0.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
332
- unstructured_ingest-0.0.5.dist-info/METADATA,sha256=wNXOD7EnOVJhbAlRhocsIBJ9fwXPxMxKCJVleIqnjmk,24725
333
- unstructured_ingest-0.0.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
334
- unstructured_ingest-0.0.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
335
- unstructured_ingest-0.0.5.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
336
- unstructured_ingest-0.0.5.dist-info/RECORD,,
332
+ unstructured_ingest-0.0.7.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
333
+ unstructured_ingest-0.0.7.dist-info/METADATA,sha256=zcBWenMe64xmdQNwKMvyoGuiVpjg1Bwrz7v0RoGME5c,28109
334
+ unstructured_ingest-0.0.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
335
+ unstructured_ingest-0.0.7.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
336
+ unstructured_ingest-0.0.7.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
337
+ unstructured_ingest-0.0.7.dist-info/RECORD,,
@@ -1,37 +0,0 @@
1
- from pathlib import Path
2
-
3
- from unstructured_ingest.v2.interfaces import ProcessorConfig
4
- from unstructured_ingest.v2.logger import logger
5
- from unstructured_ingest.v2.pipeline.pipeline import Pipeline
6
- from unstructured_ingest.v2.processes.chunker import ChunkerConfig
7
- from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (
8
- S3ConnectionConfig,
9
- S3DownloaderConfig,
10
- S3IndexerConfig,
11
- )
12
- from unstructured_ingest.v2.processes.connectors.local import (
13
- LocalUploaderConfig,
14
- )
15
- from unstructured_ingest.v2.processes.embedder import EmbedderConfig
16
- from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
17
-
18
- base_path = Path(__file__).parent.parent.parent.parent
19
- docs_path = base_path / "example-docs"
20
- work_dir = base_path / "tmp_ingest"
21
- output_path = work_dir / "output"
22
- download_path = work_dir / "download"
23
-
24
- if __name__ == "__main__":
25
- logger.info(f"Writing all content in: {work_dir.resolve()}")
26
- Pipeline.from_configs(
27
- context=ProcessorConfig(
28
- work_dir=str(work_dir.resolve()), tqdm=True, reprocess=True, verbose=True
29
- ),
30
- indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"),
31
- downloader_config=S3DownloaderConfig(download_dir=download_path),
32
- source_connection_config=S3ConnectionConfig(anonymous=True),
33
- partitioner_config=PartitionerConfig(strategy="fast"),
34
- chunker_config=ChunkerConfig(chunking_strategy="by_title"),
35
- embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"),
36
- uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())),
37
- ).run()