unstructured-ingest 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (82) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/interfaces.py +1 -1
  3. unstructured_ingest/cli/utils.py +1 -1
  4. unstructured_ingest/connector/astradb.py +1 -1
  5. unstructured_ingest/connector/biomed.py +4 -4
  6. unstructured_ingest/connector/chroma.py +1 -1
  7. unstructured_ingest/connector/databricks_volumes.py +2 -2
  8. unstructured_ingest/connector/fsspec/box.py +1 -1
  9. unstructured_ingest/connector/fsspec/fsspec.py +5 -5
  10. unstructured_ingest/connector/git.py +1 -1
  11. unstructured_ingest/connector/google_drive.py +4 -4
  12. unstructured_ingest/connector/hubspot.py +1 -1
  13. unstructured_ingest/connector/kafka.py +8 -8
  14. unstructured_ingest/connector/local.py +1 -1
  15. unstructured_ingest/connector/notion/helpers.py +4 -4
  16. unstructured_ingest/connector/onedrive.py +3 -3
  17. unstructured_ingest/connector/outlook.py +2 -2
  18. unstructured_ingest/connector/pinecone.py +1 -1
  19. unstructured_ingest/connector/sharepoint.py +8 -8
  20. unstructured_ingest/connector/vectara.py +6 -6
  21. unstructured_ingest/embed/__init__.py +17 -0
  22. unstructured_ingest/embed/bedrock.py +70 -0
  23. unstructured_ingest/embed/huggingface.py +73 -0
  24. unstructured_ingest/embed/interfaces.py +36 -0
  25. unstructured_ingest/embed/mixedbreadai.py +177 -0
  26. unstructured_ingest/embed/octoai.py +63 -0
  27. unstructured_ingest/embed/openai.py +61 -0
  28. unstructured_ingest/embed/vertexai.py +88 -0
  29. unstructured_ingest/embed/voyageai.py +69 -0
  30. unstructured_ingest/interfaces.py +21 -11
  31. unstructured_ingest/logger.py +1 -1
  32. unstructured_ingest/pipeline/copy.py +1 -1
  33. unstructured_ingest/pipeline/interfaces.py +2 -2
  34. unstructured_ingest/pipeline/partition.py +1 -1
  35. unstructured_ingest/pipeline/pipeline.py +1 -1
  36. unstructured_ingest/pipeline/reformat/chunking.py +2 -2
  37. unstructured_ingest/pipeline/reformat/embedding.py +4 -6
  38. unstructured_ingest/pipeline/source.py +2 -2
  39. unstructured_ingest/utils/compression.py +3 -3
  40. unstructured_ingest/utils/data_prep.py +20 -12
  41. unstructured_ingest/utils/string_and_date_utils.py +2 -2
  42. unstructured_ingest/v2/cli/base/cmd.py +3 -3
  43. unstructured_ingest/v2/cli/base/dest.py +1 -1
  44. unstructured_ingest/v2/cli/base/src.py +3 -2
  45. unstructured_ingest/v2/cli/utils/click.py +1 -1
  46. unstructured_ingest/v2/interfaces/processor.py +48 -13
  47. unstructured_ingest/v2/logger.py +1 -1
  48. unstructured_ingest/v2/otel.py +1 -1
  49. unstructured_ingest/v2/pipeline/interfaces.py +12 -3
  50. unstructured_ingest/v2/pipeline/pipeline.py +42 -29
  51. unstructured_ingest/v2/pipeline/steps/chunk.py +3 -3
  52. unstructured_ingest/v2/pipeline/steps/download.py +17 -2
  53. unstructured_ingest/v2/pipeline/steps/embed.py +3 -3
  54. unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  55. unstructured_ingest/v2/pipeline/steps/index.py +2 -2
  56. unstructured_ingest/v2/pipeline/steps/partition.py +3 -3
  57. unstructured_ingest/v2/pipeline/steps/stage.py +1 -1
  58. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  59. unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  60. unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
  61. unstructured_ingest/v2/processes/connectors/chroma.py +6 -1
  62. unstructured_ingest/v2/processes/connectors/elasticsearch.py +1 -1
  63. unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
  64. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +4 -4
  65. unstructured_ingest/v2/processes/connectors/google_drive.py +2 -3
  66. unstructured_ingest/v2/processes/connectors/local.py +6 -5
  67. unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
  68. unstructured_ingest/v2/processes/connectors/onedrive.py +8 -6
  69. unstructured_ingest/v2/processes/connectors/opensearch.py +1 -1
  70. unstructured_ingest/v2/processes/connectors/pinecone.py +38 -16
  71. unstructured_ingest/v2/processes/connectors/sharepoint.py +10 -6
  72. unstructured_ingest/v2/processes/embedder.py +41 -24
  73. unstructured_ingest/v2/processes/filter.py +1 -1
  74. unstructured_ingest/v2/processes/partitioner.py +3 -3
  75. unstructured_ingest/v2/utils.py +7 -0
  76. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/METADATA +212 -211
  77. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/RECORD +81 -72
  78. unstructured_ingest/evaluate.py +0 -338
  79. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/LICENSE.md +0 -0
  80. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/WHEEL +0 -0
  81. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/entry_points.txt +0 -0
  82. {unstructured_ingest-0.0.13.dist-info → unstructured_ingest-0.0.15.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,16 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=hVnhWghkqnr7x2fn9tKrlVtVMYcocdwt-lq1-dsXu4A,43
2
+ unstructured_ingest/__version__.py,sha256=ue4xcU8omVbXrnrr4LFRKTwKGHRR5ke-SqtF142v0Xs,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
- unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
- unstructured_ingest/interfaces.py,sha256=AeEywcSKCMA5AiEdENLpu_yPcXp_c6wpvESePfC00yo,31214
6
- unstructured_ingest/logger.py,sha256=TrhyH7VbCWO5VVuhvL0yUyXxuem3b4pzbqj2uQHUwZk,4480
4
+ unstructured_ingest/interfaces.py,sha256=LuY-85clq5iyOtDd9vDABjK2MZCm6wRWK53cdb4SROY,31411
5
+ unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
7
6
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
8
7
  unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
9
8
  unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29-kqEAA,302
10
9
  unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
11
10
  unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
12
11
  unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
13
- unstructured_ingest/cli/interfaces.py,sha256=1Nw9siznXr3vtr5nh71amjRz1itLYroqqnBQ4-lAG5Q,24130
14
- unstructured_ingest/cli/utils.py,sha256=l7dmDf_KUO3SP4dcVDHjxYAU2b28yR-n-a8xoYVPmw4,7981
12
+ unstructured_ingest/cli/interfaces.py,sha256=6kMmTVm5ia6wUIdOMRu_uErkcIeWndr_6fhPc1AnoYM,24134
13
+ unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
15
14
  unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
15
  unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
17
16
  unstructured_ingest/cli/base/dest.py,sha256=uN44l7kPErm_BQqKFUgaiz_Xu6UKk-mnB1B8c0cb4lQ,3416
@@ -60,51 +59,51 @@ unstructured_ingest/cli/cmds/fsspec/s3.py,sha256=v-24oFxhabdShryK2dhP4cDBvVyoQ-8
60
59
  unstructured_ingest/cli/cmds/fsspec/sftp.py,sha256=TCB7sf_GYoifryQbbttknYSt9Q1kRCPtu8B8QgXl3lw,1537
61
60
  unstructured_ingest/connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
61
  unstructured_ingest/connector/airtable.py,sha256=KcLt-FEabO9D5ev5E4xUf06VYHpYpypP-adTVyhGcb8,10585
63
- unstructured_ingest/connector/astradb.py,sha256=T4jo80foCRQ_QwTClf9abFWWQHCUTlmSOdYK9X8r7aQ,8635
62
+ unstructured_ingest/connector/astradb.py,sha256=kDDcITwmUqgZ2y0cYMxMIsEr3c7s8SXrRI1LplTRlos,8635
64
63
  unstructured_ingest/connector/azure_cognitive_search.py,sha256=cqQdAaEzt4coU7sxnl4GY8Em4a6azFLyAKM6enkmjBA,5850
65
- unstructured_ingest/connector/biomed.py,sha256=dVVyPhlYkJUzbQNg6qwgfqlmCScA4Ttac9ErS9yX4MM,10847
66
- unstructured_ingest/connector/chroma.py,sha256=DgAe5xxDiV1BzAbKusL7JmkCGyEJ9lWTdoyy0txXoTw,5713
64
+ unstructured_ingest/connector/biomed.py,sha256=uwtBuKzpHfxbJckHAHcsnKo4dTCdag66tCDtCqKNSZM,10847
65
+ unstructured_ingest/connector/chroma.py,sha256=Nma6HebQxNY7CCWwWArkX3kMXf2xVv6L-jrfRjMi9LE,5713
67
66
  unstructured_ingest/connector/clarifai.py,sha256=kAtPGrjOps_aYdlhHkTtQc46Rfc0woNor6VY1UGEKZI,4211
68
67
  unstructured_ingest/connector/confluence.py,sha256=VbeGGcoMJQ3WIriHSGz1o5L1u24EeVrYQRjR6t1fcr0,10151
69
- unstructured_ingest/connector/databricks_volumes.py,sha256=zEJZFO2Uq-eQ5k62_SmNsKC4NZ-ykmtQY5XacLWrr0I,4948
68
+ unstructured_ingest/connector/databricks_volumes.py,sha256=s4Go3M-UHPYGUzEEEEhNGBlb9DmZMamjeg3nFRXVgV4,4948
70
69
  unstructured_ingest/connector/delta_table.py,sha256=fHUkZ8v3a20k_r64j-i6ulm4_Zi6eYGdiGKWj4q3BAs,7191
71
70
  unstructured_ingest/connector/discord.py,sha256=SelvVGEF2SThdf8FSSVXGrIBgQoOcNgdKFUfEvpIcg0,6153
72
71
  unstructured_ingest/connector/elasticsearch.py,sha256=UIqTQbXVhHprApfBCXBVBBmPMnWccjoaFgV6shrKG-U,14157
73
- unstructured_ingest/connector/git.py,sha256=Hjf22SrJ_oFn4llxTa_54zW3jnZ6JVYB9tYWhCsrr1o,3817
72
+ unstructured_ingest/connector/git.py,sha256=_kIHVXZsLzK8WAJ0s3El1eGX1S2UJoGbFGqOVCIDf8g,3817
74
73
  unstructured_ingest/connector/github.py,sha256=STgcJMcc4RSfOw-N-_Cb97LkHmk1nSI-ivdco7p-7y4,6578
75
74
  unstructured_ingest/connector/gitlab.py,sha256=OEilnSFabWT3XY0riNxVTXc9tS3f1lMyHI6oZzb3Cw0,4926
76
- unstructured_ingest/connector/google_drive.py,sha256=BgwYmQAL8C91HqxD0pOM2YX3iT406pHOoK8RxzqGRsk,13054
77
- unstructured_ingest/connector/hubspot.py,sha256=teIvVsX1sSf2vILX9sri8ohpC0SA3yHA5sc-moKqls0,9271
75
+ unstructured_ingest/connector/google_drive.py,sha256=Sl6r-IcbV_7s8LeMg2z8qiko2r5RAyRnDzBxMwvY6ng,13053
76
+ unstructured_ingest/connector/hubspot.py,sha256=jL-bqU4EJIqbG0YRk9IR3MKsHi_WHf86Fy6r1fVeCz4,9271
78
77
  unstructured_ingest/connector/jira.py,sha256=kxjGhbVSH8FJNPMGJbnpZEV5zZRfGFckVJFiOzExphQ,15690
79
- unstructured_ingest/connector/kafka.py,sha256=cVEX_yn_9Vdvz6lujf1GdThsJfxJnVsdjfTnqhx7p1A,10053
80
- unstructured_ingest/connector/local.py,sha256=OyGedubpn39bLs5Z4EeZqsb1Q-M-cJkWcrUV8eQ9yec,4479
78
+ unstructured_ingest/connector/kafka.py,sha256=D0XPf0IZ4XkLjnYy7I1FDc5XucNehJLlj-yCKjBMr0w,10053
79
+ unstructured_ingest/connector/local.py,sha256=ayEz7gtnr1ioiYWmJ5ElSlSC8_ZFk1rk-9sX2htRq6c,4479
81
80
  unstructured_ingest/connector/mongodb.py,sha256=UD8T1V435YvGY68dpL-fyFesD7bcLckptgXzzfgnILI,9771
82
- unstructured_ingest/connector/onedrive.py,sha256=hvVuC-Kup88ZMbJpXG8AxRiuQyscZw6nOVLOjlF_pK4,8911
81
+ unstructured_ingest/connector/onedrive.py,sha256=-yy3scFHVIUiPAAQdmJXel3_BMZnZc9qUI8HwecuoJ4,8911
83
82
  unstructured_ingest/connector/opensearch.py,sha256=kvzqEqanP6nGHjxCJ2e2CAz9iK8na3yYBX1l4ZuVq0A,7937
84
- unstructured_ingest/connector/outlook.py,sha256=Qbxrt_2ZSz329MxK5hb1_MYndPvPSXxCSfD0dMCy0Gs,10443
85
- unstructured_ingest/connector/pinecone.py,sha256=hh4hbW7P8ebXf9n4S7ilvcL3Qzt9XEeZwA6_BkPnFEY,4796
83
+ unstructured_ingest/connector/outlook.py,sha256=f7WXb1xhf4iA3B7HTOCz2KuqxrywuChoDsDSy-erwYY,10443
84
+ unstructured_ingest/connector/pinecone.py,sha256=koUO3EVXP_cglbs3XtXTgNQJVmUmYfDQpYi79jclP3k,4796
86
85
  unstructured_ingest/connector/qdrant.py,sha256=Y1PAW6ueAzkTxoeViZ7JjkErFJNJlSYvzaRU1c-hcJA,4964
87
86
  unstructured_ingest/connector/reddit.py,sha256=8pyVSXXKGS9vOlNBeXw1ev5oqu-uWka5hzgUI8CFRos,5457
88
87
  unstructured_ingest/connector/registry.py,sha256=SxXKzOGimHGYOPDSCsYm_xhbwNb-DIcv6XqxoPRIaIY,4846
89
88
  unstructured_ingest/connector/salesforce.py,sha256=FrzevH1xB9deXdgt1ph7xa8BRFI8qC2sxGR4KsUHWSY,10941
90
- unstructured_ingest/connector/sharepoint.py,sha256=4Ex4_rCOvA_7g2YmtsZd_mISjfCD_jRFtk_-JmC4lUc,22159
89
+ unstructured_ingest/connector/sharepoint.py,sha256=7-UKNTqyOD7N2kA4TpIW0z0T5tPzGeBvhZOBEiHrdeY,22159
91
90
  unstructured_ingest/connector/slack.py,sha256=1CJ19N2yWrAF1viUrqa4Yb-BUbCrUHmGMkUHhFEe6m4,7617
92
91
  unstructured_ingest/connector/sql.py,sha256=YWJIuNtXkhwW_h7nlxkmzZhzMcICkZc1ezZ1CTzcf54,7625
93
- unstructured_ingest/connector/vectara.py,sha256=r7nE2kC9-2mfpMSNmVH1F_OkLetSapJ2Xj4mFAJJJ88,9539
92
+ unstructured_ingest/connector/vectara.py,sha256=l_AuCYkY3nmojF1sg347sDpmx8oIoIZi0Z9iHxoH82E,9540
94
93
  unstructured_ingest/connector/weaviate.py,sha256=Pi0bqyTJhXk_1zdbmJCYvW1inHNTBa0i3cYKRRPcXO0,7291
95
94
  unstructured_ingest/connector/wikipedia.py,sha256=lGccBwl2JlFJNIWqKj3SmUyTrC4xpmeFliCfahFrXRs,5992
96
95
  unstructured_ingest/connector/fsspec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
96
  unstructured_ingest/connector/fsspec/azure.py,sha256=6rBbH3TtbMF4KxP5oNLADvu2HDLc8VgIaAJoWx8ukeA,2580
98
- unstructured_ingest/connector/fsspec/box.py,sha256=rFoyDy_xplMFrqpPpee_cSI0P0FgejGaLK_8BxhA6kY,3429
97
+ unstructured_ingest/connector/fsspec/box.py,sha256=6vC7y313Hu4zZtX3GZT9uTu6qyKZkhr3U6rRR2rFuR0,3428
99
98
  unstructured_ingest/connector/fsspec/dropbox.py,sha256=W46bpTDvw5AGqM9GMpzXpjqumJgt5SxVSaRW7jNBUa0,5911
100
- unstructured_ingest/connector/fsspec/fsspec.py,sha256=k9Olxy7PzFW6d2Kcujqf7IvqsdxzYmwaTkGIPoIKBFs,13084
99
+ unstructured_ingest/connector/fsspec/fsspec.py,sha256=XpbKKyn2f0yehLNN4FpFA_igLa4p69ni6rJr5hbby7Q,13085
101
100
  unstructured_ingest/connector/fsspec/gcs.py,sha256=2PH5jBn09d3ZoM2j0RR-HSOjM0n1HR4XIPSiTmwCT0s,2257
102
101
  unstructured_ingest/connector/fsspec/s3.py,sha256=iMsdTzy2KYqkxQJ57UyuZAahtvE21iMT5SsgD4DC7RU,1723
103
102
  unstructured_ingest/connector/fsspec/sftp.py,sha256=x2w8JGM81S_HXww7Aa-bTY1LjZSis56aOpCinga_bok,2653
104
103
  unstructured_ingest/connector/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
104
  unstructured_ingest/connector/notion/client.py,sha256=j5_CEFzI3Bx7oxTGOPoT5sazrzIPTX2467dmCLyziNg,9169
106
105
  unstructured_ingest/connector/notion/connector.py,sha256=8A9d-Pej-uXzjEy85zUloxIPK-EbpI5heVR6AVqOvU8,17538
107
- unstructured_ingest/connector/notion/helpers.py,sha256=jqg5-cPcrjm5G7dBF4jOjHxdJN1CI7yBeTefiX4hBoM,20702
106
+ unstructured_ingest/connector/notion/helpers.py,sha256=-eEB8eSqdD5bWX_QEA2hZz1siucC0FNEUEqCEJptiVk,20702
108
107
  unstructured_ingest/connector/notion/interfaces.py,sha256=SrTT-9c0nvk0fMqVgudYF647r04AdMKi6wkIkMy7Szw,563
109
108
  unstructured_ingest/connector/notion/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
109
  unstructured_ingest/connector/notion/types/block.py,sha256=AKOY-o6CTFC-caWlkLfKskMuFemH4-Vdrhv7HnRkS8w,3009
@@ -167,6 +166,15 @@ unstructured_ingest/connector/notion/types/database_properties/title.py,sha256=O
167
166
  unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha256=H9lKi8rCDPtKmuu7j9CnJoTUr6YmzIF4oXbv_OxuN9k,1162
168
167
  unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
169
168
  unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
169
+ unstructured_ingest/embed/__init__.py,sha256=whnTiGsSbNqaLObr058CKG5iGxk5OkN_41IBEtHQYW8,900
170
+ unstructured_ingest/embed/bedrock.py,sha256=5uq1S9-7uKaaHiniohm1HXNLhudIYN9TEcctUe2JIpM,2514
171
+ unstructured_ingest/embed/huggingface.py,sha256=fHgZ865I2Efs3QT43n57gmccF9sBzI6T4yhcu_r7zwM,2727
172
+ unstructured_ingest/embed/interfaces.py,sha256=O_USsEcVHRzE2dpHCJEJWKq04NLfdAnm55ZHHTQ3GO0,900
173
+ unstructured_ingest/embed/mixedbreadai.py,sha256=wMdY1a4PyynguIZQ4fPFImKGk9ryqHv0NRL3e3iSPEI,5491
174
+ unstructured_ingest/embed/octoai.py,sha256=ERJby6VdqcIO6NLTqLXVHmX7LNIM0Fsmhf3dn10Z4is,2347
175
+ unstructured_ingest/embed/openai.py,sha256=zOh3GHg1sPVUd3YzZLS5JIV21emYyrCHGYzqH5MIAiY,2250
176
+ unstructured_ingest/embed/vertexai.py,sha256=6IJlNFMW5GKb5r28aM97YUokHMXIlefjpplJxzgpOZc,3332
177
+ unstructured_ingest/embed/voyageai.py,sha256=PwyW-_dXZT1AGSkeF3c2heM-pdBxciUxdtP7PluCauY,2430
170
178
  unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
171
179
  unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
172
180
  unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
@@ -175,18 +183,18 @@ unstructured_ingest/ingest_backoff/__init__.py,sha256=cfdIJuZDFcF3w84sTyYqZ8vXnS
175
183
  unstructured_ingest/ingest_backoff/_common.py,sha256=ey0PN6Hf7aEpQQau710EHlEmQ3hq4YyYzgNLhPzzK58,3724
176
184
  unstructured_ingest/ingest_backoff/_wrapper.py,sha256=tukxuAYn-FbKTofluy9W16ah_6hrBbDAN4ufKEDzfdg,4136
177
185
  unstructured_ingest/pipeline/__init__.py,sha256=5kFH21WHi6i1JZri5miY5tB5c9R8sGMBeweYiWH2fqw,537
178
- unstructured_ingest/pipeline/copy.py,sha256=NwJGLrpP8r6WbWxp3epMYHbQycJUo81r6FjUOjrAlm0,768
186
+ unstructured_ingest/pipeline/copy.py,sha256=hKmugbjQ1PgSfyAyfSA3kH3aG9z4TiSjZBZgmXGdQvQ,768
179
187
  unstructured_ingest/pipeline/doc_factory.py,sha256=Y66k-CoIpwWAD3vWwBeHzI2YESlIsPUhL2OQ8i9RRWE,360
180
- unstructured_ingest/pipeline/interfaces.py,sha256=YTlJnMQTi4x3jyeU8o-zo4QbHW25f7mFO0jx0IgI84o,8136
181
- unstructured_ingest/pipeline/partition.py,sha256=xp1Oj_oHZjukGBWrgW-ElJlQMNWASqjqqNSfbi3tFQE,2779
188
+ unstructured_ingest/pipeline/interfaces.py,sha256=i-Nelobt7C3VN5vbq7a6K3qX9Sb1jUXcA0GmkuFcfBw,8136
189
+ unstructured_ingest/pipeline/partition.py,sha256=Pu-I7VRSh7B5qu_nLQ1uHkmoDuhq4YYSaaFWgy3IBGM,2779
182
190
  unstructured_ingest/pipeline/permissions.py,sha256=jTqiFYrOTPHEP79EmrgyzTi0SseqRCwYkcepH4HctLI,365
183
- unstructured_ingest/pipeline/pipeline.py,sha256=JHsXPGLY129woBcvXMV7wbcstHu_OLB5LR0jIxreNKg,4806
184
- unstructured_ingest/pipeline/source.py,sha256=YMRZkcdCwRWCiwhnDfTSYxdl9Vv5JH5ut3joijWjHOE,3096
191
+ unstructured_ingest/pipeline/pipeline.py,sha256=x-mYYXtosLHyHrNTuuJwkELCC8Gt3VjXjPMsp3kpLYA,4806
192
+ unstructured_ingest/pipeline/source.py,sha256=EMySsaS0V3DaiABMu6xnmc4JULQycy-YM_zTKH0_SJ0,3096
185
193
  unstructured_ingest/pipeline/utils.py,sha256=RNx4bv2FhKOhaK_YTiRubta7n9wmJwqzznFNlY25Dtw,168
186
194
  unstructured_ingest/pipeline/write.py,sha256=xmDjmbieGRrcI342he7PkgxWaMoSJ5nWPmP5AM2xloU,669
187
195
  unstructured_ingest/pipeline/reformat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
- unstructured_ingest/pipeline/reformat/chunking.py,sha256=10LOAU6b2b-S6mzks8VpI1bP2pY1viNDM8dQSPJ8F_s,6035
189
- unstructured_ingest/pipeline/reformat/embedding.py,sha256=PLjVYHgEMnrHAnBW34uYPyhOzvVMhefIRpRTPd4Bnl8,2644
196
+ unstructured_ingest/pipeline/reformat/chunking.py,sha256=vbJgi2Yl9Rd9yZxIf64Nxj6cjUJnJWRpDCagswQmrLw,6040
197
+ unstructured_ingest/pipeline/reformat/embedding.py,sha256=ohNvW9MhVGKVCx8ZlnLlLgkFQ_6UYLA7yUwT7Bzj9I8,2522
190
198
  unstructured_ingest/runner/__init__.py,sha256=FO0X_jBIMilXdyjBajyFmzHoC3eVypNMGlhdOW4mcCM,2859
191
199
  unstructured_ingest/runner/airtable.py,sha256=1ndJ6PKT63E0gZN3KYFBj4Yo94zQYsIvSjC6ro2nIPE,1115
192
200
  unstructured_ingest/runner/astradb.py,sha256=FSBtQrsdC9E3eHUcAuQ0apcCnWolz-9tkvy-Uf7QeKg,1102
@@ -247,27 +255,27 @@ unstructured_ingest/runner/writers/fsspec/gcs.py,sha256=ia-gconOz1kWI1jmYeB9NY6c
247
255
  unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdywczOCr2VI5e_bVms-Vw,622
248
256
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
257
  unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
250
- unstructured_ingest/utils/compression.py,sha256=mgndeNULGH7stoC51hhT4B9HwqMUSL0jhphcia6F_bw,4433
251
- unstructured_ingest/utils/data_prep.py,sha256=SqhRlkzqFH1Sl8YSxgOQvP1tyhcAFcTOepDTVEv11FY,4097
258
+ unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
259
+ unstructured_ingest/utils/data_prep.py,sha256=9UKewDHB8-cMlQ8POvokhjVsy-ksiSqAAW2ibqPYAfk,4400
252
260
  unstructured_ingest/utils/dep_check.py,sha256=cVEqZtMwji8BIt7pjtUOMtEmN7KaNXRXwelEKFpOdW8,1914
253
261
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
254
- unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
262
+ unstructured_ingest/utils/string_and_date_utils.py,sha256=LwcbLmWpwt1zEabLlyUd5kIf9oOWcZxsRzxDglLCMeU,1375
255
263
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
256
264
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
257
- unstructured_ingest/v2/logger.py,sha256=akcghdHwpKM3CfoeFzir0zmc7R9Hk7zjquU-X-gwUIw,4324
265
+ unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
258
266
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
259
- unstructured_ingest/v2/otel.py,sha256=jD-zuezaU5BHQEZfPSEusXNmesEvtrcfNjVPlQp-cmE,4130
260
- unstructured_ingest/v2/utils.py,sha256=ykmyvmRMHGahkpKbkFbJfEHwNjZccKqbYsixUtUtrFw,1478
267
+ unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
268
+ unstructured_ingest/v2/utils.py,sha256=9LnhpI8Otpq5HPcN2Dtkp7APD_euq1ylKhIsybuK99Y,1714
261
269
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
262
270
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
263
271
  unstructured_ingest/v2/cli/cmds.py,sha256=wWUTbvvxEqKAy6bNE6XhPnj0ELMeSbb9_r1NZl58xMM,489
264
272
  unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X85yF8lRxwX_OYQ,83
265
- unstructured_ingest/v2/cli/base/cmd.py,sha256=JJ4ON8IrtfK1ub38er81EPOo3urZDdGL829k-JHcZ7A,11481
266
- unstructured_ingest/v2/cli/base/dest.py,sha256=_m5rUTHusHkXxzKUfcMtX9_xitbqyxajvIxuyev25vg,3197
273
+ unstructured_ingest/v2/cli/base/cmd.py,sha256=a2NE9ZjUfuLIevz0aEs25UsLGCOroJwI-bPc6vBrMCw,11484
274
+ unstructured_ingest/v2/cli/base/dest.py,sha256=zDjqek7anr0JQ2ptEl8KIAsUXuCuHRnBQnJhoPj4NVM,3198
267
275
  unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
268
- unstructured_ingest/v2/cli/base/src.py,sha256=kcBmARpZmH6HFL2GOCAnABkLaRwc85DLa4oiKwfQlpw,2832
276
+ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdjtlPhqn5Mg,2872
269
277
  unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
- unstructured_ingest/v2/cli/utils/click.py,sha256=SmUAiKiFXVCZ4_bhjrFKvYoLhcVEm5z7zJQw_M0Ad2w,6340
278
+ unstructured_ingest/v2/cli/utils/click.py,sha256=Wn2s3PuvBCKB0lsK-W7X_Y0eYyWnS6Y9wWo1OhVBOzY,6344
271
279
  unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=73DKHQQ6Tm0Lz5NCRduDlyfOhY2KH-MZN1n6jUgrsuU,7480
272
280
  unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
273
281
  unstructured_ingest/v2/interfaces/connector.py,sha256=Lm7wJTlKUfVKJjk-BchS0RtZ9_Lo9tzZ62rPtF3HOGY,759
@@ -275,63 +283,64 @@ unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK
275
283
  unstructured_ingest/v2/interfaces/file_data.py,sha256=ieJK-hqHCEOmoYNGoFbCHziSaZyMtRS9VpSoYbwoKCE,1944
276
284
  unstructured_ingest/v2/interfaces/indexer.py,sha256=Bd1S-gTLsxhJBLEh1lYm_gXqwQLaEZMoqPq9yGxtN_E,713
277
285
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
278
- unstructured_ingest/v2/interfaces/processor.py,sha256=1taXZVAHKnWH420N1v-JNXfRGq5roTaYvxqcO1EzpnQ,1772
286
+ unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
279
287
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
280
288
  unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84ixSSJ_NYRjflYsbY,1168
281
289
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
282
- unstructured_ingest/v2/pipeline/interfaces.py,sha256=V9zlEWI5OqTXnV4epfc_Y2TDZyPZCEB4qPam7NlUMEI,8354
290
+ unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
283
291
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
284
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=D8AlVCflOjytyqhNwhpzyVJs-pHJ-FdPt9o1OJXAe-A,15010
292
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=GrcQNotpGl1EtKbpauNh2iHZVNm9vigjEOu7svlUOvM,15660
285
293
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
286
- unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=I5JQn9rVJu9zEnsAZsJzDnxuASp2hdkF8ZRW4dOtgb0,3124
287
- unstructured_ingest/v2/pipeline/steps/download.py,sha256=uT2IoUEI8j5F0YUalYXEpjWXlpsI-TBOUfo-8JMGNLI,7649
288
- unstructured_ingest/v2/pipeline/steps/embed.py,sha256=5wONbMvT_hZRZtHPgquok1ryC66dajCU5iifVfIaP9Y,3102
289
- unstructured_ingest/v2/pipeline/steps/filter.py,sha256=1HM6aBZ5YI0wHQjMXx4KISsiueRlLXVn0mYyiXLMgy4,1188
290
- unstructured_ingest/v2/pipeline/steps/index.py,sha256=0h5sc5mlnMuyxPKmbm4sY6LytqZiAWcP_FJvsYQF4WA,2632
291
- unstructured_ingest/v2/pipeline/steps/partition.py,sha256=2NuXpDQ9brf7D4vPhbalCGpjw80XRGYZAAO-Ist1yKs,3182
292
- unstructured_ingest/v2/pipeline/steps/stage.py,sha256=6gAPzp46DrsOtL914hqgATRDCMvBRI7VtvlsFuMWc4I,2211
293
- unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=6r-XPtj5X7RzS6gL9AvZGO8jeWReMEJqM7JAEcarReE,1745
294
+ unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=gTIxhkK_n6MC3JbLsqnDsrExmYSl1wSnA_CJsOnVZ3w,3124
295
+ unstructured_ingest/v2/pipeline/steps/download.py,sha256=Ld2dp1Znt-BqCcSOkBzBtZV-cGI8Kd-w0wN9Ez9h3Y8,8103
296
+ unstructured_ingest/v2/pipeline/steps/embed.py,sha256=zsOZ-FZzJWOqdKKRnvGJ6c8h18d3Wkscx8wEdkGxcmw,3102
297
+ unstructured_ingest/v2/pipeline/steps/filter.py,sha256=VAob-9p3bsacv_whb3Hb3rUqA6duL1d-zcUsJg7mxJ8,1188
298
+ unstructured_ingest/v2/pipeline/steps/index.py,sha256=0LrzRT-T1-dzZp_ot4ibajaavdhlXdsAwBQXvrEQS2I,2632
299
+ unstructured_ingest/v2/pipeline/steps/partition.py,sha256=-jhNtIZwru5gFs3-C_fXXtdRz8NE8MX8Y2ih0iKQKdk,3182
300
+ unstructured_ingest/v2/pipeline/steps/stage.py,sha256=ZvP-Rz_A5UkhZNoRUvVgciJbGXlP2WIbN5QIZ9wzP8I,2211
301
+ unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=pEu7VU27Fuu53eqBddAmXihV74A6JTwTfnjKeb__edY,1745
294
302
  unstructured_ingest/v2/pipeline/steps/upload.py,sha256=G9z8QQe9b_WokI5qyr4UOOqaepEVgwFqMn9pWcta9gI,1917
295
303
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
296
304
  unstructured_ingest/v2/processes/chunker.py,sha256=W2qPIddT-QEhHTKpA1krUhBrR0UFOq4nuko2eBjlG_I,6675
297
305
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
298
- unstructured_ingest/v2/processes/embedder.py,sha256=1AYF0o41tYtQv-ArGCc1PKGnlmNFDiFmhhpgEuG2d4I,5939
299
- unstructured_ingest/v2/processes/filter.py,sha256=eiAxdYiX8wd4vmD4J40x5t5wwJNmoGa5z33Z9Q-knK8,2145
300
- unstructured_ingest/v2/processes/partitioner.py,sha256=s7R7KVR-w7EtmqyieC-z-ZFv8H5bPn7IvXgqZddyoF8,10040
306
+ unstructured_ingest/v2/processes/embedder.py,sha256=ZBCIm0oHxWmtUEQYyAjXACqTYPt3LnvXLtoFhu6mu8A,6077
307
+ unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
308
+ unstructured_ingest/v2/processes/partitioner.py,sha256=KyWCo7qOQb1wyU8GJ2krejWSNE1vWRHvhEl1V_oDEU8,10040
301
309
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
302
- unstructured_ingest/v2/processes/connectors/__init__.py,sha256=4zKMtzTqSzBKiHG92oE5jZUWw7Dc_RZ8c1VvwePrpjo,4801
310
+ unstructured_ingest/v2/processes/connectors/__init__.py,sha256=6iBdoH6BW8oMK1ZvEi0IgEchuk0cNUPoNIaikpzeML8,4992
311
+ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=LLz3qVhbcZrHXeK1xu8lywj828wsDwYDfSsaNB2CwrA,8915
303
312
  unstructured_ingest/v2/processes/connectors/astradb.py,sha256=bjlzJVNANnpTxRm8Ba8ZS0KetJ_yxmEyEoPJDwUkcOw,5774
304
313
  unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
305
- unstructured_ingest/v2/processes/connectors/chroma.py,sha256=_KaAtxN_8k9vNxAi0C9lSfeLZWcimtijy5Zb5yyyyAg,7954
314
+ unstructured_ingest/v2/processes/connectors/chroma.py,sha256=HRIHZSflSIRpVlLhXl_RLrmskESbAYait3TDBLS1fgU,8099
306
315
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
307
316
  unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=K0Sjt57vsVxL2eImqHzu7LnAONPUVTcDw2-hdLcWjV0,5984
308
- unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=0O7l5LTIKw5bEiA0Nnm1umRjMaUhjJr7XihYSzn1a9g,16750
309
- unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=z2LhIXSj2J7MtbYR7gDRX36_FSLcNxy_Z4CxD3xMAvg,13046
317
+ unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=H8svPr-Yk3UniHT1O9kUd2QhTfrJdzbqRNLFZIMiITQ,16750
318
+ unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=7xOQthcqBd9auJxB0nxZlhh1vdjXpMX_CtQZa6YfZz0,13088
310
319
  unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=vF-Owg2ZDI4nC7sk-Ied-5o_qkfwJzDr3uztOeS8kC0,5653
311
- unstructured_ingest/v2/processes/connectors/local.py,sha256=sXO-t_HZbq3rE3RzPUiWPnlrCHDixcSxz6epg4XgyYo,6786
312
- unstructured_ingest/v2/processes/connectors/milvus.py,sha256=hNMtjdNF6Nv8E_0n5uDpki1kAFdtPplq_5N0W92rrVs,7761
320
+ unstructured_ingest/v2/processes/connectors/local.py,sha256=PdXWzkiXHqCaQq7M06LmcCabg0mRPmaIOET7LA5BwLc,6806
321
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=uF93R9V_tkCQ9XRUEiwMTYdR3vgH9dH9SWJgkaY6Nbw,7761
313
322
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=q_GRuG2RQ5-8ajefifKuhFO52wCVhtU9j4ZIEf5hNas,4948
314
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=-B8ELr0rnspzrTy6HBvgbvuiF1eEKRQyCT1ocwmET5Q,9145
315
- unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=z4UTenXy-iqy9Xlqlf1UTiGdOhIDPowiMg8juWnCh9M,6755
316
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=w7bY9s61c9_a-k3NjAmGjHXJQks-9KpRfpXKW9B6q9E,5744
323
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=ZiUo-dFo1LMOvFwphSLRZiR1PcrN8GWLTHhsh4TU6n0,9207
324
+ unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=PNkJGLCCkwxLvxKX6vF_0jy-LC4wKu8PCXzmULEo9sk,6755
325
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=slIvM5i7_u-LDlNJF-i9oV_1EcKFKuoTaGDRyhZ4p20,6840
317
326
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
318
- unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=_ZTjtFNcKcJ0z4cvEZml18TdOMm-Kbwlz8nxTTjp9nc,19500
327
+ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
319
328
  unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
320
329
  unstructured_ingest/v2/processes/connectors/sql.py,sha256=tDWL3YqL8MQuLsjW8A-KUkpSLh1iOn934OWfzPkqils,9298
321
330
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
322
331
  unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=4hBQ9GWbBv6ti9futVJCShNugDC6Vh7Hy9ZhEC4XDpM,8958
323
332
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
324
333
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=GrC44PnA8PLHUJQ4aH3gETxL8v8UvknbKptxiXweqdc,6214
325
- unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=dVKVCdbKdNSkkZBYJL14-u7aXOr50mzKCmhCVAneuqI,4435
334
+ unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=JgSgF7f4UOx_JUV8zghgykyBA4rKvqkErRm6zYXO1XA,4434
326
335
  unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=ZnoSGAZ4wtOhyg8G3PLYFMpbMVsBffvW-qp5jWwEDuA,4775
327
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=AAP7NNxO5smWYwzsAVbfs91FzpAteZeFI8vZj34ytgg,12441
336
+ unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=HptjVCGfGLOFoVwx8eYK_34sLb41ebCPbmSb5rCF10o,12441
328
337
  unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=O1j0hIYWI4lPpTQ5hsEKV8usDCrUm-t1qVcSNKsJQd0,6006
329
338
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=cOMvMh0C9rtyEPJ0X59Fn-qb11LFUMRfeUgsi3QRWUk,6390
330
339
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=NkcU4U2DQWWuM8UHkez67C3SEOZpVyRtmtNS-z-F0Fw,6056
331
340
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
332
- unstructured_ingest-0.0.13.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
333
- unstructured_ingest-0.0.13.dist-info/METADATA,sha256=xIJhdAI3i4HtbkN5yFE5BwhBh4pYW6FWD9m_iPjV1TM,28110
334
- unstructured_ingest-0.0.13.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
335
- unstructured_ingest-0.0.13.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
336
- unstructured_ingest-0.0.13.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
337
- unstructured_ingest-0.0.13.dist-info/RECORD,,
341
+ unstructured_ingest-0.0.15.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
342
+ unstructured_ingest-0.0.15.dist-info/METADATA,sha256=EYEY-EFmzs3wKIBRVBdDWrGD38a6H6kRw4M0MYC8AbU,28182
343
+ unstructured_ingest-0.0.15.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
344
+ unstructured_ingest-0.0.15.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
345
+ unstructured_ingest-0.0.15.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
346
+ unstructured_ingest-0.0.15.dist-info/RECORD,,
@@ -1,338 +0,0 @@
1
- #! /usr/bin/env python3
2
-
3
- from typing import List, Optional, Tuple, Union
4
-
5
- import click
6
- from unstructured.metrics.evaluate import (
7
- ElementTypeMetricsCalculator,
8
- ObjectDetectionMetricsCalculator,
9
- TableStructureMetricsCalculator,
10
- TextExtractionMetricsCalculator,
11
- filter_metrics,
12
- get_mean_grouping,
13
- )
14
-
15
-
16
- @click.group()
17
- def main():
18
- pass
19
-
20
-
21
- @main.command()
22
- @click.option("--output_dir", type=str, help="Directory to structured output.")
23
- @click.option("--source_dir", type=str, help="Directory to source.")
24
- @click.option(
25
- "--output_list",
26
- type=str,
27
- multiple=True,
28
- help="Optional: list of selected structured output file names under the \
29
- directory to be evaluate. If none, all files under directory will be use.",
30
- )
31
- @click.option(
32
- "--source_list",
33
- type=str,
34
- multiple=True,
35
- help="Optional: list of selected source file names under the directory \
36
- to be evaluate. If none, all files under directory will be use.",
37
- )
38
- @click.option(
39
- "--export_dir",
40
- type=str,
41
- default="metrics",
42
- help="Directory to save the output evaluation metrics to. Default to \
43
- your/working/dir/metrics/",
44
- )
45
- @click.option("--group_by", type=str, help="Input field for aggregration, or leave blank if none.")
46
- @click.option(
47
- "--weights",
48
- type=(int, int, int),
49
- default=(2, 1, 1),
50
- show_default=True,
51
- help="A list of weights to the Levenshtein distance calculation. Takes input as --weights 2 2 2\
52
- See text_extraction.py/calculate_edit_distance for more details.",
53
- )
54
- @click.option(
55
- "--visualize",
56
- is_flag=True,
57
- show_default=True,
58
- default=False,
59
- help="Add the flag to show progress bar.",
60
- )
61
- @click.option(
62
- "--output_type",
63
- type=str,
64
- default="json",
65
- show_default=True,
66
- help="Takes in either `txt` or `json` as output_type.",
67
- )
68
- def measure_text_extraction_accuracy_command(
69
- output_dir: str,
70
- source_dir: str,
71
- export_dir: str,
72
- weights: Tuple[int, int, int],
73
- visualize: bool,
74
- output_type: str,
75
- output_list: Optional[List[str]] = None,
76
- source_list: Optional[List[str]] = None,
77
- group_by: Optional[str] = None,
78
- ):
79
- return (
80
- TextExtractionMetricsCalculator(
81
- documents_dir=output_dir,
82
- ground_truths_dir=source_dir,
83
- group_by=group_by,
84
- weights=weights,
85
- document_type=output_type,
86
- )
87
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
88
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
89
- )
90
-
91
-
92
- @main.command()
93
- @click.option("--output_dir", type=str, help="Directory to structured output.")
94
- @click.option("--source_dir", type=str, help="Directory to structured source.")
95
- @click.option(
96
- "--output_list",
97
- type=str,
98
- multiple=True,
99
- help="Optional: list of selected structured output file names under the \
100
- directory to be evaluate. If none, all files under directory will be used.",
101
- )
102
- @click.option(
103
- "--source_list",
104
- type=str,
105
- multiple=True,
106
- help="Optional: list of selected source file names under the directory \
107
- to be evaluate. If none, all files under directory will be used.",
108
- )
109
- @click.option(
110
- "--export_dir",
111
- type=str,
112
- default="metrics",
113
- help="Directory to save the output evaluation metrics to. Default to \
114
- your/working/dir/metrics/",
115
- )
116
- @click.option(
117
- "--visualize",
118
- is_flag=True,
119
- show_default=True,
120
- default=False,
121
- help="Add the flag to show progress bar.",
122
- )
123
- def measure_element_type_accuracy_command(
124
- output_dir: str,
125
- source_dir: str,
126
- export_dir: str,
127
- visualize: bool,
128
- output_list: Optional[List[str]] = None,
129
- source_list: Optional[List[str]] = None,
130
- ):
131
- return (
132
- ElementTypeMetricsCalculator(
133
- documents_dir=output_dir,
134
- ground_truths_dir=source_dir,
135
- )
136
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
137
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
138
- )
139
-
140
-
141
- @main.command()
142
- @click.option(
143
- "--group_by",
144
- type=str,
145
- required=True,
146
- help="The category to group by; valid values are 'doctype' and 'connector'.",
147
- )
148
- @click.option(
149
- "--data_input",
150
- type=str,
151
- required=True,
152
- help="A datafram or path to the CSV/TSV file containing the data",
153
- )
154
- @click.option(
155
- "--export_dir",
156
- type=str,
157
- default="metrics",
158
- help="Directory to save the output evaluation metrics to. Default to \
159
- your/working/dir/metrics/",
160
- )
161
- @click.option(
162
- "--eval_name",
163
- type=str,
164
- help="Evaluated metric. Expecting one of 'text_extraction' or 'element_type'",
165
- )
166
- @click.option(
167
- "--agg_name",
168
- type=str,
169
- help="String to use with export filename. Default is `cct` for `text_extraction` \
170
- and `element-type` for `element_type`",
171
- )
172
- @click.option(
173
- "--export_filename", type=str, help="Optional. Define your file name for the output here."
174
- )
175
- def get_mean_grouping_command(
176
- group_by: str,
177
- data_input: str,
178
- export_dir: str,
179
- eval_name: str,
180
- agg_name: Optional[str] = None,
181
- export_filename: Optional[str] = None,
182
- ):
183
- return get_mean_grouping(
184
- group_by=group_by,
185
- data_input=data_input,
186
- export_dir=export_dir,
187
- eval_name=eval_name,
188
- agg_name=agg_name,
189
- export_filename=export_filename,
190
- )
191
-
192
-
193
- @main.command()
194
- @click.option("--output_dir", type=str, help="Directory to structured output.")
195
- @click.option("--source_dir", type=str, help="Directory to structured source.")
196
- @click.option(
197
- "--output_list",
198
- type=str,
199
- multiple=True,
200
- help="Optional: list of selected structured output file names under the \
201
- directory to be evaluate. If none, all files under directory will be used.",
202
- )
203
- @click.option(
204
- "--source_list",
205
- type=str,
206
- multiple=True,
207
- help="Optional: list of selected source file names under the directory \
208
- to be evaluate. If none, all files under directory will be used.",
209
- )
210
- @click.option(
211
- "--export_dir",
212
- type=str,
213
- default="metrics",
214
- help="Directory to save the output evaluation metrics to. Default to \
215
- your/working/dir/metrics/",
216
- )
217
- @click.option(
218
- "--visualize",
219
- is_flag=True,
220
- show_default=True,
221
- default=False,
222
- help="Add the flag to show progress bar.",
223
- )
224
- @click.option(
225
- "--cutoff",
226
- type=float,
227
- show_default=True,
228
- default=0.8,
229
- help="The cutoff value for the element level alignment. \
230
- If not set, a default value is used",
231
- )
232
- def measure_table_structure_accuracy_command(
233
- output_dir: str,
234
- source_dir: str,
235
- export_dir: str,
236
- visualize: bool,
237
- output_list: Optional[List[str]] = None,
238
- source_list: Optional[List[str]] = None,
239
- cutoff: Optional[float] = None,
240
- ):
241
- return (
242
- TableStructureMetricsCalculator(
243
- documents_dir=output_dir,
244
- ground_truths_dir=source_dir,
245
- cutoff=cutoff,
246
- )
247
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
248
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
249
- )
250
-
251
-
252
- @main.command()
253
- @click.option("--output_dir", type=str, help="Directory to structured output.")
254
- @click.option("--source_dir", type=str, help="Directory to structured source.")
255
- @click.option(
256
- "--output_list",
257
- type=str,
258
- multiple=True,
259
- help=(
260
- "Optional: list of selected structured output file names under the "
261
- "directory to be evaluated. If none, all files under directory will be used."
262
- ),
263
- )
264
- @click.option(
265
- "--source_list",
266
- type=str,
267
- multiple=True,
268
- help="Optional: list of selected source file names under the directory \
269
- to be evaluate. If none, all files under directory will be used.",
270
- )
271
- @click.option(
272
- "--export_dir",
273
- type=str,
274
- default="metrics",
275
- help="Directory to save the output evaluation metrics to. Default to \
276
- your/working/dir/metrics/",
277
- )
278
- @click.option(
279
- "--visualize",
280
- is_flag=True,
281
- show_default=True,
282
- default=False,
283
- help="Add the flag to show progress bar.",
284
- )
285
- def measure_object_detection_metrics_command(
286
- output_dir: str,
287
- source_dir: str,
288
- export_dir: str,
289
- visualize: bool,
290
- output_list: Optional[List[str]] = None,
291
- source_list: Optional[List[str]] = None,
292
- ):
293
- return (
294
- ObjectDetectionMetricsCalculator(
295
- documents_dir=output_dir,
296
- ground_truths_dir=source_dir,
297
- )
298
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
299
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
300
- )
301
-
302
-
303
- @main.command()
304
- @click.option(
305
- "--data_input", type=str, required=True, help="Takes in path to data file as .tsv .csv .txt"
306
- )
307
- @click.option(
308
- "--filter_list",
309
- type=str,
310
- required=True,
311
- help="Takes in list of string to filter the data_input.",
312
- )
313
- @click.option(
314
- "--filter_by",
315
- type=str,
316
- required=True,
317
- help="Field from data_input to match with filter_list. Default is `filename`.",
318
- )
319
- @click.option(
320
- "--export_filename", type=str, help="Export filename. Required when return_type is `file`"
321
- )
322
- @click.option("--export_dir", type=str, help="Export directory.")
323
- @click.option("--return_type", type=str, help="`dataframe` or `file`. Default is `file`.")
324
- def filter_metrics_command(
325
- data_input: str,
326
- filter_list: Union[str, List[str]],
327
- filter_by: str = "filename",
328
- export_filename: Optional[str] = None,
329
- export_dir: str = "metrics",
330
- return_type: str = "file",
331
- ):
332
- return filter_metrics(
333
- data_input, filter_list, filter_by, export_filename, export_dir, return_type
334
- )
335
-
336
-
337
- if __name__ == "__main__":
338
- main()