unstructured-ingest 0.0.14__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (70) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/interfaces.py +1 -1
  3. unstructured_ingest/cli/utils.py +1 -1
  4. unstructured_ingest/connector/astradb.py +1 -1
  5. unstructured_ingest/connector/biomed.py +4 -4
  6. unstructured_ingest/connector/chroma.py +1 -1
  7. unstructured_ingest/connector/databricks_volumes.py +2 -2
  8. unstructured_ingest/connector/fsspec/box.py +1 -1
  9. unstructured_ingest/connector/fsspec/fsspec.py +5 -5
  10. unstructured_ingest/connector/git.py +1 -1
  11. unstructured_ingest/connector/google_drive.py +4 -4
  12. unstructured_ingest/connector/hubspot.py +1 -1
  13. unstructured_ingest/connector/kafka.py +8 -8
  14. unstructured_ingest/connector/local.py +1 -1
  15. unstructured_ingest/connector/notion/helpers.py +4 -4
  16. unstructured_ingest/connector/onedrive.py +3 -3
  17. unstructured_ingest/connector/outlook.py +2 -2
  18. unstructured_ingest/connector/pinecone.py +1 -1
  19. unstructured_ingest/connector/sharepoint.py +8 -8
  20. unstructured_ingest/connector/vectara.py +6 -6
  21. unstructured_ingest/interfaces.py +4 -4
  22. unstructured_ingest/logger.py +1 -1
  23. unstructured_ingest/pipeline/copy.py +1 -1
  24. unstructured_ingest/pipeline/interfaces.py +2 -2
  25. unstructured_ingest/pipeline/partition.py +1 -1
  26. unstructured_ingest/pipeline/pipeline.py +1 -1
  27. unstructured_ingest/pipeline/reformat/chunking.py +2 -2
  28. unstructured_ingest/pipeline/reformat/embedding.py +1 -1
  29. unstructured_ingest/pipeline/source.py +2 -2
  30. unstructured_ingest/utils/compression.py +3 -3
  31. unstructured_ingest/utils/string_and_date_utils.py +2 -2
  32. unstructured_ingest/v2/cli/base/cmd.py +3 -3
  33. unstructured_ingest/v2/cli/base/dest.py +1 -1
  34. unstructured_ingest/v2/cli/base/src.py +1 -1
  35. unstructured_ingest/v2/cli/utils/click.py +1 -1
  36. unstructured_ingest/v2/interfaces/processor.py +48 -13
  37. unstructured_ingest/v2/logger.py +1 -1
  38. unstructured_ingest/v2/otel.py +1 -1
  39. unstructured_ingest/v2/pipeline/interfaces.py +9 -2
  40. unstructured_ingest/v2/pipeline/pipeline.py +17 -6
  41. unstructured_ingest/v2/pipeline/steps/chunk.py +3 -3
  42. unstructured_ingest/v2/pipeline/steps/download.py +17 -2
  43. unstructured_ingest/v2/pipeline/steps/embed.py +3 -3
  44. unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  45. unstructured_ingest/v2/pipeline/steps/index.py +2 -2
  46. unstructured_ingest/v2/pipeline/steps/partition.py +3 -3
  47. unstructured_ingest/v2/pipeline/steps/stage.py +1 -1
  48. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  49. unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  50. unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
  51. unstructured_ingest/v2/processes/connectors/elasticsearch.py +1 -1
  52. unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
  53. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +4 -4
  54. unstructured_ingest/v2/processes/connectors/google_drive.py +1 -1
  55. unstructured_ingest/v2/processes/connectors/local.py +6 -5
  56. unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
  57. unstructured_ingest/v2/processes/connectors/onedrive.py +2 -2
  58. unstructured_ingest/v2/processes/connectors/opensearch.py +1 -1
  59. unstructured_ingest/v2/processes/connectors/pinecone.py +2 -2
  60. unstructured_ingest/v2/processes/connectors/sharepoint.py +9 -5
  61. unstructured_ingest/v2/processes/filter.py +1 -1
  62. unstructured_ingest/v2/processes/partitioner.py +3 -3
  63. unstructured_ingest/v2/utils.py +7 -0
  64. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/METADATA +213 -215
  65. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/RECORD +69 -69
  66. unstructured_ingest/evaluate.py +0 -338
  67. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/LICENSE.md +0 -0
  68. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/WHEEL +0 -0
  69. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/entry_points.txt +0 -0
  70. {unstructured_ingest-0.0.14.dist-info → unstructured_ingest-0.0.15.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,16 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=FvuQAMXs2twVKSQLT39ab4eOk1k80Ve3pTtytKSWyZI,43
2
+ unstructured_ingest/__version__.py,sha256=ue4xcU8omVbXrnrr4LFRKTwKGHRR5ke-SqtF142v0Xs,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
- unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
- unstructured_ingest/interfaces.py,sha256=OCXhP6PYUE_vtTmZjwEidA5jvHT50Rj_a5sOS7M6IxI,31411
6
- unstructured_ingest/logger.py,sha256=TrhyH7VbCWO5VVuhvL0yUyXxuem3b4pzbqj2uQHUwZk,4480
4
+ unstructured_ingest/interfaces.py,sha256=LuY-85clq5iyOtDd9vDABjK2MZCm6wRWK53cdb4SROY,31411
5
+ unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
7
6
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
8
7
  unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
9
8
  unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29-kqEAA,302
10
9
  unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
11
10
  unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
12
11
  unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
13
- unstructured_ingest/cli/interfaces.py,sha256=1Nw9siznXr3vtr5nh71amjRz1itLYroqqnBQ4-lAG5Q,24130
14
- unstructured_ingest/cli/utils.py,sha256=l7dmDf_KUO3SP4dcVDHjxYAU2b28yR-n-a8xoYVPmw4,7981
12
+ unstructured_ingest/cli/interfaces.py,sha256=6kMmTVm5ia6wUIdOMRu_uErkcIeWndr_6fhPc1AnoYM,24134
13
+ unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
15
14
  unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
15
  unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
17
16
  unstructured_ingest/cli/base/dest.py,sha256=uN44l7kPErm_BQqKFUgaiz_Xu6UKk-mnB1B8c0cb4lQ,3416
@@ -60,51 +59,51 @@ unstructured_ingest/cli/cmds/fsspec/s3.py,sha256=v-24oFxhabdShryK2dhP4cDBvVyoQ-8
60
59
  unstructured_ingest/cli/cmds/fsspec/sftp.py,sha256=TCB7sf_GYoifryQbbttknYSt9Q1kRCPtu8B8QgXl3lw,1537
61
60
  unstructured_ingest/connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
61
  unstructured_ingest/connector/airtable.py,sha256=KcLt-FEabO9D5ev5E4xUf06VYHpYpypP-adTVyhGcb8,10585
63
- unstructured_ingest/connector/astradb.py,sha256=T4jo80foCRQ_QwTClf9abFWWQHCUTlmSOdYK9X8r7aQ,8635
62
+ unstructured_ingest/connector/astradb.py,sha256=kDDcITwmUqgZ2y0cYMxMIsEr3c7s8SXrRI1LplTRlos,8635
64
63
  unstructured_ingest/connector/azure_cognitive_search.py,sha256=cqQdAaEzt4coU7sxnl4GY8Em4a6azFLyAKM6enkmjBA,5850
65
- unstructured_ingest/connector/biomed.py,sha256=dVVyPhlYkJUzbQNg6qwgfqlmCScA4Ttac9ErS9yX4MM,10847
66
- unstructured_ingest/connector/chroma.py,sha256=DgAe5xxDiV1BzAbKusL7JmkCGyEJ9lWTdoyy0txXoTw,5713
64
+ unstructured_ingest/connector/biomed.py,sha256=uwtBuKzpHfxbJckHAHcsnKo4dTCdag66tCDtCqKNSZM,10847
65
+ unstructured_ingest/connector/chroma.py,sha256=Nma6HebQxNY7CCWwWArkX3kMXf2xVv6L-jrfRjMi9LE,5713
67
66
  unstructured_ingest/connector/clarifai.py,sha256=kAtPGrjOps_aYdlhHkTtQc46Rfc0woNor6VY1UGEKZI,4211
68
67
  unstructured_ingest/connector/confluence.py,sha256=VbeGGcoMJQ3WIriHSGz1o5L1u24EeVrYQRjR6t1fcr0,10151
69
- unstructured_ingest/connector/databricks_volumes.py,sha256=zEJZFO2Uq-eQ5k62_SmNsKC4NZ-ykmtQY5XacLWrr0I,4948
68
+ unstructured_ingest/connector/databricks_volumes.py,sha256=s4Go3M-UHPYGUzEEEEhNGBlb9DmZMamjeg3nFRXVgV4,4948
70
69
  unstructured_ingest/connector/delta_table.py,sha256=fHUkZ8v3a20k_r64j-i6ulm4_Zi6eYGdiGKWj4q3BAs,7191
71
70
  unstructured_ingest/connector/discord.py,sha256=SelvVGEF2SThdf8FSSVXGrIBgQoOcNgdKFUfEvpIcg0,6153
72
71
  unstructured_ingest/connector/elasticsearch.py,sha256=UIqTQbXVhHprApfBCXBVBBmPMnWccjoaFgV6shrKG-U,14157
73
- unstructured_ingest/connector/git.py,sha256=Hjf22SrJ_oFn4llxTa_54zW3jnZ6JVYB9tYWhCsrr1o,3817
72
+ unstructured_ingest/connector/git.py,sha256=_kIHVXZsLzK8WAJ0s3El1eGX1S2UJoGbFGqOVCIDf8g,3817
74
73
  unstructured_ingest/connector/github.py,sha256=STgcJMcc4RSfOw-N-_Cb97LkHmk1nSI-ivdco7p-7y4,6578
75
74
  unstructured_ingest/connector/gitlab.py,sha256=OEilnSFabWT3XY0riNxVTXc9tS3f1lMyHI6oZzb3Cw0,4926
76
- unstructured_ingest/connector/google_drive.py,sha256=BgwYmQAL8C91HqxD0pOM2YX3iT406pHOoK8RxzqGRsk,13054
77
- unstructured_ingest/connector/hubspot.py,sha256=teIvVsX1sSf2vILX9sri8ohpC0SA3yHA5sc-moKqls0,9271
75
+ unstructured_ingest/connector/google_drive.py,sha256=Sl6r-IcbV_7s8LeMg2z8qiko2r5RAyRnDzBxMwvY6ng,13053
76
+ unstructured_ingest/connector/hubspot.py,sha256=jL-bqU4EJIqbG0YRk9IR3MKsHi_WHf86Fy6r1fVeCz4,9271
78
77
  unstructured_ingest/connector/jira.py,sha256=kxjGhbVSH8FJNPMGJbnpZEV5zZRfGFckVJFiOzExphQ,15690
79
- unstructured_ingest/connector/kafka.py,sha256=cVEX_yn_9Vdvz6lujf1GdThsJfxJnVsdjfTnqhx7p1A,10053
80
- unstructured_ingest/connector/local.py,sha256=OyGedubpn39bLs5Z4EeZqsb1Q-M-cJkWcrUV8eQ9yec,4479
78
+ unstructured_ingest/connector/kafka.py,sha256=D0XPf0IZ4XkLjnYy7I1FDc5XucNehJLlj-yCKjBMr0w,10053
79
+ unstructured_ingest/connector/local.py,sha256=ayEz7gtnr1ioiYWmJ5ElSlSC8_ZFk1rk-9sX2htRq6c,4479
81
80
  unstructured_ingest/connector/mongodb.py,sha256=UD8T1V435YvGY68dpL-fyFesD7bcLckptgXzzfgnILI,9771
82
- unstructured_ingest/connector/onedrive.py,sha256=hvVuC-Kup88ZMbJpXG8AxRiuQyscZw6nOVLOjlF_pK4,8911
81
+ unstructured_ingest/connector/onedrive.py,sha256=-yy3scFHVIUiPAAQdmJXel3_BMZnZc9qUI8HwecuoJ4,8911
83
82
  unstructured_ingest/connector/opensearch.py,sha256=kvzqEqanP6nGHjxCJ2e2CAz9iK8na3yYBX1l4ZuVq0A,7937
84
- unstructured_ingest/connector/outlook.py,sha256=Qbxrt_2ZSz329MxK5hb1_MYndPvPSXxCSfD0dMCy0Gs,10443
85
- unstructured_ingest/connector/pinecone.py,sha256=hh4hbW7P8ebXf9n4S7ilvcL3Qzt9XEeZwA6_BkPnFEY,4796
83
+ unstructured_ingest/connector/outlook.py,sha256=f7WXb1xhf4iA3B7HTOCz2KuqxrywuChoDsDSy-erwYY,10443
84
+ unstructured_ingest/connector/pinecone.py,sha256=koUO3EVXP_cglbs3XtXTgNQJVmUmYfDQpYi79jclP3k,4796
86
85
  unstructured_ingest/connector/qdrant.py,sha256=Y1PAW6ueAzkTxoeViZ7JjkErFJNJlSYvzaRU1c-hcJA,4964
87
86
  unstructured_ingest/connector/reddit.py,sha256=8pyVSXXKGS9vOlNBeXw1ev5oqu-uWka5hzgUI8CFRos,5457
88
87
  unstructured_ingest/connector/registry.py,sha256=SxXKzOGimHGYOPDSCsYm_xhbwNb-DIcv6XqxoPRIaIY,4846
89
88
  unstructured_ingest/connector/salesforce.py,sha256=FrzevH1xB9deXdgt1ph7xa8BRFI8qC2sxGR4KsUHWSY,10941
90
- unstructured_ingest/connector/sharepoint.py,sha256=4Ex4_rCOvA_7g2YmtsZd_mISjfCD_jRFtk_-JmC4lUc,22159
89
+ unstructured_ingest/connector/sharepoint.py,sha256=7-UKNTqyOD7N2kA4TpIW0z0T5tPzGeBvhZOBEiHrdeY,22159
91
90
  unstructured_ingest/connector/slack.py,sha256=1CJ19N2yWrAF1viUrqa4Yb-BUbCrUHmGMkUHhFEe6m4,7617
92
91
  unstructured_ingest/connector/sql.py,sha256=YWJIuNtXkhwW_h7nlxkmzZhzMcICkZc1ezZ1CTzcf54,7625
93
- unstructured_ingest/connector/vectara.py,sha256=r7nE2kC9-2mfpMSNmVH1F_OkLetSapJ2Xj4mFAJJJ88,9539
92
+ unstructured_ingest/connector/vectara.py,sha256=l_AuCYkY3nmojF1sg347sDpmx8oIoIZi0Z9iHxoH82E,9540
94
93
  unstructured_ingest/connector/weaviate.py,sha256=Pi0bqyTJhXk_1zdbmJCYvW1inHNTBa0i3cYKRRPcXO0,7291
95
94
  unstructured_ingest/connector/wikipedia.py,sha256=lGccBwl2JlFJNIWqKj3SmUyTrC4xpmeFliCfahFrXRs,5992
96
95
  unstructured_ingest/connector/fsspec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
96
  unstructured_ingest/connector/fsspec/azure.py,sha256=6rBbH3TtbMF4KxP5oNLADvu2HDLc8VgIaAJoWx8ukeA,2580
98
- unstructured_ingest/connector/fsspec/box.py,sha256=rFoyDy_xplMFrqpPpee_cSI0P0FgejGaLK_8BxhA6kY,3429
97
+ unstructured_ingest/connector/fsspec/box.py,sha256=6vC7y313Hu4zZtX3GZT9uTu6qyKZkhr3U6rRR2rFuR0,3428
99
98
  unstructured_ingest/connector/fsspec/dropbox.py,sha256=W46bpTDvw5AGqM9GMpzXpjqumJgt5SxVSaRW7jNBUa0,5911
100
- unstructured_ingest/connector/fsspec/fsspec.py,sha256=k9Olxy7PzFW6d2Kcujqf7IvqsdxzYmwaTkGIPoIKBFs,13084
99
+ unstructured_ingest/connector/fsspec/fsspec.py,sha256=XpbKKyn2f0yehLNN4FpFA_igLa4p69ni6rJr5hbby7Q,13085
101
100
  unstructured_ingest/connector/fsspec/gcs.py,sha256=2PH5jBn09d3ZoM2j0RR-HSOjM0n1HR4XIPSiTmwCT0s,2257
102
101
  unstructured_ingest/connector/fsspec/s3.py,sha256=iMsdTzy2KYqkxQJ57UyuZAahtvE21iMT5SsgD4DC7RU,1723
103
102
  unstructured_ingest/connector/fsspec/sftp.py,sha256=x2w8JGM81S_HXww7Aa-bTY1LjZSis56aOpCinga_bok,2653
104
103
  unstructured_ingest/connector/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
104
  unstructured_ingest/connector/notion/client.py,sha256=j5_CEFzI3Bx7oxTGOPoT5sazrzIPTX2467dmCLyziNg,9169
106
105
  unstructured_ingest/connector/notion/connector.py,sha256=8A9d-Pej-uXzjEy85zUloxIPK-EbpI5heVR6AVqOvU8,17538
107
- unstructured_ingest/connector/notion/helpers.py,sha256=jqg5-cPcrjm5G7dBF4jOjHxdJN1CI7yBeTefiX4hBoM,20702
106
+ unstructured_ingest/connector/notion/helpers.py,sha256=-eEB8eSqdD5bWX_QEA2hZz1siucC0FNEUEqCEJptiVk,20702
108
107
  unstructured_ingest/connector/notion/interfaces.py,sha256=SrTT-9c0nvk0fMqVgudYF647r04AdMKi6wkIkMy7Szw,563
109
108
  unstructured_ingest/connector/notion/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
109
  unstructured_ingest/connector/notion/types/block.py,sha256=AKOY-o6CTFC-caWlkLfKskMuFemH4-Vdrhv7HnRkS8w,3009
@@ -184,18 +183,18 @@ unstructured_ingest/ingest_backoff/__init__.py,sha256=cfdIJuZDFcF3w84sTyYqZ8vXnS
184
183
  unstructured_ingest/ingest_backoff/_common.py,sha256=ey0PN6Hf7aEpQQau710EHlEmQ3hq4YyYzgNLhPzzK58,3724
185
184
  unstructured_ingest/ingest_backoff/_wrapper.py,sha256=tukxuAYn-FbKTofluy9W16ah_6hrBbDAN4ufKEDzfdg,4136
186
185
  unstructured_ingest/pipeline/__init__.py,sha256=5kFH21WHi6i1JZri5miY5tB5c9R8sGMBeweYiWH2fqw,537
187
- unstructured_ingest/pipeline/copy.py,sha256=NwJGLrpP8r6WbWxp3epMYHbQycJUo81r6FjUOjrAlm0,768
186
+ unstructured_ingest/pipeline/copy.py,sha256=hKmugbjQ1PgSfyAyfSA3kH3aG9z4TiSjZBZgmXGdQvQ,768
188
187
  unstructured_ingest/pipeline/doc_factory.py,sha256=Y66k-CoIpwWAD3vWwBeHzI2YESlIsPUhL2OQ8i9RRWE,360
189
- unstructured_ingest/pipeline/interfaces.py,sha256=YTlJnMQTi4x3jyeU8o-zo4QbHW25f7mFO0jx0IgI84o,8136
190
- unstructured_ingest/pipeline/partition.py,sha256=xp1Oj_oHZjukGBWrgW-ElJlQMNWASqjqqNSfbi3tFQE,2779
188
+ unstructured_ingest/pipeline/interfaces.py,sha256=i-Nelobt7C3VN5vbq7a6K3qX9Sb1jUXcA0GmkuFcfBw,8136
189
+ unstructured_ingest/pipeline/partition.py,sha256=Pu-I7VRSh7B5qu_nLQ1uHkmoDuhq4YYSaaFWgy3IBGM,2779
191
190
  unstructured_ingest/pipeline/permissions.py,sha256=jTqiFYrOTPHEP79EmrgyzTi0SseqRCwYkcepH4HctLI,365
192
- unstructured_ingest/pipeline/pipeline.py,sha256=JHsXPGLY129woBcvXMV7wbcstHu_OLB5LR0jIxreNKg,4806
193
- unstructured_ingest/pipeline/source.py,sha256=YMRZkcdCwRWCiwhnDfTSYxdl9Vv5JH5ut3joijWjHOE,3096
191
+ unstructured_ingest/pipeline/pipeline.py,sha256=x-mYYXtosLHyHrNTuuJwkELCC8Gt3VjXjPMsp3kpLYA,4806
192
+ unstructured_ingest/pipeline/source.py,sha256=EMySsaS0V3DaiABMu6xnmc4JULQycy-YM_zTKH0_SJ0,3096
194
193
  unstructured_ingest/pipeline/utils.py,sha256=RNx4bv2FhKOhaK_YTiRubta7n9wmJwqzznFNlY25Dtw,168
195
194
  unstructured_ingest/pipeline/write.py,sha256=xmDjmbieGRrcI342he7PkgxWaMoSJ5nWPmP5AM2xloU,669
196
195
  unstructured_ingest/pipeline/reformat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
- unstructured_ingest/pipeline/reformat/chunking.py,sha256=10LOAU6b2b-S6mzks8VpI1bP2pY1viNDM8dQSPJ8F_s,6035
198
- unstructured_ingest/pipeline/reformat/embedding.py,sha256=YVB8-GK-nAOZ0EaEbNWGrkSQIRyOHyEiQA3H4SEcqng,2522
196
+ unstructured_ingest/pipeline/reformat/chunking.py,sha256=vbJgi2Yl9Rd9yZxIf64Nxj6cjUJnJWRpDCagswQmrLw,6040
197
+ unstructured_ingest/pipeline/reformat/embedding.py,sha256=ohNvW9MhVGKVCx8ZlnLlLgkFQ_6UYLA7yUwT7Bzj9I8,2522
199
198
  unstructured_ingest/runner/__init__.py,sha256=FO0X_jBIMilXdyjBajyFmzHoC3eVypNMGlhdOW4mcCM,2859
200
199
  unstructured_ingest/runner/airtable.py,sha256=1ndJ6PKT63E0gZN3KYFBj4Yo94zQYsIvSjC6ro2nIPE,1115
201
200
  unstructured_ingest/runner/astradb.py,sha256=FSBtQrsdC9E3eHUcAuQ0apcCnWolz-9tkvy-Uf7QeKg,1102
@@ -256,27 +255,27 @@ unstructured_ingest/runner/writers/fsspec/gcs.py,sha256=ia-gconOz1kWI1jmYeB9NY6c
256
255
  unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdywczOCr2VI5e_bVms-Vw,622
257
256
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
258
257
  unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
259
- unstructured_ingest/utils/compression.py,sha256=mgndeNULGH7stoC51hhT4B9HwqMUSL0jhphcia6F_bw,4433
258
+ unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
260
259
  unstructured_ingest/utils/data_prep.py,sha256=9UKewDHB8-cMlQ8POvokhjVsy-ksiSqAAW2ibqPYAfk,4400
261
260
  unstructured_ingest/utils/dep_check.py,sha256=cVEqZtMwji8BIt7pjtUOMtEmN7KaNXRXwelEKFpOdW8,1914
262
261
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
263
- unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
262
+ unstructured_ingest/utils/string_and_date_utils.py,sha256=LwcbLmWpwt1zEabLlyUd5kIf9oOWcZxsRzxDglLCMeU,1375
264
263
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
265
264
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
266
- unstructured_ingest/v2/logger.py,sha256=akcghdHwpKM3CfoeFzir0zmc7R9Hk7zjquU-X-gwUIw,4324
265
+ unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
267
266
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
268
- unstructured_ingest/v2/otel.py,sha256=jD-zuezaU5BHQEZfPSEusXNmesEvtrcfNjVPlQp-cmE,4130
269
- unstructured_ingest/v2/utils.py,sha256=ykmyvmRMHGahkpKbkFbJfEHwNjZccKqbYsixUtUtrFw,1478
267
+ unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
268
+ unstructured_ingest/v2/utils.py,sha256=9LnhpI8Otpq5HPcN2Dtkp7APD_euq1ylKhIsybuK99Y,1714
270
269
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
271
270
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
272
271
  unstructured_ingest/v2/cli/cmds.py,sha256=wWUTbvvxEqKAy6bNE6XhPnj0ELMeSbb9_r1NZl58xMM,489
273
272
  unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X85yF8lRxwX_OYQ,83
274
- unstructured_ingest/v2/cli/base/cmd.py,sha256=JJ4ON8IrtfK1ub38er81EPOo3urZDdGL829k-JHcZ7A,11481
275
- unstructured_ingest/v2/cli/base/dest.py,sha256=_m5rUTHusHkXxzKUfcMtX9_xitbqyxajvIxuyev25vg,3197
273
+ unstructured_ingest/v2/cli/base/cmd.py,sha256=a2NE9ZjUfuLIevz0aEs25UsLGCOroJwI-bPc6vBrMCw,11484
274
+ unstructured_ingest/v2/cli/base/dest.py,sha256=zDjqek7anr0JQ2ptEl8KIAsUXuCuHRnBQnJhoPj4NVM,3198
276
275
  unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
277
- unstructured_ingest/v2/cli/base/src.py,sha256=2oqOjsrXANHZ0PJYDc2NV9Dg15nUgWw_B0ouOJvme7I,2871
276
+ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdjtlPhqn5Mg,2872
278
277
  unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
279
- unstructured_ingest/v2/cli/utils/click.py,sha256=SmUAiKiFXVCZ4_bhjrFKvYoLhcVEm5z7zJQw_M0Ad2w,6340
278
+ unstructured_ingest/v2/cli/utils/click.py,sha256=Wn2s3PuvBCKB0lsK-W7X_Y0eYyWnS6Y9wWo1OhVBOzY,6344
280
279
  unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=73DKHQQ6Tm0Lz5NCRduDlyfOhY2KH-MZN1n6jUgrsuU,7480
281
280
  unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
282
281
  unstructured_ingest/v2/interfaces/connector.py,sha256=Lm7wJTlKUfVKJjk-BchS0RtZ9_Lo9tzZ62rPtF3HOGY,759
@@ -284,63 +283,64 @@ unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK
284
283
  unstructured_ingest/v2/interfaces/file_data.py,sha256=ieJK-hqHCEOmoYNGoFbCHziSaZyMtRS9VpSoYbwoKCE,1944
285
284
  unstructured_ingest/v2/interfaces/indexer.py,sha256=Bd1S-gTLsxhJBLEh1lYm_gXqwQLaEZMoqPq9yGxtN_E,713
286
285
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
287
- unstructured_ingest/v2/interfaces/processor.py,sha256=1taXZVAHKnWH420N1v-JNXfRGq5roTaYvxqcO1EzpnQ,1772
286
+ unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
288
287
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
289
288
  unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84ixSSJ_NYRjflYsbY,1168
290
289
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
291
- unstructured_ingest/v2/pipeline/interfaces.py,sha256=xvs4AaT92UFdvrg6BNurIUsUOFcyPqvh80j3L8RcrzA,8397
290
+ unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
292
291
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
293
- unstructured_ingest/v2/pipeline/pipeline.py,sha256=eVxZWzMf9oLHVCSEyglJX2YK-xAs0jsV3tiHY8HVJLo,15074
292
+ unstructured_ingest/v2/pipeline/pipeline.py,sha256=GrcQNotpGl1EtKbpauNh2iHZVNm9vigjEOu7svlUOvM,15660
294
293
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
295
- unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=I5JQn9rVJu9zEnsAZsJzDnxuASp2hdkF8ZRW4dOtgb0,3124
296
- unstructured_ingest/v2/pipeline/steps/download.py,sha256=uT2IoUEI8j5F0YUalYXEpjWXlpsI-TBOUfo-8JMGNLI,7649
297
- unstructured_ingest/v2/pipeline/steps/embed.py,sha256=5wONbMvT_hZRZtHPgquok1ryC66dajCU5iifVfIaP9Y,3102
298
- unstructured_ingest/v2/pipeline/steps/filter.py,sha256=1HM6aBZ5YI0wHQjMXx4KISsiueRlLXVn0mYyiXLMgy4,1188
299
- unstructured_ingest/v2/pipeline/steps/index.py,sha256=0h5sc5mlnMuyxPKmbm4sY6LytqZiAWcP_FJvsYQF4WA,2632
300
- unstructured_ingest/v2/pipeline/steps/partition.py,sha256=2NuXpDQ9brf7D4vPhbalCGpjw80XRGYZAAO-Ist1yKs,3182
301
- unstructured_ingest/v2/pipeline/steps/stage.py,sha256=6gAPzp46DrsOtL914hqgATRDCMvBRI7VtvlsFuMWc4I,2211
302
- unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=6r-XPtj5X7RzS6gL9AvZGO8jeWReMEJqM7JAEcarReE,1745
294
+ unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=gTIxhkK_n6MC3JbLsqnDsrExmYSl1wSnA_CJsOnVZ3w,3124
295
+ unstructured_ingest/v2/pipeline/steps/download.py,sha256=Ld2dp1Znt-BqCcSOkBzBtZV-cGI8Kd-w0wN9Ez9h3Y8,8103
296
+ unstructured_ingest/v2/pipeline/steps/embed.py,sha256=zsOZ-FZzJWOqdKKRnvGJ6c8h18d3Wkscx8wEdkGxcmw,3102
297
+ unstructured_ingest/v2/pipeline/steps/filter.py,sha256=VAob-9p3bsacv_whb3Hb3rUqA6duL1d-zcUsJg7mxJ8,1188
298
+ unstructured_ingest/v2/pipeline/steps/index.py,sha256=0LrzRT-T1-dzZp_ot4ibajaavdhlXdsAwBQXvrEQS2I,2632
299
+ unstructured_ingest/v2/pipeline/steps/partition.py,sha256=-jhNtIZwru5gFs3-C_fXXtdRz8NE8MX8Y2ih0iKQKdk,3182
300
+ unstructured_ingest/v2/pipeline/steps/stage.py,sha256=ZvP-Rz_A5UkhZNoRUvVgciJbGXlP2WIbN5QIZ9wzP8I,2211
301
+ unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=pEu7VU27Fuu53eqBddAmXihV74A6JTwTfnjKeb__edY,1745
303
302
  unstructured_ingest/v2/pipeline/steps/upload.py,sha256=G9z8QQe9b_WokI5qyr4UOOqaepEVgwFqMn9pWcta9gI,1917
304
303
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
305
304
  unstructured_ingest/v2/processes/chunker.py,sha256=W2qPIddT-QEhHTKpA1krUhBrR0UFOq4nuko2eBjlG_I,6675
306
305
  unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
307
306
  unstructured_ingest/v2/processes/embedder.py,sha256=ZBCIm0oHxWmtUEQYyAjXACqTYPt3LnvXLtoFhu6mu8A,6077
308
- unstructured_ingest/v2/processes/filter.py,sha256=eiAxdYiX8wd4vmD4J40x5t5wwJNmoGa5z33Z9Q-knK8,2145
309
- unstructured_ingest/v2/processes/partitioner.py,sha256=s7R7KVR-w7EtmqyieC-z-ZFv8H5bPn7IvXgqZddyoF8,10040
307
+ unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
308
+ unstructured_ingest/v2/processes/partitioner.py,sha256=KyWCo7qOQb1wyU8GJ2krejWSNE1vWRHvhEl1V_oDEU8,10040
310
309
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
311
- unstructured_ingest/v2/processes/connectors/__init__.py,sha256=4zKMtzTqSzBKiHG92oE5jZUWw7Dc_RZ8c1VvwePrpjo,4801
310
+ unstructured_ingest/v2/processes/connectors/__init__.py,sha256=6iBdoH6BW8oMK1ZvEi0IgEchuk0cNUPoNIaikpzeML8,4992
311
+ unstructured_ingest/v2/processes/connectors/airtable.py,sha256=LLz3qVhbcZrHXeK1xu8lywj828wsDwYDfSsaNB2CwrA,8915
312
312
  unstructured_ingest/v2/processes/connectors/astradb.py,sha256=bjlzJVNANnpTxRm8Ba8ZS0KetJ_yxmEyEoPJDwUkcOw,5774
313
313
  unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
314
314
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=HRIHZSflSIRpVlLhXl_RLrmskESbAYait3TDBLS1fgU,8099
315
315
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
316
316
  unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=K0Sjt57vsVxL2eImqHzu7LnAONPUVTcDw2-hdLcWjV0,5984
317
- unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=0O7l5LTIKw5bEiA0Nnm1umRjMaUhjJr7XihYSzn1a9g,16750
318
- unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=6uoHmiR8Hn2GQ0YJVSDpBEHkkDjNvpZgFynlEeKkM88,13088
317
+ unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=H8svPr-Yk3UniHT1O9kUd2QhTfrJdzbqRNLFZIMiITQ,16750
318
+ unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=7xOQthcqBd9auJxB0nxZlhh1vdjXpMX_CtQZa6YfZz0,13088
319
319
  unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=vF-Owg2ZDI4nC7sk-Ied-5o_qkfwJzDr3uztOeS8kC0,5653
320
- unstructured_ingest/v2/processes/connectors/local.py,sha256=sXO-t_HZbq3rE3RzPUiWPnlrCHDixcSxz6epg4XgyYo,6786
321
- unstructured_ingest/v2/processes/connectors/milvus.py,sha256=hNMtjdNF6Nv8E_0n5uDpki1kAFdtPplq_5N0W92rrVs,7761
320
+ unstructured_ingest/v2/processes/connectors/local.py,sha256=PdXWzkiXHqCaQq7M06LmcCabg0mRPmaIOET7LA5BwLc,6806
321
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=uF93R9V_tkCQ9XRUEiwMTYdR3vgH9dH9SWJgkaY6Nbw,7761
322
322
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=q_GRuG2RQ5-8ajefifKuhFO52wCVhtU9j4ZIEf5hNas,4948
323
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=xUh-z0JltJ4iua2EUVUXXHuqjfQKlTmAkH0oam5gBhI,9207
324
- unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=z4UTenXy-iqy9Xlqlf1UTiGdOhIDPowiMg8juWnCh9M,6755
325
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=Tz01LN_RssGx9k6aYcgA3vmp1OLYCIdJd8c1I8Bn7hQ,6840
323
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=ZiUo-dFo1LMOvFwphSLRZiR1PcrN8GWLTHhsh4TU6n0,9207
324
+ unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=PNkJGLCCkwxLvxKX6vF_0jy-LC4wKu8PCXzmULEo9sk,6755
325
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=slIvM5i7_u-LDlNJF-i9oV_1EcKFKuoTaGDRyhZ4p20,6840
326
326
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
327
- unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=JrnTwhp7vP0HHcZTXopPVA-GvE4pvfosw0S1VQzBz5Y,19501
327
+ unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
328
328
  unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
329
329
  unstructured_ingest/v2/processes/connectors/sql.py,sha256=tDWL3YqL8MQuLsjW8A-KUkpSLh1iOn934OWfzPkqils,9298
330
330
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
331
331
  unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=4hBQ9GWbBv6ti9futVJCShNugDC6Vh7Hy9ZhEC4XDpM,8958
332
332
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
333
333
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=GrC44PnA8PLHUJQ4aH3gETxL8v8UvknbKptxiXweqdc,6214
334
- unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=dVKVCdbKdNSkkZBYJL14-u7aXOr50mzKCmhCVAneuqI,4435
334
+ unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=JgSgF7f4UOx_JUV8zghgykyBA4rKvqkErRm6zYXO1XA,4434
335
335
  unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=ZnoSGAZ4wtOhyg8G3PLYFMpbMVsBffvW-qp5jWwEDuA,4775
336
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=AAP7NNxO5smWYwzsAVbfs91FzpAteZeFI8vZj34ytgg,12441
336
+ unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=HptjVCGfGLOFoVwx8eYK_34sLb41ebCPbmSb5rCF10o,12441
337
337
  unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=O1j0hIYWI4lPpTQ5hsEKV8usDCrUm-t1qVcSNKsJQd0,6006
338
338
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=cOMvMh0C9rtyEPJ0X59Fn-qb11LFUMRfeUgsi3QRWUk,6390
339
339
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=NkcU4U2DQWWuM8UHkez67C3SEOZpVyRtmtNS-z-F0Fw,6056
340
340
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
341
- unstructured_ingest-0.0.14.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
342
- unstructured_ingest-0.0.14.dist-info/METADATA,sha256=Tz-j7IRCn1plW1J7ysFY3rRV2ckgj5H3gdkpWu4LfRI,28289
343
- unstructured_ingest-0.0.14.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
344
- unstructured_ingest-0.0.14.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
345
- unstructured_ingest-0.0.14.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
346
- unstructured_ingest-0.0.14.dist-info/RECORD,,
341
+ unstructured_ingest-0.0.15.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
342
+ unstructured_ingest-0.0.15.dist-info/METADATA,sha256=EYEY-EFmzs3wKIBRVBdDWrGD38a6H6kRw4M0MYC8AbU,28182
343
+ unstructured_ingest-0.0.15.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
344
+ unstructured_ingest-0.0.15.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
345
+ unstructured_ingest-0.0.15.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
346
+ unstructured_ingest-0.0.15.dist-info/RECORD,,
@@ -1,338 +0,0 @@
1
- #! /usr/bin/env python3
2
-
3
- from typing import List, Optional, Tuple, Union
4
-
5
- import click
6
- from unstructured.metrics.evaluate import (
7
- ElementTypeMetricsCalculator,
8
- ObjectDetectionMetricsCalculator,
9
- TableStructureMetricsCalculator,
10
- TextExtractionMetricsCalculator,
11
- filter_metrics,
12
- get_mean_grouping,
13
- )
14
-
15
-
16
- @click.group()
17
- def main():
18
- pass
19
-
20
-
21
- @main.command()
22
- @click.option("--output_dir", type=str, help="Directory to structured output.")
23
- @click.option("--source_dir", type=str, help="Directory to source.")
24
- @click.option(
25
- "--output_list",
26
- type=str,
27
- multiple=True,
28
- help="Optional: list of selected structured output file names under the \
29
- directory to be evaluate. If none, all files under directory will be use.",
30
- )
31
- @click.option(
32
- "--source_list",
33
- type=str,
34
- multiple=True,
35
- help="Optional: list of selected source file names under the directory \
36
- to be evaluate. If none, all files under directory will be use.",
37
- )
38
- @click.option(
39
- "--export_dir",
40
- type=str,
41
- default="metrics",
42
- help="Directory to save the output evaluation metrics to. Default to \
43
- your/working/dir/metrics/",
44
- )
45
- @click.option("--group_by", type=str, help="Input field for aggregration, or leave blank if none.")
46
- @click.option(
47
- "--weights",
48
- type=(int, int, int),
49
- default=(2, 1, 1),
50
- show_default=True,
51
- help="A list of weights to the Levenshtein distance calculation. Takes input as --weights 2 2 2\
52
- See text_extraction.py/calculate_edit_distance for more details.",
53
- )
54
- @click.option(
55
- "--visualize",
56
- is_flag=True,
57
- show_default=True,
58
- default=False,
59
- help="Add the flag to show progress bar.",
60
- )
61
- @click.option(
62
- "--output_type",
63
- type=str,
64
- default="json",
65
- show_default=True,
66
- help="Takes in either `txt` or `json` as output_type.",
67
- )
68
- def measure_text_extraction_accuracy_command(
69
- output_dir: str,
70
- source_dir: str,
71
- export_dir: str,
72
- weights: Tuple[int, int, int],
73
- visualize: bool,
74
- output_type: str,
75
- output_list: Optional[List[str]] = None,
76
- source_list: Optional[List[str]] = None,
77
- group_by: Optional[str] = None,
78
- ):
79
- return (
80
- TextExtractionMetricsCalculator(
81
- documents_dir=output_dir,
82
- ground_truths_dir=source_dir,
83
- group_by=group_by,
84
- weights=weights,
85
- document_type=output_type,
86
- )
87
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
88
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
89
- )
90
-
91
-
92
- @main.command()
93
- @click.option("--output_dir", type=str, help="Directory to structured output.")
94
- @click.option("--source_dir", type=str, help="Directory to structured source.")
95
- @click.option(
96
- "--output_list",
97
- type=str,
98
- multiple=True,
99
- help="Optional: list of selected structured output file names under the \
100
- directory to be evaluate. If none, all files under directory will be used.",
101
- )
102
- @click.option(
103
- "--source_list",
104
- type=str,
105
- multiple=True,
106
- help="Optional: list of selected source file names under the directory \
107
- to be evaluate. If none, all files under directory will be used.",
108
- )
109
- @click.option(
110
- "--export_dir",
111
- type=str,
112
- default="metrics",
113
- help="Directory to save the output evaluation metrics to. Default to \
114
- your/working/dir/metrics/",
115
- )
116
- @click.option(
117
- "--visualize",
118
- is_flag=True,
119
- show_default=True,
120
- default=False,
121
- help="Add the flag to show progress bar.",
122
- )
123
- def measure_element_type_accuracy_command(
124
- output_dir: str,
125
- source_dir: str,
126
- export_dir: str,
127
- visualize: bool,
128
- output_list: Optional[List[str]] = None,
129
- source_list: Optional[List[str]] = None,
130
- ):
131
- return (
132
- ElementTypeMetricsCalculator(
133
- documents_dir=output_dir,
134
- ground_truths_dir=source_dir,
135
- )
136
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
137
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
138
- )
139
-
140
-
141
- @main.command()
142
- @click.option(
143
- "--group_by",
144
- type=str,
145
- required=True,
146
- help="The category to group by; valid values are 'doctype' and 'connector'.",
147
- )
148
- @click.option(
149
- "--data_input",
150
- type=str,
151
- required=True,
152
- help="A datafram or path to the CSV/TSV file containing the data",
153
- )
154
- @click.option(
155
- "--export_dir",
156
- type=str,
157
- default="metrics",
158
- help="Directory to save the output evaluation metrics to. Default to \
159
- your/working/dir/metrics/",
160
- )
161
- @click.option(
162
- "--eval_name",
163
- type=str,
164
- help="Evaluated metric. Expecting one of 'text_extraction' or 'element_type'",
165
- )
166
- @click.option(
167
- "--agg_name",
168
- type=str,
169
- help="String to use with export filename. Default is `cct` for `text_extraction` \
170
- and `element-type` for `element_type`",
171
- )
172
- @click.option(
173
- "--export_filename", type=str, help="Optional. Define your file name for the output here."
174
- )
175
- def get_mean_grouping_command(
176
- group_by: str,
177
- data_input: str,
178
- export_dir: str,
179
- eval_name: str,
180
- agg_name: Optional[str] = None,
181
- export_filename: Optional[str] = None,
182
- ):
183
- return get_mean_grouping(
184
- group_by=group_by,
185
- data_input=data_input,
186
- export_dir=export_dir,
187
- eval_name=eval_name,
188
- agg_name=agg_name,
189
- export_filename=export_filename,
190
- )
191
-
192
-
193
- @main.command()
194
- @click.option("--output_dir", type=str, help="Directory to structured output.")
195
- @click.option("--source_dir", type=str, help="Directory to structured source.")
196
- @click.option(
197
- "--output_list",
198
- type=str,
199
- multiple=True,
200
- help="Optional: list of selected structured output file names under the \
201
- directory to be evaluate. If none, all files under directory will be used.",
202
- )
203
- @click.option(
204
- "--source_list",
205
- type=str,
206
- multiple=True,
207
- help="Optional: list of selected source file names under the directory \
208
- to be evaluate. If none, all files under directory will be used.",
209
- )
210
- @click.option(
211
- "--export_dir",
212
- type=str,
213
- default="metrics",
214
- help="Directory to save the output evaluation metrics to. Default to \
215
- your/working/dir/metrics/",
216
- )
217
- @click.option(
218
- "--visualize",
219
- is_flag=True,
220
- show_default=True,
221
- default=False,
222
- help="Add the flag to show progress bar.",
223
- )
224
- @click.option(
225
- "--cutoff",
226
- type=float,
227
- show_default=True,
228
- default=0.8,
229
- help="The cutoff value for the element level alignment. \
230
- If not set, a default value is used",
231
- )
232
- def measure_table_structure_accuracy_command(
233
- output_dir: str,
234
- source_dir: str,
235
- export_dir: str,
236
- visualize: bool,
237
- output_list: Optional[List[str]] = None,
238
- source_list: Optional[List[str]] = None,
239
- cutoff: Optional[float] = None,
240
- ):
241
- return (
242
- TableStructureMetricsCalculator(
243
- documents_dir=output_dir,
244
- ground_truths_dir=source_dir,
245
- cutoff=cutoff,
246
- )
247
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
248
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
249
- )
250
-
251
-
252
- @main.command()
253
- @click.option("--output_dir", type=str, help="Directory to structured output.")
254
- @click.option("--source_dir", type=str, help="Directory to structured source.")
255
- @click.option(
256
- "--output_list",
257
- type=str,
258
- multiple=True,
259
- help=(
260
- "Optional: list of selected structured output file names under the "
261
- "directory to be evaluated. If none, all files under directory will be used."
262
- ),
263
- )
264
- @click.option(
265
- "--source_list",
266
- type=str,
267
- multiple=True,
268
- help="Optional: list of selected source file names under the directory \
269
- to be evaluate. If none, all files under directory will be used.",
270
- )
271
- @click.option(
272
- "--export_dir",
273
- type=str,
274
- default="metrics",
275
- help="Directory to save the output evaluation metrics to. Default to \
276
- your/working/dir/metrics/",
277
- )
278
- @click.option(
279
- "--visualize",
280
- is_flag=True,
281
- show_default=True,
282
- default=False,
283
- help="Add the flag to show progress bar.",
284
- )
285
- def measure_object_detection_metrics_command(
286
- output_dir: str,
287
- source_dir: str,
288
- export_dir: str,
289
- visualize: bool,
290
- output_list: Optional[List[str]] = None,
291
- source_list: Optional[List[str]] = None,
292
- ):
293
- return (
294
- ObjectDetectionMetricsCalculator(
295
- documents_dir=output_dir,
296
- ground_truths_dir=source_dir,
297
- )
298
- .on_files(document_paths=output_list, ground_truth_paths=source_list)
299
- .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
300
- )
301
-
302
-
303
- @main.command()
304
- @click.option(
305
- "--data_input", type=str, required=True, help="Takes in path to data file as .tsv .csv .txt"
306
- )
307
- @click.option(
308
- "--filter_list",
309
- type=str,
310
- required=True,
311
- help="Takes in list of string to filter the data_input.",
312
- )
313
- @click.option(
314
- "--filter_by",
315
- type=str,
316
- required=True,
317
- help="Field from data_input to match with filter_list. Default is `filename`.",
318
- )
319
- @click.option(
320
- "--export_filename", type=str, help="Export filename. Required when return_type is `file`"
321
- )
322
- @click.option("--export_dir", type=str, help="Export directory.")
323
- @click.option("--return_type", type=str, help="`dataframe` or `file`. Default is `file`.")
324
- def filter_metrics_command(
325
- data_input: str,
326
- filter_list: Union[str, List[str]],
327
- filter_by: str = "filename",
328
- export_filename: Optional[str] = None,
329
- export_dir: str = "metrics",
330
- return_type: str = "file",
331
- ):
332
- return filter_metrics(
333
- data_input, filter_list, filter_by, export_filename, export_dir, return_type
334
- )
335
-
336
-
337
- if __name__ == "__main__":
338
- main()