unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +1 -1
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
  279. unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,19 +1,269 @@
1
- import typing as t
2
- from abc import ABC
3
- from dataclasses import dataclass, field
1
+ import inspect
2
+ from abc import ABC, abstractmethod
3
+ from collections import Counter
4
+ from dataclasses import dataclass, field, fields
5
+ from typing import Any, Optional, Type, TypeVar
4
6
 
5
- from unstructured_ingest.cli.interfaces import CliConfig
6
- from unstructured_ingest.interfaces import BaseConfig
7
+ import click
8
+ from pydantic import BaseModel
9
+
10
+ from unstructured_ingest.cli.base.importer import import_from_string
11
+ from unstructured_ingest.cli.utils.click import extract_config
12
+ from unstructured_ingest.cli.utils.model_conversion import options_from_base_model, post_check
13
+ from unstructured_ingest.interfaces import ProcessorConfig
14
+ from unstructured_ingest.logger import logger
15
+ from unstructured_ingest.pipeline.pipeline import Pipeline
16
+ from unstructured_ingest.processes.chunker import Chunker, ChunkerConfig
17
+ from unstructured_ingest.processes.connector_registry import (
18
+ DownloaderT,
19
+ IndexerT,
20
+ RegistryEntry,
21
+ UploaderT,
22
+ UploadStager,
23
+ UploadStagerConfig,
24
+ UploadStagerT,
25
+ destination_registry,
26
+ source_registry,
27
+ )
28
+ from unstructured_ingest.processes.connectors.local import LocalUploader, LocalUploaderConfig
29
+ from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
30
+ from unstructured_ingest.processes.filter import Filterer, FiltererConfig
31
+ from unstructured_ingest.processes.partitioner import Partitioner, PartitionerConfig
32
+
33
+ CommandT = TypeVar("CommandT", bound=click.Command)
7
34
 
8
35
 
9
36
  @dataclass
10
37
  class BaseCmd(ABC):
11
38
  cmd_name: str
12
- cli_config: t.Optional[t.Type[BaseConfig]] = None
13
- additional_cli_options: t.List[t.Type[CliConfig]] = field(default_factory=list)
14
- addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict)
15
- is_fsspec: bool = False
39
+ registry_entry: RegistryEntry
40
+ default_configs: list[Type[BaseModel]] = field(default_factory=list)
41
+
42
+ @abstractmethod
43
+ def get_registry_options(self):
44
+ pass
45
+
46
+ def get_default_options(self) -> list[click.Option]:
47
+ options = []
48
+ for extra in self.default_configs:
49
+ options.extend(options_from_base_model(model=extra))
50
+ return options
51
+
52
+ @classmethod
53
+ def consolidate_options(cls, options: list[click.Option]) -> list[click.Option]:
54
+ option_names = [option.name for option in options]
55
+ duplicate_names = [name for name, count in Counter(option_names).items() if count > 1]
56
+ if not duplicate_names:
57
+ return options
58
+ consolidated_options = []
59
+ current_names = []
60
+ for option in options:
61
+ if option.name not in current_names:
62
+ current_names.append(option.name)
63
+ consolidated_options.append(option)
64
+ continue
65
+ existing_option = next(o for o in consolidated_options if o.name == option.name)
66
+ if existing_option.__dict__ == option.__dict__:
67
+ continue
68
+ option_diff = cls.get_options_diff(o1=option, o2=existing_option)
69
+ raise ValueError(
70
+ "Conflicting duplicate {} option defined: {}".format(
71
+ option.name, " | ".join([f"{d[0]}: {d[1]}" for d in option_diff])
72
+ )
73
+ )
74
+ return consolidated_options
75
+
76
+ @staticmethod
77
+ def get_options_diff(o1: click.Option, o2: click.Option):
78
+ o1_dict = o1.__dict__
79
+ o2_dict = o2.__dict__
80
+ for d in [o1_dict, o2_dict]:
81
+ d["opts"] = ",".join(d["opts"])
82
+ d["secondary_opts"] = ",".join(d["secondary_opts"])
83
+ option_diff = set(o1_dict.items()) ^ set(o2_dict.items())
84
+ return option_diff
16
85
 
17
86
  @property
18
87
  def cmd_name_key(self):
19
88
  return self.cmd_name.replace("-", "_")
89
+
90
+ @property
91
+ def cli_cmd_name(self):
92
+ return self.cmd_name.replace("_", "-")
93
+
94
+ @abstractmethod
95
+ def cmd(self, ctx: click.Context, **options) -> None:
96
+ pass
97
+
98
+ def add_options(self, cmd: CommandT) -> CommandT:
99
+ options = self.get_registry_options()
100
+ options.extend(self.get_default_options())
101
+ post_check(options=options, name=cmd.name)
102
+ cmd.params.extend(options)
103
+ return cmd
104
+
105
+ def get_pipeline(
106
+ self,
107
+ src: str,
108
+ source_options: dict[str, Any],
109
+ dest: Optional[str] = None,
110
+ destination_options: Optional[dict[str, Any]] = None,
111
+ ) -> Pipeline:
112
+ logger.debug(
113
+ f"creating pipeline from cli using source {src} with options: {source_options}"
114
+ )
115
+ pipeline_kwargs: dict[str, Any] = {
116
+ "context": self.get_processor_config(options=source_options),
117
+ "downloader": self.get_downloader(src=src, options=source_options),
118
+ "indexer": self.get_indexer(src=src, options=source_options),
119
+ "partitioner": self.get_partitioner(options=source_options),
120
+ }
121
+ if chunker := self.get_chunker(options=source_options):
122
+ pipeline_kwargs["chunker"] = chunker
123
+ if filterer := self.get_filterer(options=source_options):
124
+ pipeline_kwargs["filterer"] = filterer
125
+ if embedder := self.get_embedder(options=source_options):
126
+ pipeline_kwargs["embedder"] = embedder
127
+ if dest:
128
+ logger.debug(
129
+ f"setting destination on pipeline {dest} with options: {destination_options}"
130
+ )
131
+ if uploader_stager := self.get_upload_stager(dest=dest, options=destination_options):
132
+ pipeline_kwargs["stager"] = uploader_stager
133
+ pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options)
134
+ else:
135
+ # Default to local uploader
136
+ # TODO remove after v1 no longer supported
137
+ destination_options = destination_options or {}
138
+ if "output_dir" not in destination_options:
139
+ destination_options["output_dir"] = source_options["output_dir"]
140
+ pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options)
141
+ return Pipeline(**pipeline_kwargs)
142
+
143
+ @staticmethod
144
+ def get_default_uploader(options: dict[str, Any]) -> UploaderT:
145
+ uploader_config = extract_config(flat_data=options, config=LocalUploaderConfig)
146
+ return LocalUploader(upload_config=uploader_config)
147
+
148
+ @staticmethod
149
+ def get_chunker(options: dict[str, Any]) -> Optional[Chunker]:
150
+ chunker_config = extract_config(flat_data=options, config=ChunkerConfig)
151
+ if not chunker_config.chunking_strategy:
152
+ return None
153
+ return Chunker(config=chunker_config)
154
+
155
+ @staticmethod
156
+ def get_filterer(options: dict[str, Any]) -> Optional[Filterer]:
157
+ filterer_configs = extract_config(flat_data=options, config=FiltererConfig)
158
+ if not filterer_configs.model_dump():
159
+ return None
160
+ return Filterer(config=filterer_configs)
161
+
162
+ @staticmethod
163
+ def get_embedder(options: dict[str, Any]) -> Optional[Embedder]:
164
+ embedder_config = extract_config(flat_data=options, config=EmbedderConfig)
165
+ if not embedder_config.embedding_provider:
166
+ return None
167
+ return Embedder(config=embedder_config)
168
+
169
+ @staticmethod
170
+ def get_partitioner(options: dict[str, Any]) -> Partitioner:
171
+ partitioner_config = extract_config(flat_data=options, config=PartitionerConfig)
172
+ return Partitioner(config=partitioner_config)
173
+
174
+ @staticmethod
175
+ def get_processor_config(options: dict[str, Any]) -> ProcessorConfig:
176
+ return extract_config(flat_data=options, config=ProcessorConfig)
177
+
178
+ @staticmethod
179
+ def get_indexer(src: str, options: dict[str, Any]) -> IndexerT:
180
+ source_entry = source_registry[src]
181
+ indexer_kwargs: dict[str, Any] = {}
182
+ if indexer_config_cls := source_entry.indexer_config:
183
+ indexer_kwargs["index_config"] = extract_config(
184
+ flat_data=options, config=indexer_config_cls
185
+ )
186
+ if connection_config_cls := source_entry.connection_config:
187
+ indexer_kwargs["connection_config"] = extract_config(
188
+ flat_data=options, config=connection_config_cls
189
+ )
190
+ indexer_cls = source_entry.indexer
191
+ return indexer_cls(**indexer_kwargs)
192
+
193
+ @staticmethod
194
+ def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT:
195
+ source_entry = source_registry[src]
196
+ downloader_kwargs: dict[str, Any] = {}
197
+ if downloader_config_cls := source_entry.downloader_config:
198
+ downloader_kwargs["download_config"] = extract_config(
199
+ flat_data=options, config=downloader_config_cls
200
+ )
201
+ if connection_config_cls := source_entry.connection_config:
202
+ downloader_kwargs["connection_config"] = extract_config(
203
+ flat_data=options, config=connection_config_cls
204
+ )
205
+ downloader_cls = source_entry.downloader
206
+ return downloader_cls(**downloader_kwargs)
207
+
208
+ @staticmethod
209
+ def get_custom_stager(
210
+ stager_reference: str, stager_config_kwargs: Optional[dict] = None
211
+ ) -> Optional[UploadStagerT]:
212
+ uploader_cls = import_from_string(stager_reference)
213
+ if not inspect.isclass(uploader_cls):
214
+ raise ValueError(
215
+ f"custom stager must be a reference to a python class, got: {type(uploader_cls)}"
216
+ )
217
+ if not issubclass(uploader_cls, UploadStager):
218
+ raise ValueError(
219
+ "custom stager must be an implementation of the UploadStager interface"
220
+ )
221
+ fields_dict = {f.name: f.type for f in fields(uploader_cls)}
222
+ upload_stager_config_cls = fields_dict["upload_stager_config"]
223
+ if not inspect.isclass(upload_stager_config_cls):
224
+ raise ValueError(
225
+ f"custom stager config must be a class, got: {type(upload_stager_config_cls)}"
226
+ )
227
+ if not issubclass(upload_stager_config_cls, UploadStagerConfig):
228
+ raise ValueError(
229
+ "custom stager config must be an implementation "
230
+ "of the UploadStagerUploadStagerConfig interface"
231
+ )
232
+ upload_stager_kwargs: dict[str, Any] = {}
233
+ if stager_config_kwargs:
234
+ upload_stager_kwargs["upload_stager_config"] = upload_stager_config_cls(
235
+ **stager_config_kwargs
236
+ )
237
+ return uploader_cls(**upload_stager_kwargs)
238
+
239
+ @staticmethod
240
+ def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]:
241
+ if custom_stager := options.get("custom_stager"):
242
+ return BaseCmd.get_custom_stager(
243
+ stager_reference=custom_stager,
244
+ stager_config_kwargs=options.get("custom_stager_config_kwargs"),
245
+ )
246
+ dest_entry = destination_registry[dest]
247
+ upload_stager_kwargs: dict[str, Any] = {}
248
+ if upload_stager_config_cls := dest_entry.upload_stager_config:
249
+ upload_stager_kwargs["upload_stager_config"] = extract_config(
250
+ flat_data=options, config=upload_stager_config_cls
251
+ )
252
+ if upload_stager_cls := dest_entry.upload_stager:
253
+ return upload_stager_cls(**upload_stager_kwargs)
254
+ return None
255
+
256
+ @staticmethod
257
+ def get_uploader(dest, options: dict[str, Any]) -> UploaderT:
258
+ dest_entry = destination_registry[dest]
259
+ uploader_kwargs: dict[str, Any] = {}
260
+ if uploader_config_cls := dest_entry.uploader_config:
261
+ uploader_kwargs["upload_config"] = extract_config(
262
+ flat_data=options, config=uploader_config_cls
263
+ )
264
+ if connection_config_cls := dest_entry.connection_config:
265
+ uploader_kwargs["connection_config"] = extract_config(
266
+ flat_data=options, config=connection_config_cls
267
+ )
268
+ uploader_cls = dest_entry.uploader
269
+ return uploader_cls(**uploader_kwargs)
@@ -1,87 +1,84 @@
1
1
  import logging
2
- import typing as t
3
2
  from dataclasses import dataclass
4
3
 
5
4
  import click
6
5
 
7
6
  from unstructured_ingest.cli.base.cmd import BaseCmd
8
- from unstructured_ingest.cli.cmd_factory import get_src_cmd
9
- from unstructured_ingest.cli.common import (
10
- log_options,
11
- )
12
- from unstructured_ingest.cli.interfaces import BaseConfig, CliFilesStorageConfig
13
- from unstructured_ingest.cli.utils import (
14
- add_options,
15
- conform_click_options,
16
- extract_config,
17
- extract_configs,
18
- )
19
- from unstructured_ingest.logger import ingest_log_streaming_init, logger
20
- from unstructured_ingest.runner.writers import writer_map
7
+ from unstructured_ingest.cli.utils.click import Dict, conform_click_options
8
+ from unstructured_ingest.cli.utils.model_conversion import options_from_base_model
9
+ from unstructured_ingest.logger import logger
10
+ from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
21
11
 
22
12
 
23
13
  @dataclass
24
- class BaseDestCmd(BaseCmd):
25
- write_config: t.Optional[t.Type[BaseConfig]] = None
14
+ class DestCmd(BaseCmd):
15
+ registry_entry: DestinationRegistryEntry
26
16
 
27
- def get_dest_runner(self, source_cmd: str, options: dict, parent_options: dict):
28
- src_cmd_fn = get_src_cmd(cmd_name=source_cmd)
29
- src_cmd = src_cmd_fn()
30
- runner = src_cmd.get_source_runner(options=parent_options)
31
- addition_configs = self.addition_configs
32
- if "connector_config" not in addition_configs:
33
- addition_configs["connector_config"] = self.cli_config
34
- if self.write_config:
35
- addition_configs["write_config"] = self.write_config
36
- configs = extract_configs(
37
- options,
38
- validate=[self.cli_config] if self.cli_config else None,
39
- extras=addition_configs,
40
- add_defaults=False,
41
- )
42
- writer_cls = writer_map[self.cmd_name_key]
43
- writer = writer_cls(**configs) # type: ignore
44
- runner.writer = writer
45
- runner.writer_kwargs = options
46
- return runner
47
-
48
- def check_dest_options(self, options: dict):
49
- extract_config(flat_data=options, config=self.cli_config)
17
+ def get_registry_options(self):
18
+ options = []
19
+ configs = [
20
+ config
21
+ for config in [
22
+ self.registry_entry.uploader_config,
23
+ self.registry_entry.upload_stager_config,
24
+ self.registry_entry.connection_config,
25
+ ]
26
+ if config
27
+ ]
28
+ for config in configs:
29
+ options.extend(options_from_base_model(model=config))
30
+ options = self.consolidate_options(options=options)
31
+ return options
50
32
 
51
- def dest(self, ctx: click.Context, **options):
33
+ def cmd(self, ctx: click.Context, **options) -> None:
34
+ logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO)
52
35
  if not ctx.parent:
53
36
  raise click.ClickException("destination command called without a parent")
54
37
  if not ctx.parent.info_name:
55
38
  raise click.ClickException("parent command missing info name")
56
39
  source_cmd = ctx.parent.info_name.replace("-", "_")
57
- parent_options: dict = ctx.parent.params if ctx.parent else {}
40
+ source_options: dict = ctx.parent.params if ctx.parent else {}
58
41
  conform_click_options(options)
59
- verbose = parent_options.get("verbose", False)
60
- ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
61
- log_options(parent_options, verbose=verbose)
62
- log_options(options, verbose=verbose)
63
42
  try:
64
- self.check_dest_options(options=options)
65
- runner = self.get_dest_runner(
66
- source_cmd=source_cmd,
67
- options=options,
68
- parent_options=parent_options,
43
+ pipeline = self.get_pipeline(
44
+ src=source_cmd,
45
+ source_options=source_options,
46
+ dest=self.cmd_name,
47
+ destination_options=options,
69
48
  )
70
- runner.run(**parent_options)
49
+ pipeline.run()
71
50
  except Exception as e:
72
- logger.error(e, exc_info=True)
51
+ logger.error(f"failed to run destination command {self.cmd_name}: {e}", exc_info=True)
73
52
  raise click.ClickException(str(e)) from e
74
53
 
75
- def get_dest_cmd(self) -> click.Command:
54
+ def get_cmd(self) -> click.Command:
76
55
  # Dynamically create the command without the use of click decorators
77
- fn = self.dest
56
+ fn = self.cmd
78
57
  fn = click.pass_context(fn)
79
- cmd: click.Group = click.command(fn)
80
- cmd.name = self.cmd_name
58
+ cmd = click.command(fn)
59
+ if not isinstance(cmd, click.core.Command):
60
+ raise ValueError(f"generated command was not of expected type Command: {type(cmd)}")
61
+ cmd.name = self.cli_cmd_name
81
62
  cmd.invoke_without_command = True
82
- options = [self.cli_config] if self.cli_config else []
83
- options += self.additional_cli_options
84
- if self.is_fsspec and CliFilesStorageConfig not in options:
85
- options.append(CliFilesStorageConfig)
86
- add_options(cmd, extras=options, is_src=False)
63
+ self.add_options(cmd)
64
+ cmd.params.append(
65
+ click.Option(
66
+ ["--custom-stager"],
67
+ required=False,
68
+ type=str,
69
+ default=None,
70
+ help="Pass a pointer to a custom upload stager to use, "
71
+ "must be in format '<module>:<attribute>'",
72
+ )
73
+ )
74
+ cmd.params.append(
75
+ click.Option(
76
+ ["--custom-stager-config-kwargs"],
77
+ required=False,
78
+ type=Dict(),
79
+ default=None,
80
+ help="Any kwargs to instantiate the configuration "
81
+ "associated with the customer stager",
82
+ )
83
+ )
87
84
  return cmd
@@ -1,57 +1,75 @@
1
1
  import logging
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field
3
+ from typing import Any
3
4
 
4
5
  import click
6
+ from pydantic import BaseModel
5
7
 
6
8
  from unstructured_ingest.cli.base.cmd import BaseCmd
7
- from unstructured_ingest.cli.common import (
8
- log_options,
9
+ from unstructured_ingest.cli.utils.click import Group, conform_click_options
10
+ from unstructured_ingest.cli.utils.model_conversion import options_from_base_model
11
+ from unstructured_ingest.interfaces import ProcessorConfig
12
+ from unstructured_ingest.logger import logger
13
+ from unstructured_ingest.processes import (
14
+ ChunkerConfig,
15
+ EmbedderConfig,
16
+ FiltererConfig,
17
+ PartitionerConfig,
9
18
  )
10
- from unstructured_ingest.cli.interfaces import CliFilesStorageConfig
11
- from unstructured_ingest.cli.utils import Group, add_options, conform_click_options, extract_configs
12
- from unstructured_ingest.logger import ingest_log_streaming_init, logger
13
- from unstructured_ingest.runner import runner_map
19
+ from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
14
20
 
15
21
 
16
22
  @dataclass
17
- class BaseSrcCmd(BaseCmd):
18
- def get_source_runner(self, options: dict):
19
- addition_configs = self.addition_configs
20
- if "connector_config" not in addition_configs:
21
- addition_configs["connector_config"] = self.cli_config
22
- configs = extract_configs(
23
- options,
24
- validate=[self.cli_config] if self.cli_config else None,
25
- extras=addition_configs,
26
- )
27
- runner = runner_map[self.cmd_name_key]
28
- return runner(**configs) # type: ignore
29
-
30
- def src(self, ctx: click.Context, **options):
23
+ class SrcCmd(BaseCmd):
24
+ registry_entry: SourceRegistryEntry
25
+ default_configs: list[BaseModel] = field(
26
+ default_factory=lambda: [
27
+ ProcessorConfig,
28
+ PartitionerConfig,
29
+ EmbedderConfig,
30
+ FiltererConfig,
31
+ ChunkerConfig,
32
+ ]
33
+ )
34
+
35
+ def get_registry_options(self):
36
+ options = []
37
+ configs = [
38
+ config
39
+ for config in [
40
+ self.registry_entry.connection_config,
41
+ self.registry_entry.indexer_config,
42
+ self.registry_entry.downloader_config,
43
+ ]
44
+ if config
45
+ ]
46
+ for config in configs:
47
+ options.extend(options_from_base_model(model=config))
48
+ options = self.consolidate_options(options=options)
49
+ return options
50
+
51
+ def cmd(self, ctx: click.Context, **options: dict[str, Any]) -> None:
31
52
  if ctx.invoked_subcommand:
32
53
  return
33
54
 
34
55
  conform_click_options(options)
35
- verbose = options.get("verbose", False)
36
- ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
37
- log_options(options, verbose=verbose)
56
+ logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO)
38
57
  try:
39
- runner = self.get_source_runner(options=options)
40
- runner.run(**options)
58
+ pipeline = self.get_pipeline(src=self.cmd_name, source_options=options)
59
+ pipeline.run()
41
60
  except Exception as e:
42
- logger.error(e, exc_info=True)
61
+ logger.error(f"failed to run source command {self.cmd_name}: {e}", exc_info=True)
43
62
  raise click.ClickException(str(e)) from e
44
63
 
45
- def get_src_cmd(self) -> click.Group:
64
+ def get_cmd(self) -> click.Group:
46
65
  # Dynamically create the command without the use of click decorators
47
- fn = self.src
66
+ fn = self.cmd
48
67
  fn = click.pass_context(fn)
49
- cmd: click.Group = click.group(fn, cls=Group)
50
- cmd.name = self.cmd_name
68
+ cmd = click.group(fn, cls=Group)
69
+ if not isinstance(cmd, click.core.Group):
70
+ raise ValueError(f"generated src command was not of expected type Group: {type(cmd)}")
71
+ cmd.name = self.cli_cmd_name
51
72
  cmd.invoke_without_command = True
52
- extra_options = [self.cli_config] if self.cli_config else []
53
- extra_options += self.additional_cli_options
54
- if self.is_fsspec and CliFilesStorageConfig not in extra_options:
55
- extra_options.append(CliFilesStorageConfig)
56
- add_options(cmd, extras=extra_options)
73
+ self.add_options(cmd)
74
+
57
75
  return cmd
@@ -1,13 +1,6 @@
1
- from typing import TYPE_CHECKING
2
-
3
1
  import click
4
2
 
5
- from unstructured_ingest.cli import dest, src
6
- from unstructured_ingest.v2.cli.cmds import dest as dest_v2
7
- from unstructured_ingest.v2.cli.cmds import src as src_v2
8
-
9
- if TYPE_CHECKING:
10
- from click import Command
3
+ from unstructured_ingest.cli.cmds import dest, src
11
4
 
12
5
 
13
6
  @click.group()
@@ -15,23 +8,17 @@ def ingest():
15
8
  pass
16
9
 
17
10
 
18
- def get_cmd() -> "Command":
11
+ def get_cmd() -> click.Command:
19
12
  """Construct and return a Click command object representing the main command for the CLI.
20
13
 
21
14
  This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those
22
15
  to the main command as nested subcommands.
23
16
  """
24
17
  cmd = ingest
25
- src_dict = {s.name: s for s in src}
26
- dest_dict = {d.name: d for d in dest}
27
- for s in src_v2:
28
- src_dict[s.name] = s
29
- for d in dest_v2:
30
- dest_dict[d.name] = d
31
18
  # Add all subcommands
32
- for src_subcommand in src_dict.values():
19
+ for src_subcommand in src:
33
20
  # Add all destination subcommands
34
- for dest_subcommand in dest_dict.values():
21
+ for dest_subcommand in dest:
35
22
  src_subcommand.add_command(dest_subcommand)
36
23
  cmd.add_command(src_subcommand)
37
24
  return cmd
@@ -1,7 +1,7 @@
1
1
  import click
2
2
 
3
- from unstructured_ingest.v2.cli.base import DestCmd, SrcCmd
4
- from unstructured_ingest.v2.processes.connector_registry import (
3
+ from unstructured_ingest.cli.base import DestCmd, SrcCmd
4
+ from unstructured_ingest.processes.connector_registry import (
5
5
  destination_registry,
6
6
  source_registry,
7
7
  )
@@ -25,7 +25,7 @@ from pydantic.fields import FieldInfo
25
25
  from pydantic.types import _SecretBase
26
26
  from pydantic_core import PydanticUndefined
27
27
 
28
- from unstructured_ingest.v2.cli.utils.click import (
28
+ from unstructured_ingest.cli.utils.click import (
29
29
  DelimitedString,
30
30
  Dict,
31
31
  PydanticDate,
@@ -106,7 +106,7 @@ def get_numerical_type(field: FieldInfo) -> click.ParamType:
106
106
  if range_args:
107
107
  return click.IntRange(**range_args) # type: ignore[arg-type]
108
108
  return click.INT
109
- # Non-integer numerical types default to float
109
+ # Non-integer numerical data_types default to float
110
110
  if range_args:
111
111
  return click.FloatRange(**range_args) # type: ignore[arg-type]
112
112
  return click.FLOAT
@@ -183,13 +183,13 @@ def is_subclass(x: Any, y: Any) -> bool:
183
183
  return False
184
184
 
185
185
 
186
- def post_check(options: list[Option]):
186
+ def post_check(options: list[Option], name: str):
187
187
  option_names = [option.name for option in options]
188
188
  duplicate_names = [name for name, count in Counter(option_names).items() if count > 1]
189
189
  if duplicate_names:
190
190
  raise ValueError(
191
- "the following field name were reused, all must be unique: {}".format(
192
- ", ".join(duplicate_names)
191
+ "[{}] the following field name were reused, all must be unique: {}".format(
192
+ name, ", ".join(duplicate_names)
193
193
  )
194
194
  )
195
195
 
@@ -218,5 +218,5 @@ def options_from_base_model(model: Union[BaseModel, Type[BaseModel]]) -> list[Op
218
218
  field_info.description = f"[sensitive] {field_info.description}"
219
219
  options.append(get_option_from_field(option_name=option_name, field_info=field_info))
220
220
 
221
- post_check(options=options)
221
+ post_check(options=options, name=model.__name__)
222
222
  return options
@@ -5,7 +5,7 @@ from uuid import NAMESPACE_DNS, uuid5
5
5
 
6
6
  from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
7
7
 
8
- from unstructured_ingest.v2.logger import logger
8
+ from unstructured_ingest.logger import logger
9
9
 
10
10
 
11
11
  class SourceIdentifiers(BaseModel):
@@ -13,15 +13,15 @@ from unstructured_ingest.embed.interfaces import (
13
13
  BaseEmbeddingEncoder,
14
14
  EmbeddingConfig,
15
15
  )
16
- from unstructured_ingest.logger import logger
17
- from unstructured_ingest.utils.dep_check import requires_dependencies
18
- from unstructured_ingest.v2.errors import (
16
+ from unstructured_ingest.errors_v2 import (
19
17
  ProviderError,
20
18
  RateLimitError,
21
19
  UserAuthError,
22
20
  UserError,
23
21
  is_internal_error,
24
22
  )
23
+ from unstructured_ingest.logger import logger
24
+ from unstructured_ingest.utils.dep_check import requires_dependencies
25
25
 
26
26
  if TYPE_CHECKING:
27
27
  from botocore.client import BaseClient