unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +1 -1
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
  279. unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,25 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
5
- from unstructured_ingest.interfaces import BaseDestinationConnector
6
- from unstructured_ingest.runner.writers.base_writer import Writer
7
-
8
- if t.TYPE_CHECKING:
9
- from unstructured_ingest.connector.databricks_volumes import (
10
- DatabricksVolumesWriteConfig,
11
- SimpleDatabricksVolumesConfig,
12
- )
13
-
14
-
15
- @dataclass
16
- class DatabricksVolumesWriter(Writer, EnhancedDataClassJsonMixin):
17
- write_config: "DatabricksVolumesWriteConfig"
18
- connector_config: "SimpleDatabricksVolumesConfig"
19
-
20
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
21
- from unstructured_ingest.connector.databricks_volumes import (
22
- DatabricksVolumesDestinationConnector,
23
- )
24
-
25
- return DatabricksVolumesDestinationConnector
@@ -1,24 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.delta_table import (
9
- DeltaTableWriteConfig,
10
- SimpleDeltaTableConfig,
11
- )
12
-
13
-
14
- @dataclass
15
- class DeltaTableWriter(Writer):
16
- write_config: "DeltaTableWriteConfig"
17
- connector_config: "SimpleDeltaTableConfig"
18
-
19
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
20
- from unstructured_ingest.connector.delta_table import (
21
- DeltaTableDestinationConnector,
22
- )
23
-
24
- return DeltaTableDestinationConnector
@@ -1,24 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.elasticsearch import (
9
- ElasticsearchWriteConfig,
10
- SimpleElasticsearchConfig,
11
- )
12
-
13
-
14
- @dataclass
15
- class ElasticsearchWriter(Writer):
16
- connector_config: "SimpleElasticsearchConfig"
17
- write_config: "ElasticsearchWriteConfig"
18
-
19
- def get_connector_cls(self) -> BaseDestinationConnector:
20
- from unstructured_ingest.connector.elasticsearch import (
21
- ElasticsearchDestinationConnector,
22
- )
23
-
24
- return ElasticsearchDestinationConnector
@@ -1,24 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.fsspec.azure import (
9
- AzureWriteConfig,
10
- SimpleAzureBlobStorageConfig,
11
- )
12
-
13
-
14
- @dataclass
15
- class AzureWriter(Writer):
16
- connector_config: "SimpleAzureBlobStorageConfig"
17
- write_config: "AzureWriteConfig"
18
-
19
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
20
- from unstructured_ingest.connector.fsspec.azure import (
21
- AzureBlobStorageDestinationConnector,
22
- )
23
-
24
- return AzureBlobStorageDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig
9
-
10
-
11
- @dataclass
12
- class BoxWriter(Writer):
13
- connector_config: "SimpleBoxConfig"
14
- write_config: "BoxWriteConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.fsspec.box import (
18
- BoxDestinationConnector,
19
- )
20
-
21
- return BoxDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.fsspec.dropbox import DropboxWriteConfig, SimpleDropboxConfig
9
-
10
-
11
- @dataclass
12
- class DropboxWriter(Writer):
13
- connector_config: "SimpleDropboxConfig"
14
- write_config: "DropboxWriteConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.fsspec.dropbox import (
18
- DropboxDestinationConnector,
19
- )
20
-
21
- return DropboxDestinationConnector
@@ -1,19 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig
9
-
10
-
11
- @dataclass
12
- class GcsWriter(Writer):
13
- connector_config: "SimpleGcsConfig"
14
- write_config: "GcsWriteConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.fsspec.gcs import GcsDestinationConnector
18
-
19
- return GcsDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config
9
-
10
-
11
- @dataclass
12
- class S3Writer(Writer):
13
- connector_config: "SimpleS3Config"
14
- write_config: "S3WriteConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.fsspec.s3 import (
18
- S3DestinationConnector,
19
- )
20
-
21
- return S3DestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig
9
-
10
-
11
- @dataclass
12
- class KafkaWriter(Writer):
13
- write_config: "KafkaWriteConfig"
14
- connector_config: "SimpleKafkaConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.kafka import (
18
- KafkaDestinationConnector,
19
- )
20
-
21
- return KafkaDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.mongodb import MongoDBWriteConfig, SimpleMongoDBConfig
9
-
10
-
11
- @dataclass
12
- class MongodbWriter(Writer):
13
- write_config: "MongoDBWriteConfig"
14
- connector_config: "SimpleMongoDBConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.mongodb import (
18
- MongoDBDestinationConnector,
19
- )
20
-
21
- return MongoDBDestinationConnector
@@ -1,26 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.elasticsearch import (
9
- ElasticsearchWriteConfig,
10
- )
11
- from unstructured_ingest.connector.opensearch import (
12
- SimpleOpenSearchConfig,
13
- )
14
-
15
-
16
- @dataclass
17
- class OpenSearchWriter(Writer):
18
- connector_config: "SimpleOpenSearchConfig"
19
- write_config: "ElasticsearchWriteConfig"
20
-
21
- def get_connector_cls(self) -> BaseDestinationConnector:
22
- from unstructured_ingest.connector.opensearch import (
23
- OpenSearchDestinationConnector,
24
- )
25
-
26
- return OpenSearchDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig
9
-
10
-
11
- @dataclass
12
- class PineconeWriter(Writer):
13
- write_config: "PineconeWriteConfig"
14
- connector_config: "SimplePineconeConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.pinecone import (
18
- PineconeDestinationConnector,
19
- )
20
-
21
- return PineconeDestinationConnector
@@ -1,19 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig
9
-
10
-
11
- @dataclass
12
- class QdrantWriter(Writer):
13
- write_config: "QdrantWriteConfig"
14
- connector_config: "SimpleQdrantConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.qdrant import QdrantDestinationConnector
18
-
19
- return QdrantDestinationConnector
@@ -1,22 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.sql import SimpleSqlConfig
9
- from unstructured_ingest.interfaces import WriteConfig
10
-
11
-
12
- @dataclass
13
- class SqlWriter(Writer):
14
- write_config: "WriteConfig"
15
- connector_config: "SimpleSqlConfig"
16
-
17
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
18
- from unstructured_ingest.connector.sql import (
19
- SqlDestinationConnector,
20
- )
21
-
22
- return SqlDestinationConnector
@@ -1,22 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
5
- from unstructured_ingest.interfaces import BaseDestinationConnector
6
- from unstructured_ingest.runner.writers.base_writer import Writer
7
-
8
- if t.TYPE_CHECKING:
9
- from unstructured_ingest.connector.vectara import SimpleVectaraConfig, VectaraWriteConfig
10
-
11
-
12
- @dataclass
13
- class VectaraWriter(Writer, EnhancedDataClassJsonMixin):
14
- write_config: "VectaraWriteConfig"
15
- connector_config: "SimpleVectaraConfig"
16
-
17
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
18
- from unstructured_ingest.connector.vectara import (
19
- VectaraDestinationConnector,
20
- )
21
-
22
- return VectaraDestinationConnector
@@ -1,21 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- from unstructured_ingest.interfaces import BaseDestinationConnector
5
- from unstructured_ingest.runner.writers.base_writer import Writer
6
-
7
- if t.TYPE_CHECKING:
8
- from unstructured_ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig
9
-
10
-
11
- @dataclass
12
- class WeaviateWriter(Writer):
13
- write_config: "WeaviateWriteConfig"
14
- connector_config: "SimpleWeaviateConfig"
15
-
16
- def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
17
- from unstructured_ingest.connector.weaviate import (
18
- WeaviateDestinationConnector,
19
- )
20
-
21
- return WeaviateDestinationConnector
@@ -1,9 +0,0 @@
1
- GOOGLE_DRIVE_EXPORT_TYPES = {
2
- "application/vnd.google-apps.document": "application/"
3
- "vnd.openxmlformats-officedocument.wordprocessingml.document",
4
- "application/vnd.google-apps.spreadsheet": "application/"
5
- "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
6
- "application/vnd.google-apps.presentation": "application/"
7
- "vnd.openxmlformats-officedocument.presentationml.presentation",
8
- "application/vnd.google-apps.photo": "image/jpeg",
9
- }
@@ -1 +0,0 @@
1
- from __future__ import annotations
File without changes
@@ -1,4 +0,0 @@
1
- from .dest import DestCmd
2
- from .src import SrcCmd
3
-
4
- __all__ = ["SrcCmd", "DestCmd"]
@@ -1,269 +0,0 @@
1
- import inspect
2
- from abc import ABC, abstractmethod
3
- from collections import Counter
4
- from dataclasses import dataclass, field, fields
5
- from typing import Any, Optional, Type, TypeVar
6
-
7
- import click
8
- from pydantic import BaseModel
9
-
10
- from unstructured_ingest.v2.cli.base.importer import import_from_string
11
- from unstructured_ingest.v2.cli.utils.click import extract_config
12
- from unstructured_ingest.v2.cli.utils.model_conversion import options_from_base_model, post_check
13
- from unstructured_ingest.v2.interfaces import ProcessorConfig
14
- from unstructured_ingest.v2.logger import logger
15
- from unstructured_ingest.v2.pipeline.pipeline import Pipeline
16
- from unstructured_ingest.v2.processes.chunker import Chunker, ChunkerConfig
17
- from unstructured_ingest.v2.processes.connector_registry import (
18
- DownloaderT,
19
- IndexerT,
20
- RegistryEntry,
21
- UploaderT,
22
- UploadStager,
23
- UploadStagerConfig,
24
- UploadStagerT,
25
- destination_registry,
26
- source_registry,
27
- )
28
- from unstructured_ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig
29
- from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
30
- from unstructured_ingest.v2.processes.filter import Filterer, FiltererConfig
31
- from unstructured_ingest.v2.processes.partitioner import Partitioner, PartitionerConfig
32
-
33
- CommandT = TypeVar("CommandT", bound=click.Command)
34
-
35
-
36
- @dataclass
37
- class BaseCmd(ABC):
38
- cmd_name: str
39
- registry_entry: RegistryEntry
40
- default_configs: list[Type[BaseModel]] = field(default_factory=list)
41
-
42
- @abstractmethod
43
- def get_registry_options(self):
44
- pass
45
-
46
- def get_default_options(self) -> list[click.Option]:
47
- options = []
48
- for extra in self.default_configs:
49
- options.extend(options_from_base_model(model=extra))
50
- return options
51
-
52
- @classmethod
53
- def consolidate_options(cls, options: list[click.Option]) -> list[click.Option]:
54
- option_names = [option.name for option in options]
55
- duplicate_names = [name for name, count in Counter(option_names).items() if count > 1]
56
- if not duplicate_names:
57
- return options
58
- consolidated_options = []
59
- current_names = []
60
- for option in options:
61
- if option.name not in current_names:
62
- current_names.append(option.name)
63
- consolidated_options.append(option)
64
- continue
65
- existing_option = next(o for o in consolidated_options if o.name == option.name)
66
- if existing_option.__dict__ == option.__dict__:
67
- continue
68
- option_diff = cls.get_options_diff(o1=option, o2=existing_option)
69
- raise ValueError(
70
- "Conflicting duplicate {} option defined: {}".format(
71
- option.name, " | ".join([f"{d[0]}: {d[1]}" for d in option_diff])
72
- )
73
- )
74
- return consolidated_options
75
-
76
- @staticmethod
77
- def get_options_diff(o1: click.Option, o2: click.Option):
78
- o1_dict = o1.__dict__
79
- o2_dict = o2.__dict__
80
- for d in [o1_dict, o2_dict]:
81
- d["opts"] = ",".join(d["opts"])
82
- d["secondary_opts"] = ",".join(d["secondary_opts"])
83
- option_diff = set(o1_dict.items()) ^ set(o2_dict.items())
84
- return option_diff
85
-
86
- @property
87
- def cmd_name_key(self):
88
- return self.cmd_name.replace("-", "_")
89
-
90
- @property
91
- def cli_cmd_name(self):
92
- return self.cmd_name.replace("_", "-")
93
-
94
- @abstractmethod
95
- def cmd(self, ctx: click.Context, **options) -> None:
96
- pass
97
-
98
- def add_options(self, cmd: CommandT) -> CommandT:
99
- options = self.get_registry_options()
100
- options.extend(self.get_default_options())
101
- post_check(options)
102
- cmd.params.extend(options)
103
- return cmd
104
-
105
- def get_pipeline(
106
- self,
107
- src: str,
108
- source_options: dict[str, Any],
109
- dest: Optional[str] = None,
110
- destination_options: Optional[dict[str, Any]] = None,
111
- ) -> Pipeline:
112
- logger.debug(
113
- f"creating pipeline from cli using source {src} with options: {source_options}"
114
- )
115
- pipeline_kwargs: dict[str, Any] = {
116
- "context": self.get_processor_config(options=source_options),
117
- "downloader": self.get_downloader(src=src, options=source_options),
118
- "indexer": self.get_indexer(src=src, options=source_options),
119
- "partitioner": self.get_partitioner(options=source_options),
120
- }
121
- if chunker := self.get_chunker(options=source_options):
122
- pipeline_kwargs["chunker"] = chunker
123
- if filterer := self.get_filterer(options=source_options):
124
- pipeline_kwargs["filterer"] = filterer
125
- if embedder := self.get_embedder(options=source_options):
126
- pipeline_kwargs["embedder"] = embedder
127
- if dest:
128
- logger.debug(
129
- f"setting destination on pipeline {dest} with options: {destination_options}"
130
- )
131
- if uploader_stager := self.get_upload_stager(dest=dest, options=destination_options):
132
- pipeline_kwargs["stager"] = uploader_stager
133
- pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options)
134
- else:
135
- # Default to local uploader
136
- # TODO remove after v1 no longer supported
137
- destination_options = destination_options or {}
138
- if "output_dir" not in destination_options:
139
- destination_options["output_dir"] = source_options["output_dir"]
140
- pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options)
141
- return Pipeline(**pipeline_kwargs)
142
-
143
- @staticmethod
144
- def get_default_uploader(options: dict[str, Any]) -> UploaderT:
145
- uploader_config = extract_config(flat_data=options, config=LocalUploaderConfig)
146
- return LocalUploader(upload_config=uploader_config)
147
-
148
- @staticmethod
149
- def get_chunker(options: dict[str, Any]) -> Optional[Chunker]:
150
- chunker_config = extract_config(flat_data=options, config=ChunkerConfig)
151
- if not chunker_config.chunking_strategy:
152
- return None
153
- return Chunker(config=chunker_config)
154
-
155
- @staticmethod
156
- def get_filterer(options: dict[str, Any]) -> Optional[Filterer]:
157
- filterer_configs = extract_config(flat_data=options, config=FiltererConfig)
158
- if not filterer_configs.model_dump():
159
- return None
160
- return Filterer(config=filterer_configs)
161
-
162
- @staticmethod
163
- def get_embedder(options: dict[str, Any]) -> Optional[Embedder]:
164
- embedder_config = extract_config(flat_data=options, config=EmbedderConfig)
165
- if not embedder_config.embedding_provider:
166
- return None
167
- return Embedder(config=embedder_config)
168
-
169
- @staticmethod
170
- def get_partitioner(options: dict[str, Any]) -> Partitioner:
171
- partitioner_config = extract_config(flat_data=options, config=PartitionerConfig)
172
- return Partitioner(config=partitioner_config)
173
-
174
- @staticmethod
175
- def get_processor_config(options: dict[str, Any]) -> ProcessorConfig:
176
- return extract_config(flat_data=options, config=ProcessorConfig)
177
-
178
- @staticmethod
179
- def get_indexer(src: str, options: dict[str, Any]) -> IndexerT:
180
- source_entry = source_registry[src]
181
- indexer_kwargs: dict[str, Any] = {}
182
- if indexer_config_cls := source_entry.indexer_config:
183
- indexer_kwargs["index_config"] = extract_config(
184
- flat_data=options, config=indexer_config_cls
185
- )
186
- if connection_config_cls := source_entry.connection_config:
187
- indexer_kwargs["connection_config"] = extract_config(
188
- flat_data=options, config=connection_config_cls
189
- )
190
- indexer_cls = source_entry.indexer
191
- return indexer_cls(**indexer_kwargs)
192
-
193
- @staticmethod
194
- def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT:
195
- source_entry = source_registry[src]
196
- downloader_kwargs: dict[str, Any] = {}
197
- if downloader_config_cls := source_entry.downloader_config:
198
- downloader_kwargs["download_config"] = extract_config(
199
- flat_data=options, config=downloader_config_cls
200
- )
201
- if connection_config_cls := source_entry.connection_config:
202
- downloader_kwargs["connection_config"] = extract_config(
203
- flat_data=options, config=connection_config_cls
204
- )
205
- downloader_cls = source_entry.downloader
206
- return downloader_cls(**downloader_kwargs)
207
-
208
- @staticmethod
209
- def get_custom_stager(
210
- stager_reference: str, stager_config_kwargs: Optional[dict] = None
211
- ) -> Optional[UploadStagerT]:
212
- uploader_cls = import_from_string(stager_reference)
213
- if not inspect.isclass(uploader_cls):
214
- raise ValueError(
215
- f"custom stager must be a reference to a python class, got: {type(uploader_cls)}"
216
- )
217
- if not issubclass(uploader_cls, UploadStager):
218
- raise ValueError(
219
- "custom stager must be an implementation of the UploadStager interface"
220
- )
221
- fields_dict = {f.name: f.type for f in fields(uploader_cls)}
222
- upload_stager_config_cls = fields_dict["upload_stager_config"]
223
- if not inspect.isclass(upload_stager_config_cls):
224
- raise ValueError(
225
- f"custom stager config must be a class, got: {type(upload_stager_config_cls)}"
226
- )
227
- if not issubclass(upload_stager_config_cls, UploadStagerConfig):
228
- raise ValueError(
229
- "custom stager config must be an implementation "
230
- "of the UploadStagerUploadStagerConfig interface"
231
- )
232
- upload_stager_kwargs: dict[str, Any] = {}
233
- if stager_config_kwargs:
234
- upload_stager_kwargs["upload_stager_config"] = upload_stager_config_cls(
235
- **stager_config_kwargs
236
- )
237
- return uploader_cls(**upload_stager_kwargs)
238
-
239
- @staticmethod
240
- def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]:
241
- if custom_stager := options.get("custom_stager"):
242
- return BaseCmd.get_custom_stager(
243
- stager_reference=custom_stager,
244
- stager_config_kwargs=options.get("custom_stager_config_kwargs"),
245
- )
246
- dest_entry = destination_registry[dest]
247
- upload_stager_kwargs: dict[str, Any] = {}
248
- if upload_stager_config_cls := dest_entry.upload_stager_config:
249
- upload_stager_kwargs["upload_stager_config"] = extract_config(
250
- flat_data=options, config=upload_stager_config_cls
251
- )
252
- if upload_stager_cls := dest_entry.upload_stager:
253
- return upload_stager_cls(**upload_stager_kwargs)
254
- return None
255
-
256
- @staticmethod
257
- def get_uploader(dest, options: dict[str, Any]) -> UploaderT:
258
- dest_entry = destination_registry[dest]
259
- uploader_kwargs: dict[str, Any] = {}
260
- if uploader_config_cls := dest_entry.uploader_config:
261
- uploader_kwargs["upload_config"] = extract_config(
262
- flat_data=options, config=uploader_config_cls
263
- )
264
- if connection_config_cls := dest_entry.connection_config:
265
- uploader_kwargs["connection_config"] = extract_config(
266
- flat_data=options, config=connection_config_cls
267
- )
268
- uploader_cls = dest_entry.uploader
269
- return uploader_cls(**uploader_kwargs)