unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic; more details are available on the package's registry page.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +1 -1
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
  279. unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,94 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig, Dict
8
- from unstructured_ingest.connector.delta_table import DeltaTableWriteConfig, SimpleDeltaTableConfig
9
-
10
- CMD_NAME = "delta-table"
11
-
12
-
13
- @dataclass
14
- class DeltaTableCliConfig(SimpleDeltaTableConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--table-uri"],
20
- required=True,
21
- help="the path of the DeltaTable",
22
- ),
23
- click.Option(
24
- ["--version"],
25
- default=None,
26
- type=int,
27
- help="version of the DeltaTable",
28
- ),
29
- click.Option(
30
- ["--storage_options"],
31
- required=False,
32
- type=Dict(),
33
- default=None,
34
- help="a dictionary of the options to use for the storage backend, "
35
- "passed in as a json string",
36
- ),
37
- click.Option(
38
- ["--without-files"],
39
- is_flag=True,
40
- default=False,
41
- help="If set, will load table without tracking files.",
42
- ),
43
- ]
44
- return options
45
-
46
-
47
- @dataclass
48
- class DeltaTableCliWriteConfig(DeltaTableWriteConfig, CliConfig):
49
- @staticmethod
50
- def get_cli_options() -> t.List[click.Option]:
51
- options = [
52
- click.Option(
53
- ["--overwrite-schema"],
54
- is_flag=True,
55
- default=False,
56
- help="Flag to overwrite schema of destination table",
57
- ),
58
- click.Option(
59
- ["--drop-empty-cols"],
60
- is_flag=True,
61
- default=False,
62
- help="Flag to drop any columns that have no content",
63
- ),
64
- click.Option(
65
- ["--mode"],
66
- default="error",
67
- type=click.Choice(["error", "append", "overwrite", "ignore"]),
68
- help="How to handle existing data. Default is to error if table already exists. "
69
- "If 'append', will add new data. "
70
- "If 'overwrite', will replace table with new data. "
71
- "If 'ignore', will not write anything if table already exists.",
72
- ),
73
- ]
74
- return options
75
-
76
-
77
- def get_base_src_cmd() -> BaseSrcCmd:
78
- cmd_cls = BaseSrcCmd(
79
- cmd_name=CMD_NAME,
80
- cli_config=DeltaTableCliConfig,
81
- )
82
- return cmd_cls
83
-
84
-
85
- def get_base_dest_cmd():
86
- from unstructured_ingest.cli.base.dest import BaseDestCmd
87
-
88
- cmd_cls = BaseDestCmd(
89
- cmd_name=CMD_NAME,
90
- cli_config=DeltaTableCliConfig,
91
- additional_cli_options=[DeltaTableCliWriteConfig],
92
- write_config=DeltaTableWriteConfig,
93
- )
94
- return cmd_cls
@@ -1,47 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- DelimitedString,
10
- )
11
- from unstructured_ingest.connector.discord import SimpleDiscordConfig
12
-
13
-
14
- @dataclass
15
- class DiscordCliConfig(SimpleDiscordConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--token"],
21
- required=True,
22
- help="Bot token used to access Discord API, must have "
23
- "READ_MESSAGE_HISTORY scope for the bot user",
24
- ),
25
- click.Option(
26
- ["--channels"],
27
- required=True,
28
- type=DelimitedString(),
29
- help="Comma-delimited list of discord channel ids to ingest from.",
30
- ),
31
- click.Option(
32
- ["--period"],
33
- default=None,
34
- type=click.IntRange(0),
35
- help="Number of days to go back in the history of "
36
- "discord channels, must be a number",
37
- ),
38
- ]
39
- return options
40
-
41
-
42
- def get_base_src_cmd() -> BaseSrcCmd:
43
- cmd_cls = BaseSrcCmd(
44
- cmd_name="discord",
45
- cli_config=DiscordCliConfig,
46
- )
47
- return cmd_cls
@@ -1,133 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
8
- from unstructured_ingest.connector.elasticsearch import (
9
- ElasticsearchWriteConfig,
10
- SimpleElasticsearchConfig,
11
- )
12
-
13
- CMD_NAME = "elasticsearch"
14
-
15
-
16
- @dataclass
17
- class ElasticsearchCliConfig(SimpleElasticsearchConfig, CliConfig):
18
- @staticmethod
19
- def get_cli_options() -> t.List[click.Option]:
20
- options = [
21
- click.Option(
22
- ["--index-name"],
23
- required=True,
24
- type=str,
25
- help="Name of the Elasticsearch index to pull data from, or upload data to.",
26
- ),
27
- click.Option(
28
- ["--hosts"],
29
- type=DelimitedString(),
30
- help='List of the Elasticsearch hosts to connect to, e.g. "http://localhost:9200"',
31
- ),
32
- click.Option(
33
- ["--fields"],
34
- type=DelimitedString(),
35
- default=[],
36
- help="If provided, will limit the fields returned by Elasticsearch "
37
- "to this comma-delimited list",
38
- ),
39
- click.Option(
40
- ["--username"], type=str, default=None, help="username when using basic auth"
41
- ),
42
- click.Option(
43
- ["--password"],
44
- type=str,
45
- default=None,
46
- help="password when using basic auth or connecting to a cloud instance",
47
- ),
48
- click.Option(
49
- ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud"
50
- ),
51
- click.Option(
52
- ["--es-api-key"], type=str, default=None, help="api key used for authentication"
53
- ),
54
- click.Option(
55
- ["--api-key-id"],
56
- type=str,
57
- default=None,
58
- help="id associated with api key used for authentication: "
59
- "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501
60
- ),
61
- click.Option(
62
- ["--bearer-auth"],
63
- type=str,
64
- default=None,
65
- help="bearer token used for HTTP bearer authentication",
66
- ),
67
- click.Option(
68
- ["--ca-certs"],
69
- type=click.Path(),
70
- default=None,
71
- ),
72
- click.Option(
73
- ["--ssl-assert-fingerprint"],
74
- type=str,
75
- default=None,
76
- help="SHA256 fingerprint value",
77
- ),
78
- click.Option(
79
- ["--batch-size"],
80
- default=100,
81
- type=click.IntRange(0),
82
- help="how many records to read at a time per process",
83
- ),
84
- ]
85
- return options
86
-
87
-
88
- @dataclass
89
- class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig):
90
- @staticmethod
91
- def get_cli_options() -> t.List[click.Option]:
92
- options = [
93
- click.Option(
94
- ["--batch-size-bytes"],
95
- required=False,
96
- default=15_000_000,
97
- type=int,
98
- help="Size limit (in bytes) for each batch of items to be uploaded. Check"
99
- " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html"
100
- "#_how_big_is_too_big for more information.",
101
- ),
102
- click.Option(
103
- ["--num-processes"],
104
- required=False,
105
- default=1,
106
- type=int,
107
- help="Number of processes to be used while uploading content",
108
- ),
109
- ]
110
- return options
111
-
112
-
113
- def get_base_src_cmd() -> BaseSrcCmd:
114
- cmd_cls = BaseSrcCmd(
115
- cmd_name="elasticsearch",
116
- cli_config=ElasticsearchCliConfig,
117
- )
118
- return cmd_cls
119
-
120
-
121
- def get_base_dest_cmd():
122
- from unstructured_ingest.cli.base.dest import BaseDestCmd
123
-
124
- cmd_cls = BaseDestCmd(
125
- cmd_name="elasticsearch",
126
- cli_config=ElasticsearchCliConfig,
127
- additional_cli_options=[ElasticsearchCliWriteConfig],
128
- addition_configs={
129
- "connector_config": SimpleElasticsearchConfig,
130
- "write_config": ElasticsearchCliWriteConfig,
131
- },
132
- )
133
- return cmd_cls
@@ -1,94 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.fsspec.azure import (
11
- AzureWriteConfig,
12
- SimpleAzureBlobStorageConfig,
13
- )
14
-
15
- CMD_NAME = "azure"
16
-
17
-
18
- @dataclass
19
- class AzureCliConfig(SimpleAzureBlobStorageConfig, CliConfig):
20
- @staticmethod
21
- def get_cli_options() -> t.List[click.Option]:
22
- options = [
23
- click.Option(
24
- ["--account-key"],
25
- default=None,
26
- help="The storage account key. This is used for shared key "
27
- "authentication. If any of account key, sas token or "
28
- "client_id are not specified, anonymous access will be used.",
29
- ),
30
- click.Option(
31
- ["--account-name"],
32
- default=None,
33
- help="The storage account name. This is used to authenticate "
34
- "requests signed with an account key and to construct "
35
- "the storage endpoint. It is required unless a connection "
36
- "string is given, or if a custom domain is used with "
37
- "anonymous authentication.",
38
- ),
39
- click.Option(
40
- ["--connection-string"],
41
- default=None,
42
- help="If specified, this will override all other parameters. See "
43
- "http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501
44
- "for the connection string format.",
45
- ),
46
- click.Option(
47
- ["--sas_token"],
48
- default=None,
49
- help="A shared access signature token to use to authenticate "
50
- "requests instead of the account key. If account key and "
51
- "sas token are both specified, account key will be used "
52
- "to sign. If any of account key, sas token or client_id "
53
- "are not specified, anonymous access will be used.",
54
- ),
55
- ]
56
- return options
57
-
58
-
59
- @dataclass
60
- class AzureCliWriteConfig(AzureWriteConfig, CliConfig):
61
- @staticmethod
62
- def get_cli_options() -> t.List[click.Option]:
63
- options = [
64
- click.Option(
65
- ["--overwrite"],
66
- is_flag=True,
67
- default=False,
68
- show_default=True,
69
- help="If set, will overwrite content if content already exists",
70
- )
71
- ]
72
- return options
73
-
74
-
75
- def get_base_src_cmd() -> BaseSrcCmd:
76
- cmd_cls = BaseSrcCmd(
77
- cmd_name=CMD_NAME,
78
- cli_config=AzureCliConfig,
79
- is_fsspec=True,
80
- )
81
- return cmd_cls
82
-
83
-
84
- def get_base_dest_cmd():
85
- from unstructured_ingest.cli.base.dest import BaseDestCmd
86
-
87
- cmd_cls = BaseDestCmd(
88
- cmd_name=CMD_NAME,
89
- cli_config=AzureCliConfig,
90
- write_config=AzureCliWriteConfig,
91
- is_fsspec=True,
92
- additional_cli_options=[AzureCliWriteConfig],
93
- )
94
- return cmd_cls
@@ -1,48 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig
11
-
12
- CMD_NAME = "box"
13
-
14
-
15
- @dataclass
16
- class BoxCliConfig(SimpleBoxConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--box-app-config"],
22
- default=None,
23
- type=click.Path(),
24
- help="Path to Box app credentials as json file.",
25
- ),
26
- ]
27
- return options
28
-
29
-
30
- def get_base_src_cmd() -> BaseSrcCmd:
31
- cmd_cls = BaseSrcCmd(
32
- cmd_name=CMD_NAME,
33
- cli_config=BoxCliConfig,
34
- is_fsspec=True,
35
- )
36
- return cmd_cls
37
-
38
-
39
- def get_base_dest_cmd():
40
- from unstructured_ingest.cli.base.dest import BaseDestCmd
41
-
42
- cmd_cls = BaseDestCmd(
43
- cmd_name=CMD_NAME,
44
- cli_config=BoxCliConfig,
45
- write_config=BoxWriteConfig,
46
- is_fsspec=True,
47
- )
48
- return cmd_cls
@@ -1,51 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.fsspec.dropbox import (
11
- DropboxWriteConfig,
12
- SimpleDropboxConfig,
13
- )
14
-
15
- CMD_NAME = "dropbox"
16
-
17
-
18
- @dataclass
19
- class DropboxCliConfig(SimpleDropboxConfig, CliConfig):
20
- @staticmethod
21
- def get_cli_options() -> t.List[click.Option]:
22
- options = [
23
- click.Option(
24
- ["--token"],
25
- required=True,
26
- type=str,
27
- help="Dropbox access token.",
28
- ),
29
- ]
30
- return options
31
-
32
-
33
- def get_base_src_cmd() -> BaseSrcCmd:
34
- cmd_cls = BaseSrcCmd(
35
- cmd_name=CMD_NAME,
36
- cli_config=DropboxCliConfig,
37
- is_fsspec=True,
38
- )
39
- return cmd_cls
40
-
41
-
42
- def get_base_dest_cmd():
43
- from unstructured_ingest.cli.base.dest import BaseDestCmd
44
-
45
- cmd_cls = BaseDestCmd(
46
- cmd_name=CMD_NAME,
47
- cli_config=DropboxCliConfig,
48
- write_config=DropboxWriteConfig,
49
- is_fsspec=True,
50
- )
51
- return cmd_cls
@@ -1,15 +0,0 @@
1
- from unstructured_ingest.cli.base.src import BaseSrcCmd
2
-
3
- CMD_NAME = "fsspec"
4
-
5
-
6
- def get_base_src_cmd() -> BaseSrcCmd:
7
- cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, is_fsspec=True)
8
- return cmd_cls
9
-
10
-
11
- def get_base_dest_cmd():
12
- from unstructured_ingest.cli.base.dest import BaseDestCmd
13
-
14
- cmd_cls = BaseDestCmd(cmd_name=CMD_NAME, is_fsspec=True)
15
- return cmd_cls
@@ -1,71 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- FileOrJson,
10
- )
11
- from unstructured_ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig
12
-
13
- CMD_NAME = "gcs"
14
-
15
-
16
- @dataclass
17
- class GcsCliConfig(SimpleGcsConfig, CliConfig):
18
- @staticmethod
19
- def get_cli_options() -> t.List[click.Option]:
20
- help_string = """
21
- Options:
22
- - ``None``, GCSFS will attempt to guess your credentials in the
23
- following order: gcloud CLI default, gcsfs cached token, google compute
24
- metadata service, anonymous.
25
- - ``'google_default'``, your default gcloud credentials will be used,
26
- which are typically established by doing ``gcloud login`` in a terminal.
27
- - ``'cache'``, credentials from previously successful gcsfs
28
- authentication will be used (use this after "browser" auth succeeded)
29
- - ``'anon'``, no authentication is performed, and you can only
30
- access data which is accessible to allUsers (in this case, the project and
31
- access level parameters are meaningless)
32
- - ``'browser'``, you get an access code with which you can
33
- authenticate via a specially provided URL
34
- - if ``'cloud'``, we assume we are running within google compute
35
- or google container engine, and query the internal metadata directly for
36
- a token.
37
- - you may supply a token generated by the
38
- [gcloud](https://cloud.google.com/sdk/docs/)
39
- utility; this is either a python dictionary or the name of a file
40
- containing the JSON returned by logging in with the gcloud CLI tool.
41
- """
42
- options = [
43
- click.Option(
44
- ["--service-account-key"],
45
- default=None,
46
- type=FileOrJson(allow_raw_str=True),
47
- help=help_string,
48
- ),
49
- ]
50
- return options
51
-
52
-
53
- def get_base_src_cmd() -> BaseSrcCmd:
54
- cmd_cls = BaseSrcCmd(
55
- cmd_name=CMD_NAME,
56
- cli_config=GcsCliConfig,
57
- is_fsspec=True,
58
- )
59
- return cmd_cls
60
-
61
-
62
- def get_base_dest_cmd():
63
- from unstructured_ingest.cli.base.dest import BaseDestCmd
64
-
65
- cmd_cls = BaseDestCmd(
66
- cmd_name=CMD_NAME,
67
- cli_config=GcsCliConfig,
68
- write_config=GcsWriteConfig,
69
- is_fsspec=True,
70
- )
71
- return cmd_cls
@@ -1,74 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config
11
-
12
- CMD_NAME = "s3"
13
-
14
-
15
- @dataclass
16
- class S3CliConfig(SimpleS3Config, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--anonymous"],
22
- is_flag=True,
23
- default=False,
24
- help="Connect to s3 without local AWS credentials.",
25
- ),
26
- click.Option(
27
- ["--endpoint-url"],
28
- type=str,
29
- default=None,
30
- help="Use this endpoint_url, if specified. Needed for "
31
- "connecting to non-AWS S3 buckets.",
32
- ),
33
- click.Option(
34
- ["--key"],
35
- type=str,
36
- default=None,
37
- help="If not anonymous, use this access key ID, if specified. Takes precedence "
38
- "over `aws_access_key_id` in client_kwargs.",
39
- ),
40
- click.Option(
41
- ["--secret"],
42
- type=str,
43
- default=None,
44
- help="If not anonymous, use this secret access key, if specified.",
45
- ),
46
- click.Option(
47
- ["--token"],
48
- type=str,
49
- default=None,
50
- help="If not anonymous, use this security token, if specified.",
51
- ),
52
- ]
53
- return options
54
-
55
-
56
- def get_base_src_cmd():
57
- cmd_cls = BaseSrcCmd(
58
- cmd_name=CMD_NAME,
59
- cli_config=S3CliConfig,
60
- is_fsspec=True,
61
- )
62
- return cmd_cls
63
-
64
-
65
- def get_base_dest_cmd():
66
- from unstructured_ingest.cli.base.dest import BaseDestCmd
67
-
68
- cmd_cls = BaseDestCmd(
69
- cmd_name=CMD_NAME,
70
- cli_config=S3CliConfig,
71
- write_config=S3WriteConfig,
72
- is_fsspec=True,
73
- )
74
- return cmd_cls
@@ -1,58 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.fsspec.sftp import SimpleSftpConfig
11
-
12
- CMD_NAME = "sftp"
13
-
14
-
15
- @dataclass
16
- class SftpCliConfig(SimpleSftpConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--username"],
22
- required=True,
23
- type=str,
24
- help="Username for sftp connection",
25
- ),
26
- click.Option(
27
- ["--password"],
28
- required=True,
29
- type=str,
30
- help="Password for sftp connection",
31
- ),
32
- click.Option(
33
- ["--look-for-keys"],
34
- required=False,
35
- default=False,
36
- is_flag=True,
37
- type=bool,
38
- help="Whether to search for private key files in ~/.ssh/",
39
- ),
40
- click.Option(
41
- ["--allow-agent"],
42
- required=False,
43
- default=False,
44
- is_flag=True,
45
- type=bool,
46
- help="Whether to connect to the SSH agent.",
47
- ),
48
- ]
49
- return options
50
-
51
-
52
- def get_base_src_cmd() -> BaseSrcCmd:
53
- cmd_cls = BaseSrcCmd(
54
- cmd_name=CMD_NAME,
55
- cli_config=SftpCliConfig,
56
- is_fsspec=True,
57
- )
58
- return cmd_cls