unstructured-ingest 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +49 -0
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/processes/connectors/github.py +221 -0
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
  279. unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
@@ -1,117 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.cmds.elasticsearch import ElasticsearchCliWriteConfig
8
- from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
9
- from unstructured_ingest.connector.opensearch import SimpleOpenSearchConfig
10
-
11
- CMD_NAME = "opensearch"
12
-
13
-
14
- @dataclass
15
- class OpenSearchCliConfig(SimpleOpenSearchConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--index-name"],
21
- required=True,
22
- type=str,
23
- help="Name of the OpenSearch index to pull data from, or upload data to.",
24
- ),
25
- click.Option(
26
- ["--hosts"],
27
- type=DelimitedString(),
28
- help='List of the OpenSearch hosts to connect to, e.g. "http://localhost:9200"',
29
- ),
30
- click.Option(
31
- ["--fields"],
32
- type=DelimitedString(),
33
- default=[],
34
- help="If provided, will limit the fields returned by OpenSearch "
35
- "to this comma-delimited list",
36
- ),
37
- click.Option(
38
- ["--username"], type=str, default=None, help="username when using basic auth"
39
- ),
40
- click.Option(
41
- ["--password"],
42
- type=str,
43
- default=None,
44
- help="password when using basic auth",
45
- ),
46
- click.Option(
47
- ["--use-ssl"],
48
- type=bool,
49
- default=False,
50
- is_flag=True,
51
- help="use ssl for the connection",
52
- ),
53
- click.Option(
54
- ["--verify-certs"],
55
- type=bool,
56
- default=False,
57
- is_flag=True,
58
- help="whether to verify SSL certificates",
59
- ),
60
- click.Option(
61
- ["--ssl-show-warn"],
62
- type=bool,
63
- default=False,
64
- is_flag=True,
65
- help="show warning when verify certs is disabled",
66
- ),
67
- click.Option(
68
- ["--ca-certs"],
69
- type=click.Path(),
70
- default=None,
71
- help="path to CA bundle",
72
- ),
73
- click.Option(
74
- ["--client-cert"],
75
- type=click.Path(),
76
- default=None,
77
- help="path to the file containing the private key and the certificate,"
78
- " or cert only if using client_key",
79
- ),
80
- click.Option(
81
- ["--client-key"],
82
- type=click.Path(),
83
- default=None,
84
- help="path to the file containing the private key"
85
- " if using separate cert and key files",
86
- ),
87
- click.Option(
88
- ["--batch-size"],
89
- default=100,
90
- type=click.IntRange(0),
91
- help="how many records to read at a time per process",
92
- ),
93
- ]
94
- return options
95
-
96
-
97
- def get_base_src_cmd() -> BaseSrcCmd:
98
- cmd_cls = BaseSrcCmd(
99
- cmd_name="opensearch",
100
- cli_config=OpenSearchCliConfig,
101
- )
102
- return cmd_cls
103
-
104
-
105
- def get_base_dest_cmd():
106
- from unstructured_ingest.cli.base.dest import BaseDestCmd
107
-
108
- cmd_cls = BaseDestCmd(
109
- cmd_name="opensearch",
110
- cli_config=OpenSearchCliConfig,
111
- additional_cli_options=[ElasticsearchCliWriteConfig],
112
- addition_configs={
113
- "connector_config": SimpleOpenSearchConfig,
114
- "write_config": ElasticsearchCliWriteConfig,
115
- },
116
- )
117
- return cmd_cls
@@ -1,67 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- DelimitedString,
11
- )
12
- from unstructured_ingest.connector.outlook import SimpleOutlookConfig
13
-
14
-
15
- @dataclass
16
- class OutlookCliConfig(SimpleOutlookConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--client-id"],
22
- required=True,
23
- type=str,
24
- help="Microsoft app client ID",
25
- ),
26
- click.Option(
27
- ["--user-email"],
28
- required=True,
29
- type=str,
30
- help="Outlook email to download messages from.",
31
- ),
32
- click.Option(
33
- ["--tenant"],
34
- default="common",
35
- help="ID or domain name associated with your Azure AD instance",
36
- ),
37
- click.Option(
38
- ["--outlook-folders"],
39
- default=None,
40
- type=DelimitedString(),
41
- help="Folders to download email messages from. "
42
- "Do not specify subfolders. Use quotes if spaces in folder names.",
43
- ),
44
- click.Option(
45
- ["--client-cred"],
46
- default=None,
47
- type=str,
48
- help="Microsoft App client secret",
49
- ),
50
- click.Option(
51
- ["--authority-url"],
52
- default="https://login.microsoftonline.com",
53
- type=str,
54
- help="Authentication token provider for Microsoft apps, default is "
55
- "https://login.microsoftonline.com",
56
- ),
57
- ]
58
- return options
59
-
60
-
61
- def get_base_src_cmd() -> BaseSrcCmd:
62
- cmd_cls = BaseSrcCmd(
63
- cmd_name="outlook",
64
- cli_config=OutlookCliConfig,
65
- additional_cli_options=[CliRecursiveConfig],
66
- )
67
- return cmd_cls
@@ -1,71 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import (
7
- CliConfig,
8
- )
9
- from unstructured_ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig
10
-
11
-
12
- @dataclass
13
- class PineconeCliConfig(SimplePineconeConfig, CliConfig):
14
- @staticmethod
15
- def get_cli_options() -> t.List[click.Option]:
16
- options = [
17
- click.Option(
18
- ["--api-key"],
19
- required=True,
20
- type=str,
21
- help="API key used for authenticating to a Pinecone instance.",
22
- envvar="PINECONE_API_KEY",
23
- show_envvar=True,
24
- ),
25
- click.Option(
26
- ["--index-name"],
27
- required=True,
28
- type=str,
29
- help="The name of the pinecone index to connect to.",
30
- ),
31
- click.Option(
32
- ["--environment"],
33
- required=True,
34
- type=str,
35
- help="The environment where the index lives. Eg. 'gcp-starter' or 'us-east1-gcp'",
36
- ),
37
- ]
38
- return options
39
-
40
-
41
- @dataclass
42
- class PineconeCliWriteConfig(PineconeWriteConfig, CliConfig):
43
- @staticmethod
44
- def get_cli_options() -> t.List[click.Option]:
45
- options = [
46
- click.Option(
47
- ["--batch-size"],
48
- default=50,
49
- type=int,
50
- help="Number of records per batch",
51
- ),
52
- click.Option(
53
- ["--num-processes"],
54
- default=2,
55
- type=int,
56
- help="Number of parallel processes with which to upload elements",
57
- ),
58
- ]
59
- return options
60
-
61
-
62
- def get_base_dest_cmd():
63
- from unstructured_ingest.cli.base.dest import BaseDestCmd
64
-
65
- cmd_cls = BaseDestCmd(
66
- cmd_name="pinecone",
67
- cli_config=PineconeCliConfig,
68
- additional_cli_options=[PineconeCliWriteConfig],
69
- write_config=PineconeWriteConfig,
70
- )
71
- return cmd_cls
@@ -1,124 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import (
7
- CliConfig,
8
- )
9
- from unstructured_ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig
10
-
11
-
12
- @dataclass
13
- class QdrantCliConfig(SimpleQdrantConfig, CliConfig):
14
- @staticmethod
15
- def get_cli_options() -> t.List[click.Option]:
16
- options = [
17
- click.Option(
18
- ["--collection-name"],
19
- required=True,
20
- type=str,
21
- help="The name of the Qdrant collection to use.",
22
- ),
23
- click.Option(
24
- ["--location"],
25
- type=str,
26
- help="The location of the Qdrant cluster.",
27
- ),
28
- click.Option(
29
- ["--url"],
30
- type=str,
31
- help="The location of the Qdrant cluster.",
32
- ),
33
- click.Option(
34
- ["--port"],
35
- type=int,
36
- default=6333,
37
- help="Port of the REST API interface. Default: 6333.",
38
- ),
39
- click.Option(
40
- ["--grpc-port"],
41
- type=int,
42
- default=6334,
43
- help="Port of the gRPC interface. Default: 6334.",
44
- ),
45
- click.Option(
46
- ["--prefer-grpc"],
47
- type=bool,
48
- is_flag=True,
49
- help="Whether to use gPRC interface whenever possible in methods. Default: False.",
50
- ),
51
- click.Option(
52
- ["--https"],
53
- type=bool,
54
- is_flag=True,
55
- help="Whether to use HTTPS(SSL) protocol. Default: False.",
56
- ),
57
- click.Option(
58
- ["--prefix"],
59
- type=str,
60
- help="Prefix to add the REST API endpoints.",
61
- ),
62
- click.Option(
63
- ["--timeout"],
64
- type=int,
65
- help="Timeout for operations. Default: 5.0 seconds for REST, unlimited for gRPC.",
66
- ),
67
- click.Option(
68
- ["--host"],
69
- type=str,
70
- help="Host name of the Qdrant service.",
71
- ),
72
- click.Option(
73
- ["--path"],
74
- type=str,
75
- help="Persistence path for QdrantLocal.",
76
- ),
77
- click.Option(
78
- ["--force-disable-check-same-thread"],
79
- type=bool,
80
- is_flag=True,
81
- help="Whether to force disable check same thread for QdrantLocal.",
82
- ),
83
- click.Option(
84
- ["--api-key"],
85
- type=str,
86
- help="API key for authentication in Qdrant Cloud. Default: None.",
87
- envvar="QDRANT_API_KEY",
88
- show_envvar=True,
89
- ),
90
- ]
91
- return options
92
-
93
-
94
- @dataclass
95
- class QdrantCliWriteConfig(QdrantWriteConfig, CliConfig):
96
- @staticmethod
97
- def get_cli_options() -> t.List[click.Option]:
98
- options = [
99
- click.Option(
100
- ["--batch-size"],
101
- default=50,
102
- type=int,
103
- help="Number of points to upload per batch",
104
- ),
105
- click.Option(
106
- ["--num-processes"],
107
- default=2,
108
- type=int,
109
- help="Number of parallel processes with which to upload",
110
- ),
111
- ]
112
- return options
113
-
114
-
115
- def get_base_dest_cmd():
116
- from unstructured_ingest.cli.base.dest import BaseDestCmd
117
-
118
- cmd_cls = BaseDestCmd(
119
- cmd_name="qdrant",
120
- cli_config=QdrantCliConfig,
121
- additional_cli_options=[QdrantCliWriteConfig],
122
- write_config=QdrantWriteConfig,
123
- )
124
- return cmd_cls
@@ -1,67 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.reddit import SimpleRedditConfig
11
-
12
-
13
- @dataclass
14
- class RedditCliConfig(SimpleRedditConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--client-id"],
20
- required=True,
21
- type=str,
22
- help="The client ID, see "
23
- "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501
24
- " for more information.",
25
- ),
26
- click.Option(
27
- ["--client-secret"],
28
- required=True,
29
- type=str,
30
- help="The client secret, see "
31
- "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501
32
- " for more information.",
33
- ),
34
- click.Option(
35
- ["--subreddit-name"],
36
- required=True,
37
- type=str,
38
- help='The name of a subreddit, without the "r\\", e.g. "machinelearning"',
39
- ),
40
- click.Option(
41
- ["--search-query"],
42
- default=None,
43
- type=str,
44
- help="If set, return posts using this query. Otherwise, use hot posts.",
45
- ),
46
- click.Option(
47
- ["--num-posts"],
48
- required=True,
49
- type=click.IntRange(0),
50
- help="If set, limits the number of posts to pull in.",
51
- ),
52
- click.Option(
53
- ["--user-agent"],
54
- required=True,
55
- type=str,
56
- help="user agent request header to use when calling Reddit API",
57
- ),
58
- ]
59
- return options
60
-
61
-
62
- def get_base_src_cmd() -> BaseSrcCmd:
63
- cmd_cls = BaseSrcCmd(
64
- cmd_name="reddit",
65
- cli_config=RedditCliConfig,
66
- )
67
- return cmd_cls
@@ -1,58 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- DelimitedString,
11
- )
12
- from unstructured_ingest.connector.salesforce import SimpleSalesforceConfig
13
-
14
-
15
- @dataclass
16
- class SalesforceCliConfig(SimpleSalesforceConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- possible_categories = ["Account", "Case", "Campaign", "EmailMessage", "Lead"]
20
- options = [
21
- click.Option(
22
- ["--username"],
23
- required=True,
24
- type=str,
25
- help="Salesforce username usually looks like an email.",
26
- ),
27
- click.Option(
28
- ["--consumer-key"],
29
- required=True,
30
- type=str,
31
- help="For the Salesforce JWT auth. Found in Consumer Details.",
32
- ),
33
- click.Option(
34
- ["--private-key"],
35
- required=True,
36
- type=str,
37
- help="Path to the private key or its contents for the Salesforce JWT auth. "
38
- "Key file is usually named server.key.",
39
- ),
40
- click.Option(
41
- ["--categories"],
42
- default=None,
43
- required=True,
44
- type=DelimitedString(choices=possible_categories),
45
- help="Comma-delimited salesforce categories to download. "
46
- "Currently only {}.".format(", ".join(possible_categories)),
47
- ),
48
- ]
49
- return options
50
-
51
-
52
- def get_base_src_cmd() -> BaseSrcCmd:
53
- cmd_cls = BaseSrcCmd(
54
- cmd_name="salesforce",
55
- cli_config=SalesforceCliConfig,
56
- additional_cli_options=[CliRecursiveConfig],
57
- )
58
- return cmd_cls
@@ -1,66 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- )
11
- from unstructured_ingest.connector.sharepoint import SimpleSharepointConfig
12
-
13
-
14
- @dataclass
15
- class SharepointCliConfig(SimpleSharepointConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--client-id"],
21
- default=None,
22
- type=str,
23
- help="Sharepoint app client ID",
24
- ),
25
- click.Option(
26
- ["--client-cred"],
27
- default=None,
28
- type=str,
29
- help="Sharepoint app secret",
30
- ),
31
- click.Option(
32
- ["--site"],
33
- default=None,
34
- type=str,
35
- help="Sharepoint site url. Process either base url e.g \
36
- https://[tenant].sharepoint.com or relative sites \
37
- https://[tenant].sharepoint.com/sites/<site_name>. \
38
- To process all sites within the tenant pass a site url as \
39
- https://[tenant]-admin.sharepoint.com.\
40
- This requires the app to be registered at a tenant level",
41
- ),
42
- click.Option(
43
- ["--path"],
44
- default="Shared Documents",
45
- type=str,
46
- help="Path from which to start parsing files. If the connector is to \
47
- process all sites within the tenant this filter will be applied to \
48
- all sites document libraries. Default 'Shared Documents'",
49
- ),
50
- click.Option(
51
- ["--files-only"],
52
- is_flag=True,
53
- default=False,
54
- help="Process only files.",
55
- ),
56
- ]
57
- return options
58
-
59
-
60
- def get_base_src_cmd() -> BaseSrcCmd:
61
- cmd_cls = BaseSrcCmd(
62
- cmd_name="sharepoint",
63
- cli_config=SharepointCliConfig,
64
- additional_cli_options=[CliRecursiveConfig],
65
- )
66
- return cmd_cls
@@ -1,56 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- DelimitedString,
10
- )
11
- from unstructured_ingest.connector.slack import SimpleSlackConfig
12
-
13
-
14
- @dataclass
15
- class SlackCliConfig(SimpleSlackConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--token"],
21
- required=True,
22
- type=str,
23
- help="Bot token used to access Slack API, must have channels:history "
24
- "scope for the bot user",
25
- ),
26
- click.Option(
27
- ["--channels"],
28
- required=True,
29
- type=DelimitedString(),
30
- help="Comma-delimited list of Slack channel IDs to pull messages from, "
31
- "can be a public or private channel",
32
- ),
33
- click.Option(
34
- ["--start-date"],
35
- default=None,
36
- type=str,
37
- help="Start date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or "
38
- "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz",
39
- ),
40
- click.Option(
41
- ["--end-date"],
42
- default=None,
43
- type=str,
44
- help="End date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or "
45
- "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz",
46
- ),
47
- ]
48
- return options
49
-
50
-
51
- def get_base_src_cmd() -> BaseSrcCmd:
52
- cmd_cls = BaseSrcCmd(
53
- cmd_name="slack",
54
- cli_config=SlackCliConfig,
55
- )
56
- return cmd_cls
@@ -1,66 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import CliConfig
7
- from unstructured_ingest.connector.sql import SimpleSqlConfig
8
- from unstructured_ingest.interfaces import WriteConfig
9
-
10
- SQL_DRIVERS = {"postgresql", "sqlite"}
11
-
12
-
13
- @dataclass
14
- class SqlCliConfig(SimpleSqlConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--db-type"],
20
- required=True,
21
- type=click.Choice(SQL_DRIVERS),
22
- help="Type of the database backend",
23
- ),
24
- click.Option(
25
- ["--username"],
26
- default=None,
27
- type=str,
28
- help="DB username",
29
- ),
30
- click.Option(
31
- ["--password"],
32
- default=None,
33
- type=str,
34
- help="DB password",
35
- ),
36
- click.Option(
37
- ["--host"],
38
- default=None,
39
- type=str,
40
- help="DB host",
41
- ),
42
- click.Option(
43
- ["--port"],
44
- default=None,
45
- type=int,
46
- help="DB host connection port",
47
- ),
48
- click.Option(
49
- ["--database"],
50
- default=None,
51
- type=str,
52
- help="Database name. For sqlite databases, this is the path to the .db file.",
53
- ),
54
- ]
55
- return options
56
-
57
-
58
- def get_base_dest_cmd():
59
- from unstructured_ingest.cli.base.dest import BaseDestCmd
60
-
61
- cmd_cls = BaseDestCmd(
62
- cmd_name="sql",
63
- cli_config=SqlCliConfig,
64
- write_config=WriteConfig,
65
- )
66
- return cmd_cls