unstructured-ingest 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (356) hide show
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/__init__.py +14 -0
  4. unstructured_ingest/cli/base/__init__.py +0 -0
  5. unstructured_ingest/cli/base/cmd.py +19 -0
  6. unstructured_ingest/cli/base/dest.py +87 -0
  7. unstructured_ingest/cli/base/src.py +57 -0
  8. unstructured_ingest/cli/cli.py +32 -0
  9. unstructured_ingest/cli/cmd_factory.py +12 -0
  10. unstructured_ingest/cli/cmds/__init__.py +145 -0
  11. unstructured_ingest/cli/cmds/airtable.py +69 -0
  12. unstructured_ingest/cli/cmds/astra.py +99 -0
  13. unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
  14. unstructured_ingest/cli/cmds/biomed.py +52 -0
  15. unstructured_ingest/cli/cmds/chroma.py +104 -0
  16. unstructured_ingest/cli/cmds/clarifai.py +71 -0
  17. unstructured_ingest/cli/cmds/confluence.py +69 -0
  18. unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
  19. unstructured_ingest/cli/cmds/delta_table.py +94 -0
  20. unstructured_ingest/cli/cmds/discord.py +47 -0
  21. unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
  22. unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  23. unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
  24. unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
  25. unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
  26. unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
  27. unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
  28. unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
  29. unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
  30. unstructured_ingest/cli/cmds/github.py +54 -0
  31. unstructured_ingest/cli/cmds/gitlab.py +54 -0
  32. unstructured_ingest/cli/cmds/google_drive.py +49 -0
  33. unstructured_ingest/cli/cmds/hubspot.py +70 -0
  34. unstructured_ingest/cli/cmds/jira.py +71 -0
  35. unstructured_ingest/cli/cmds/kafka.py +102 -0
  36. unstructured_ingest/cli/cmds/local.py +43 -0
  37. unstructured_ingest/cli/cmds/mongodb.py +72 -0
  38. unstructured_ingest/cli/cmds/notion.py +48 -0
  39. unstructured_ingest/cli/cmds/onedrive.py +66 -0
  40. unstructured_ingest/cli/cmds/opensearch.py +117 -0
  41. unstructured_ingest/cli/cmds/outlook.py +67 -0
  42. unstructured_ingest/cli/cmds/pinecone.py +71 -0
  43. unstructured_ingest/cli/cmds/qdrant.py +124 -0
  44. unstructured_ingest/cli/cmds/reddit.py +67 -0
  45. unstructured_ingest/cli/cmds/salesforce.py +58 -0
  46. unstructured_ingest/cli/cmds/sharepoint.py +66 -0
  47. unstructured_ingest/cli/cmds/slack.py +56 -0
  48. unstructured_ingest/cli/cmds/sql.py +66 -0
  49. unstructured_ingest/cli/cmds/vectara.py +66 -0
  50. unstructured_ingest/cli/cmds/weaviate.py +98 -0
  51. unstructured_ingest/cli/cmds/wikipedia.py +40 -0
  52. unstructured_ingest/cli/common.py +7 -0
  53. unstructured_ingest/cli/interfaces.py +656 -0
  54. unstructured_ingest/cli/utils.py +205 -0
  55. unstructured_ingest/connector/__init__.py +0 -0
  56. unstructured_ingest/connector/airtable.py +309 -0
  57. unstructured_ingest/connector/astra.py +237 -0
  58. unstructured_ingest/connector/azure_cognitive_search.py +144 -0
  59. unstructured_ingest/connector/biomed.py +313 -0
  60. unstructured_ingest/connector/chroma.py +158 -0
  61. unstructured_ingest/connector/clarifai.py +122 -0
  62. unstructured_ingest/connector/confluence.py +285 -0
  63. unstructured_ingest/connector/databricks_volumes.py +137 -0
  64. unstructured_ingest/connector/delta_table.py +203 -0
  65. unstructured_ingest/connector/discord.py +180 -0
  66. unstructured_ingest/connector/elasticsearch.py +396 -0
  67. unstructured_ingest/connector/fsspec/__init__.py +0 -0
  68. unstructured_ingest/connector/fsspec/azure.py +78 -0
  69. unstructured_ingest/connector/fsspec/box.py +109 -0
  70. unstructured_ingest/connector/fsspec/dropbox.py +160 -0
  71. unstructured_ingest/connector/fsspec/fsspec.py +359 -0
  72. unstructured_ingest/connector/fsspec/gcs.py +82 -0
  73. unstructured_ingest/connector/fsspec/s3.py +62 -0
  74. unstructured_ingest/connector/fsspec/sftp.py +81 -0
  75. unstructured_ingest/connector/git.py +124 -0
  76. unstructured_ingest/connector/github.py +173 -0
  77. unstructured_ingest/connector/gitlab.py +142 -0
  78. unstructured_ingest/connector/google_drive.py +349 -0
  79. unstructured_ingest/connector/hubspot.py +278 -0
  80. unstructured_ingest/connector/jira.py +469 -0
  81. unstructured_ingest/connector/kafka.py +294 -0
  82. unstructured_ingest/connector/local.py +139 -0
  83. unstructured_ingest/connector/mongodb.py +285 -0
  84. unstructured_ingest/connector/notion/__init__.py +0 -0
  85. unstructured_ingest/connector/notion/client.py +233 -0
  86. unstructured_ingest/connector/notion/connector.py +468 -0
  87. unstructured_ingest/connector/notion/helpers.py +584 -0
  88. unstructured_ingest/connector/notion/interfaces.py +32 -0
  89. unstructured_ingest/connector/notion/types/__init__.py +0 -0
  90. unstructured_ingest/connector/notion/types/block.py +95 -0
  91. unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
  92. unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
  93. unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
  94. unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
  95. unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
  96. unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
  97. unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
  98. unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
  99. unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
  100. unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
  101. unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
  102. unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
  103. unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
  104. unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
  105. unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
  106. unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
  107. unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
  108. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
  109. unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
  110. unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
  111. unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
  112. unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
  113. unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
  114. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
  115. unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
  116. unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
  117. unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
  118. unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
  119. unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
  120. unstructured_ingest/connector/notion/types/database.py +72 -0
  121. unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
  122. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
  123. unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
  124. unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
  125. unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
  126. unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
  127. unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
  128. unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
  129. unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
  130. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
  131. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
  132. unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
  133. unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
  134. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
  135. unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
  136. unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
  137. unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
  138. unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
  139. unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
  140. unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
  141. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
  142. unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
  143. unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
  144. unstructured_ingest/connector/notion/types/date.py +26 -0
  145. unstructured_ingest/connector/notion/types/file.py +51 -0
  146. unstructured_ingest/connector/notion/types/page.py +44 -0
  147. unstructured_ingest/connector/notion/types/parent.py +66 -0
  148. unstructured_ingest/connector/notion/types/rich_text.py +189 -0
  149. unstructured_ingest/connector/notion/types/user.py +76 -0
  150. unstructured_ingest/connector/onedrive.py +232 -0
  151. unstructured_ingest/connector/opensearch.py +218 -0
  152. unstructured_ingest/connector/outlook.py +285 -0
  153. unstructured_ingest/connector/pinecone.py +140 -0
  154. unstructured_ingest/connector/qdrant.py +144 -0
  155. unstructured_ingest/connector/reddit.py +166 -0
  156. unstructured_ingest/connector/registry.py +109 -0
  157. unstructured_ingest/connector/salesforce.py +301 -0
  158. unstructured_ingest/connector/sharepoint.py +573 -0
  159. unstructured_ingest/connector/slack.py +224 -0
  160. unstructured_ingest/connector/sql.py +199 -0
  161. unstructured_ingest/connector/vectara.py +248 -0
  162. unstructured_ingest/connector/weaviate.py +190 -0
  163. unstructured_ingest/connector/wikipedia.py +208 -0
  164. unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
  165. unstructured_ingest/enhanced_dataclass/core.py +99 -0
  166. unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
  167. unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
  168. unstructured_ingest/error.py +49 -0
  169. unstructured_ingest/evaluate.py +338 -0
  170. unstructured_ingest/ingest_backoff/__init__.py +3 -0
  171. unstructured_ingest/ingest_backoff/_common.py +102 -0
  172. unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
  173. unstructured_ingest/interfaces.py +838 -0
  174. unstructured_ingest/logger.py +130 -0
  175. unstructured_ingest/main.py +11 -0
  176. unstructured_ingest/pipeline/__init__.py +22 -0
  177. unstructured_ingest/pipeline/copy.py +19 -0
  178. unstructured_ingest/pipeline/doc_factory.py +12 -0
  179. unstructured_ingest/pipeline/interfaces.py +265 -0
  180. unstructured_ingest/pipeline/partition.py +60 -0
  181. unstructured_ingest/pipeline/permissions.py +12 -0
  182. unstructured_ingest/pipeline/pipeline.py +117 -0
  183. unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  184. unstructured_ingest/pipeline/reformat/chunking.py +130 -0
  185. unstructured_ingest/pipeline/reformat/embedding.py +66 -0
  186. unstructured_ingest/pipeline/source.py +77 -0
  187. unstructured_ingest/pipeline/utils.py +6 -0
  188. unstructured_ingest/pipeline/write.py +18 -0
  189. unstructured_ingest/processor.py +93 -0
  190. unstructured_ingest/runner/__init__.py +104 -0
  191. unstructured_ingest/runner/airtable.py +35 -0
  192. unstructured_ingest/runner/astra.py +34 -0
  193. unstructured_ingest/runner/base_runner.py +89 -0
  194. unstructured_ingest/runner/biomed.py +45 -0
  195. unstructured_ingest/runner/confluence.py +35 -0
  196. unstructured_ingest/runner/delta_table.py +34 -0
  197. unstructured_ingest/runner/discord.py +35 -0
  198. unstructured_ingest/runner/elasticsearch.py +40 -0
  199. unstructured_ingest/runner/fsspec/__init__.py +0 -0
  200. unstructured_ingest/runner/fsspec/azure.py +30 -0
  201. unstructured_ingest/runner/fsspec/box.py +28 -0
  202. unstructured_ingest/runner/fsspec/dropbox.py +30 -0
  203. unstructured_ingest/runner/fsspec/fsspec.py +40 -0
  204. unstructured_ingest/runner/fsspec/gcs.py +28 -0
  205. unstructured_ingest/runner/fsspec/s3.py +28 -0
  206. unstructured_ingest/runner/fsspec/sftp.py +28 -0
  207. unstructured_ingest/runner/github.py +37 -0
  208. unstructured_ingest/runner/gitlab.py +37 -0
  209. unstructured_ingest/runner/google_drive.py +35 -0
  210. unstructured_ingest/runner/hubspot.py +35 -0
  211. unstructured_ingest/runner/jira.py +35 -0
  212. unstructured_ingest/runner/kafka.py +34 -0
  213. unstructured_ingest/runner/local.py +23 -0
  214. unstructured_ingest/runner/mongodb.py +34 -0
  215. unstructured_ingest/runner/notion.py +61 -0
  216. unstructured_ingest/runner/onedrive.py +35 -0
  217. unstructured_ingest/runner/opensearch.py +40 -0
  218. unstructured_ingest/runner/outlook.py +33 -0
  219. unstructured_ingest/runner/reddit.py +35 -0
  220. unstructured_ingest/runner/salesforce.py +33 -0
  221. unstructured_ingest/runner/sharepoint.py +35 -0
  222. unstructured_ingest/runner/slack.py +33 -0
  223. unstructured_ingest/runner/utils.py +47 -0
  224. unstructured_ingest/runner/wikipedia.py +35 -0
  225. unstructured_ingest/runner/writers/__init__.py +48 -0
  226. unstructured_ingest/runner/writers/astra.py +22 -0
  227. unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
  228. unstructured_ingest/runner/writers/base_writer.py +26 -0
  229. unstructured_ingest/runner/writers/chroma.py +22 -0
  230. unstructured_ingest/runner/writers/clarifai.py +19 -0
  231. unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
  232. unstructured_ingest/runner/writers/delta_table.py +24 -0
  233. unstructured_ingest/runner/writers/elasticsearch.py +24 -0
  234. unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  235. unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
  236. unstructured_ingest/runner/writers/fsspec/box.py +21 -0
  237. unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
  238. unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
  239. unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
  240. unstructured_ingest/runner/writers/kafka.py +21 -0
  241. unstructured_ingest/runner/writers/mongodb.py +21 -0
  242. unstructured_ingest/runner/writers/opensearch.py +26 -0
  243. unstructured_ingest/runner/writers/pinecone.py +21 -0
  244. unstructured_ingest/runner/writers/qdrant.py +19 -0
  245. unstructured_ingest/runner/writers/sql.py +22 -0
  246. unstructured_ingest/runner/writers/vectara.py +22 -0
  247. unstructured_ingest/runner/writers/weaviate.py +21 -0
  248. unstructured_ingest/utils/__init__.py +0 -0
  249. unstructured_ingest/utils/compression.py +117 -0
  250. unstructured_ingest/utils/data_prep.py +112 -0
  251. unstructured_ingest/utils/dep_check.py +66 -0
  252. unstructured_ingest/utils/string_and_date_utils.py +39 -0
  253. unstructured_ingest/utils/table.py +73 -0
  254. unstructured_ingest/v2/__init__.py +1 -0
  255. unstructured_ingest/v2/cli/__init__.py +0 -0
  256. unstructured_ingest/v2/cli/base/__init__.py +4 -0
  257. unstructured_ingest/v2/cli/base/cmd.py +215 -0
  258. unstructured_ingest/v2/cli/base/dest.py +76 -0
  259. unstructured_ingest/v2/cli/base/importer.py +34 -0
  260. unstructured_ingest/v2/cli/base/src.py +70 -0
  261. unstructured_ingest/v2/cli/cli.py +24 -0
  262. unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
  263. unstructured_ingest/v2/cli/cmds/astra.py +85 -0
  264. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
  265. unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
  266. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
  267. unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
  268. unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
  269. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
  270. unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
  271. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
  272. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
  273. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
  274. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
  275. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
  276. unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
  277. unstructured_ingest/v2/cli/cmds/local.py +60 -0
  278. unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
  279. unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
  280. unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
  281. unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
  282. unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
  283. unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
  284. unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
  285. unstructured_ingest/v2/cli/cmds/sql.py +84 -0
  286. unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
  287. unstructured_ingest/v2/cli/configs/__init__.py +6 -0
  288. unstructured_ingest/v2/cli/configs/chunk.py +89 -0
  289. unstructured_ingest/v2/cli/configs/embed.py +74 -0
  290. unstructured_ingest/v2/cli/configs/partition.py +99 -0
  291. unstructured_ingest/v2/cli/configs/processor.py +88 -0
  292. unstructured_ingest/v2/cli/interfaces.py +27 -0
  293. unstructured_ingest/v2/cli/utils.py +240 -0
  294. unstructured_ingest/v2/example.py +37 -0
  295. unstructured_ingest/v2/interfaces/__init__.py +29 -0
  296. unstructured_ingest/v2/interfaces/connector.py +32 -0
  297. unstructured_ingest/v2/interfaces/downloader.py +79 -0
  298. unstructured_ingest/v2/interfaces/file_data.py +49 -0
  299. unstructured_ingest/v2/interfaces/indexer.py +28 -0
  300. unstructured_ingest/v2/interfaces/process.py +20 -0
  301. unstructured_ingest/v2/interfaces/processor.py +48 -0
  302. unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
  303. unstructured_ingest/v2/interfaces/uploader.py +39 -0
  304. unstructured_ingest/v2/logger.py +126 -0
  305. unstructured_ingest/v2/main.py +11 -0
  306. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  307. unstructured_ingest/v2/pipeline/interfaces.py +167 -0
  308. unstructured_ingest/v2/pipeline/pipeline.py +284 -0
  309. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  310. unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
  311. unstructured_ingest/v2/pipeline/steps/download.py +124 -0
  312. unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
  313. unstructured_ingest/v2/pipeline/steps/index.py +61 -0
  314. unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
  315. unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
  316. unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
  317. unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
  318. unstructured_ingest/v2/pipeline/utils.py +15 -0
  319. unstructured_ingest/v2/processes/__init__.py +0 -0
  320. unstructured_ingest/v2/processes/chunker.py +97 -0
  321. unstructured_ingest/v2/processes/connector_registry.py +63 -0
  322. unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
  323. unstructured_ingest/v2/processes/connectors/astra.py +152 -0
  324. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
  325. unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
  326. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
  327. unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
  328. unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
  329. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
  330. unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
  331. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
  332. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
  333. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
  334. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
  335. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
  336. unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
  337. unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
  338. unstructured_ingest/v2/processes/connectors/local.py +204 -0
  339. unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
  340. unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
  341. unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
  342. unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
  343. unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
  344. unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
  345. unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
  346. unstructured_ingest/v2/processes/connectors/sql.py +269 -0
  347. unstructured_ingest/v2/processes/connectors/utils.py +19 -0
  348. unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
  349. unstructured_ingest/v2/processes/embedder.py +76 -0
  350. unstructured_ingest/v2/processes/partitioner.py +166 -0
  351. unstructured_ingest/v2/processes/uncompress.py +43 -0
  352. unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
  353. unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
  354. unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
  355. unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
  356. unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,104 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.interfaces import CliConfig, Dict
7
+ from unstructured_ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig
8
+
9
+
10
@dataclass
class ChromaCliConfig(SimpleChromaConfig, CliConfig):
    """CLI-facing connection settings for the Chroma destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that configure the Chroma client.

        Returns:
            A list of ``click.Option`` objects covering persistence path,
            client settings, tenant/database selection, HTTP host/port/SSL,
            request headers and the (required) target collection name.
        """
        # NOTE: multi-line help texts rely on implicit string-literal
        # concatenation, so each fragment that is followed by another one must
        # end with a space; otherwise the words run together in ``--help``.
        options = [
            click.Option(
                ["--path"],
                required=False,
                type=str,
                help="Location where Chroma is persisted, if not connecting via http.",
            ),
            click.Option(
                ["--settings"],
                required=False,
                type=Dict(),
                help="A dictionary of settings to communicate with the chroma server. "
                'example: \'{"persist_directory":"./chroma-persist"}\' ',
            ),
            click.Option(
                ["--tenant"],
                required=False,
                default="default_tenant",
                type=str,
                help="The tenant to use for this client. Chroma defaults to 'default_tenant'.",
            ),
            click.Option(
                ["--database"],
                required=False,
                default="default_database",
                type=str,
                help="The database to use for this client. "
                "Chroma defaults to 'default_database'.",
            ),
            click.Option(
                ["--host"],
                required=False,
                type=str,
                help="The hostname of the Chroma server.",
            ),
            click.Option(
                ["--port"],
                required=False,
                type=int,
                help="The port of the Chroma server.",
            ),
            click.Option(
                ["--ssl"],
                required=False,
                default=False,
                is_flag=True,
                type=bool,
                help="Whether to use SSL to connect to the Chroma server.",
            ),
            click.Option(
                ["--headers"],
                required=False,
                type=Dict(),
                help="A dictionary of headers to send to the Chroma server. "
                'example: \'{"Authorization":"Basic()"}\' ',
            ),
            click.Option(
                ["--collection-name"],
                required=True,
                type=str,
                help="The name of the Chroma collection to write into.",
            ),
        ]
        return options
78
+
79
+
80
@dataclass
class ChromaCliWriteConfig(ChromaWriteConfig, CliConfig):
    """CLI-facing write settings for the Chroma destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options controlling how records are written.

        Returns:
            A single-element list holding the ``--batch-size`` option
            (integer, default 100).
        """
        batch_size = click.Option(
            ["--batch-size"],
            default=100,
            type=int,
            help="Number of records per batch",
        )
        return [batch_size]
93
+
94
+
95
def get_base_dest_cmd():
    """Build the destination command object for the ``chroma`` subcommand.

    ``BaseDestCmd`` is imported inside the function body rather than at
    module import time.

    Returns:
        A ``BaseDestCmd`` wired with the Chroma connection config, the
        additional write options, and the Chroma write config class.
    """
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name="chroma",
        cli_config=ChromaCliConfig,
        additional_cli_options=[ChromaCliWriteConfig],
        write_config=ChromaWriteConfig,
    )
@@ -0,0 +1,71 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.interfaces import CliConfig
7
+ from unstructured_ingest.connector.clarifai import (
8
+ ClarifaiWriteConfig,
9
+ SimpleClarifaiConfig,
10
+ )
11
+
12
+ CMD_NAME = "clarifai"
13
+
14
+
15
@dataclass
class ClarifaiCliConfig(SimpleClarifaiConfig, CliConfig):
    """CLI-facing connection settings for the Clarifai destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that identify the Clarifai target.

        Returns:
            A list with the required ``--api-key`` (also read from the
            ``CLARIFAI_PAT`` environment variable), ``--app-id`` and
            ``--user-id`` options, plus the optional ``--dataset-id``.
        """
        api_key = click.Option(
            ["--api-key"],
            required=True,
            type=str,
            help="The CLARIFAI_PAT of the user to access clarifai platform apps and models",
            envvar="CLARIFAI_PAT",
            show_envvar=True,
        )
        app_id = click.Option(
            ["--app-id"],
            required=True,
            type=str,
            help="Clarifai app name/id",
        )
        user_id = click.Option(
            ["--user-id"],
            required=True,
            type=str,
            help="Clarifai User name/ID",
        )
        dataset_id = click.Option(
            ["--dataset-id"],
            type=str,
            default=None,
            help="Clarifai App Dataset ID (optional)",
        )
        return [api_key, app_id, user_id, dataset_id]
45
+
46
+
47
@dataclass
class ClarifaiCliWriteConfig(ClarifaiWriteConfig, CliConfig):
    """CLI-facing write settings for the Clarifai destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options controlling how inputs are uploaded.

        The return annotation names the ``click.Option`` class (the type of
        the list elements), not the ``click.option`` decorator function.

        Returns:
            A single-element list holding the ``--batch-size`` option
            (integer, default 50).
        """
        options = [
            click.Option(
                ["--batch-size"],
                type=int,
                default=50,
                help="No of inputs upload per batch",
            ),
        ]
        return options
60
+
61
+
62
def get_base_dest_cmd():
    """Build the destination command object for the Clarifai subcommand.

    ``BaseDestCmd`` is imported inside the function body rather than at
    module import time. The subcommand name comes from ``CMD_NAME``.

    Returns:
        A ``BaseDestCmd`` wired with the Clarifai connection config, the
        additional write options, and the Clarifai write config class.
    """
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name=CMD_NAME,
        cli_config=ClarifaiCliConfig,
        additional_cli_options=[ClarifaiCliWriteConfig],
        write_config=ClarifaiWriteConfig,
    )
@@ -0,0 +1,69 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.base.src import BaseSrcCmd
7
+ from unstructured_ingest.cli.interfaces import (
8
+ CliConfig,
9
+ DelimitedString,
10
+ )
11
+ from unstructured_ingest.connector.confluence import SimpleConfluenceConfig
12
+
13
+
14
@dataclass
class ConfluenceCliConfig(SimpleConfluenceConfig, CliConfig):
    """CLI-facing connection settings for the Confluence source."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that configure Confluence ingestion.

        Returns:
            A list with the required ``--api-token``, ``--url`` and
            ``--user-email`` options, plus the optional ``--spaces``,
            ``--max-num-of-docs-from-each-space`` (default 100) and
            ``--max-num-of-spaces`` (default 500) options.
        """
        # The help texts below refer to sibling options by the exact flag
        # names declared in this list, so users can copy them verbatim.
        options = [
            click.Option(
                ["--api-token"],
                required=True,
                help="API Token to authenticate into Confluence Cloud. "
                "Check "
                "https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ "
                "for more info.",
            ),
            click.Option(
                ["--url"],
                required=True,
                help='URL to Confluence Cloud, e.g. "unstructured-ingest-test.atlassian.net"',
            ),
            click.Option(
                ["--user-email"],
                required=True,
                help="Email to authenticate into Confluence Cloud",
            ),
            click.Option(
                ["--spaces"],
                default=None,
                type=DelimitedString(),
                help="A list of confluence space ids to be fetched. From each fetched space, "
                "--max-num-of-docs-from-each-space number of docs will be ingested. "
                "--spaces and --max-num-of-spaces cannot be used at the same time",
            ),
            click.Option(
                ["--max-num-of-docs-from-each-space"],
                default=100,
                help="Number of documents to be aimed to be ingested from each fetched "
                "confluence space. If any space has fewer documents, all the documents from "
                "that space will be ingested. Documents are not necessarily "
                "ingested in order of creation date.",
            ),
            click.Option(
                ["--max-num-of-spaces"],
                default=500,
                help="Number of confluence space ids to be fetched. From each fetched space, "
                "--max-num-of-docs-from-each-space number of docs will be ingested. "
                "--spaces and --max-num-of-spaces cannot be used at the same time",
            ),
        ]
        return options
62
+
63
+
64
def get_base_src_cmd() -> BaseSrcCmd:
    """Build the source command object for the ``confluence`` subcommand.

    Returns:
        A ``BaseSrcCmd`` configured with ``ConfluenceCliConfig``.
    """
    return BaseSrcCmd(
        cmd_name="confluence",
        cli_config=ConfluenceCliConfig,
    )
@@ -0,0 +1,163 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.interfaces import CliConfig
7
+ from unstructured_ingest.connector.databricks_volumes import (
8
+ DatabricksVolumesWriteConfig,
9
+ SimpleDatabricksVolumesConfig,
10
+ )
11
+
12
+ CMD_NAME = "databricks-volumes"
13
+
14
+
15
@dataclass
class DatabricksVolumesCliConfig(SimpleDatabricksVolumesConfig, CliConfig):
    """CLI-facing connection settings for the Databricks Volumes destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options used to authenticate against Databricks.

        Every option here is an optional string defaulting to ``None``; some
        of them carry no help text.

        Returns:
            The list of ``click.Option`` objects, in declaration order.
        """

        def optional_text(flag: str, description: t.Optional[str] = None) -> click.Option:
            # All options share the same shape; ``help=None`` is click's own
            # default, so options without a description are unaffected.
            return click.Option([flag], type=str, default=None, help=description)

        return [
            optional_text(
                "--host",
                "The Databricks host URL for either the "
                "Databricks workspace endpoint or the "
                "Databricks accounts endpoint.",
            ),
            optional_text(
                "--account-id",
                "The Databricks account ID for the Databricks "
                "accounts endpoint. Only has effect when Host is "
                "either https://accounts.cloud.databricks.com/ (AWS), "
                "https://accounts.azuredatabricks.net/ (Azure), "
                "or https://accounts.gcp.databricks.com/ (GCP).",
            ),
            optional_text(
                "--username",
                "The Databricks username part of basic authentication. "
                "Only possible when Host is *.cloud.databricks.com (AWS).",
            ),
            optional_text(
                "--password",
                "The Databricks password part of basic authentication. "
                "Only possible when Host is *.cloud.databricks.com (AWS).",
            ),
            optional_text("--client-id"),
            optional_text("--client-secret"),
            optional_text(
                "--token",
                "The Databricks personal access token (PAT) (AWS, Azure, and GCP) or "
                "Azure Active Directory (Azure AD) token (Azure).",
            ),
            optional_text(
                "--azure-workspace-resource-id",
                "The Azure Resource Manager ID for the Azure Databricks workspace, "
                "which is exchanged for a Databricks host URL.",
            ),
            optional_text(
                "--azure-client-secret",
                "The Azure AD service principal’s client secret.",
            ),
            optional_text(
                "--azure-client-id",
                "The Azure AD service principal’s application ID.",
            ),
            optional_text(
                "--azure-tenant-id",
                "The Azure AD service principal’s tenant ID.",
            ),
            optional_text(
                "--azure-environment",
                "The Azure environment type (such as Public, UsGov, China, and Germany) for a "
                "specific set of API endpoints. Defaults to PUBLIC.",
            ),
            optional_text(
                "--auth-type",
                "When multiple auth attributes are available in the "
                "environment, use the auth type specified by this "
                "argument. This argument also holds the currently "
                "selected auth.",
            ),
            optional_text("--cluster-id"),
            optional_text("--google-credentials"),
            optional_text("--google-service-account"),
        ]
107
+
108
+
109
@dataclass
class DatabricksVolumesCliWriteConfig(DatabricksVolumesWriteConfig, CliConfig):
    """CLI-facing write settings for the Databricks Volumes destination."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options describing where and how to write.

        Returns:
            A list with the required ``--volume`` and ``--catalog`` options,
            the optional ``--volume-path``, the ``--overwrite`` flag, and the
            ``--encoding`` / ``--schema`` options (declared required but
            carrying defaults of ``utf-8`` and ``default`` respectively).
        """
        volume = click.Option(
            ["--volume"],
            type=str,
            required=True,
            help="Name of volume in the Unity Catalog",
        )
        catalog = click.Option(
            ["--catalog"],
            type=str,
            required=True,
            help="Name of the catalog in the Databricks Unity Catalog service",
        )
        volume_path = click.Option(
            ["--volume-path"],
            type=str,
            required=False,
            default=None,
            help="Optional path within the volume to write to",
        )
        overwrite = click.Option(
            ["--overwrite"],
            type=bool,
            is_flag=True,
            help="If true, an existing file will be overwritten.",
        )
        encoding = click.Option(
            ["--encoding"],
            type=str,
            required=True,
            default="utf-8",
            help="Encoding applied to the data when written to the volume",
        )
        schema = click.Option(
            ["--schema"],
            type=str,
            required=True,
            default="default",
            help="Schema associated with the volume to write to in the Unity Catalog service",
        )
        return [volume, catalog, volume_path, overwrite, encoding, schema]
152
+
153
+
154
def get_base_dest_cmd():
    """Build the destination command object for the Databricks Volumes subcommand.

    ``BaseDestCmd`` is imported inside the function body rather than at
    module import time. The subcommand name comes from ``CMD_NAME``.

    Returns:
        A ``BaseDestCmd`` wired with the Databricks Volumes connection config,
        the additional write options, and the write config class.
    """
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name=CMD_NAME,
        cli_config=DatabricksVolumesCliConfig,
        additional_cli_options=[DatabricksVolumesCliWriteConfig],
        write_config=DatabricksVolumesWriteConfig,
    )
@@ -0,0 +1,94 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.base.src import BaseSrcCmd
7
+ from unstructured_ingest.cli.interfaces import CliConfig, Dict
8
+ from unstructured_ingest.connector.delta_table import DeltaTableWriteConfig, SimpleDeltaTableConfig
9
+
10
+ CMD_NAME = "delta-table"  # passed as cmd_name to both BaseSrcCmd and BaseDestCmd below
11
+
12
+
13
@dataclass
class DeltaTableCliConfig(SimpleDeltaTableConfig, CliConfig):
    """Connection configuration for a Delta Table, exposed as CLI options."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options used to locate and open the Delta Table.

        Returns:
            Options for the required table URI, an optional integer table
            version, optional storage options (parsed by the ``Dict`` param
            type from a JSON string), and the ``--without-files`` flag.
        """
        options = [
            click.Option(
                ["--table-uri"],
                required=True,
                help="the path of the DeltaTable",
            ),
            click.Option(
                ["--version"],
                default=None,
                type=int,
                help="version of the DeltaTable",
            ),
            click.Option(
                # Every other flag here is hyphenated, so "--storage-options"
                # is offered as the primary spelling. The original
                # "--storage_options" is kept as an alias so existing command
                # lines keep working, and the explicit trailing name pins the
                # parameter name to "storage_options", exactly as before.
                ["--storage-options", "--storage_options", "storage_options"],
                required=False,
                type=Dict(),
                default=None,
                help="a dictionary of the options to use for the storage backend, "
                "passed in as a json string",
            ),
            click.Option(
                ["--without-files"],
                is_flag=True,
                default=False,
                help="If set, will load table without tracking files.",
            ),
        ]
        return options
45
+
46
+
47
@dataclass
class DeltaTableCliWriteConfig(DeltaTableWriteConfig, CliConfig):
    """Write-side configuration for a Delta Table, exposed as CLI options."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that control how the table is written.

        Two boolean flags (both off by default) plus ``--mode``, which is
        restricted to ``error``, ``append``, ``overwrite`` or ``ignore`` and
        defaults to ``error``.
        """
        mode_help = (
            "How to handle existing data. Default is to error if table already exists. "
            "If 'append', will add new data. "
            "If 'overwrite', will replace table with new data. "
            "If 'ignore', will not write anything if table already exists."
        )
        overwrite_schema = click.Option(
            ["--overwrite-schema"],
            is_flag=True,
            default=False,
            help="Flag to overwrite schema of destination table",
        )
        drop_empty_cols = click.Option(
            ["--drop-empty-cols"],
            is_flag=True,
            default=False,
            help="Flag to drop any columns that have no content",
        )
        mode = click.Option(
            ["--mode"],
            default="error",
            type=click.Choice(["error", "append", "overwrite", "ignore"]),
            help=mode_help,
        )
        return [overwrite_schema, drop_empty_cols, mode]
75
+
76
+
77
def get_base_src_cmd() -> BaseSrcCmd:
    """Build the source command object for the Delta Table connector."""
    return BaseSrcCmd(cmd_name=CMD_NAME, cli_config=DeltaTableCliConfig)
83
+
84
+
85
def get_base_dest_cmd():
    """Build the destination command object for the Delta Table connector.

    The ``BaseDestCmd`` import is kept inside the function body, as in the
    original code, instead of being performed at module import time.
    """
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name=CMD_NAME,
        cli_config=DeltaTableCliConfig,
        additional_cli_options=[DeltaTableCliWriteConfig],
        write_config=DeltaTableWriteConfig,
    )
@@ -0,0 +1,47 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.base.src import BaseSrcCmd
7
+ from unstructured_ingest.cli.interfaces import (
8
+ CliConfig,
9
+ DelimitedString,
10
+ )
11
+ from unstructured_ingest.connector.discord import SimpleDiscordConfig
12
+
13
+
14
@dataclass
class DiscordCliConfig(SimpleDiscordConfig, CliConfig):
    """Connection configuration for Discord, exposed as CLI options."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options for the Discord source.

        ``--token`` and ``--channels`` are required; ``--channels`` is parsed
        by ``DelimitedString``. ``--period`` is optional and limited to
        integers >= 0.
        """
        token = click.Option(
            ["--token"],
            required=True,
            help="Bot token used to access Discord API, must have "
            "READ_MESSAGE_HISTORY scope for the bot user",
        )
        channels = click.Option(
            ["--channels"],
            required=True,
            type=DelimitedString(),
            help="Comma-delimited list of discord channel ids to ingest from.",
        )
        period = click.Option(
            ["--period"],
            default=None,
            type=click.IntRange(0),
            help="Number of days to go back in the history of "
            "discord channels, must be a number",
        )
        return [token, channels, period]
40
+
41
+
42
def get_base_src_cmd() -> BaseSrcCmd:
    """Build the source command object for the Discord connector."""
    return BaseSrcCmd(cmd_name="discord", cli_config=DiscordCliConfig)
@@ -0,0 +1,133 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import click
5
+
6
+ from unstructured_ingest.cli.base.src import BaseSrcCmd
7
+ from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
8
+ from unstructured_ingest.connector.elasticsearch import (
9
+ ElasticsearchWriteConfig,
10
+ SimpleElasticsearchConfig,
11
+ )
12
+
13
+ CMD_NAME = "elasticsearch"  # command name for the Elasticsearch connector
14
+
15
+
16
@dataclass
class ElasticsearchCliConfig(SimpleElasticsearchConfig, CliConfig):
    """Connection configuration for Elasticsearch, exposed as CLI options."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options for connecting to Elasticsearch.

        Returns:
            Options covering the required index name, the hosts, an optional
            field filter, the authentication and TLS settings, and the read
            batch size (default 100, must be >= 0).
        """
        options = [
            click.Option(
                ["--index-name"],
                required=True,
                type=str,
                help="Name of the Elasticsearch index to pull data from, or upload data to.",
            ),
            click.Option(
                ["--hosts"],
                type=DelimitedString(),
                help='List of the Elasticsearch hosts to connect to, e.g. "http://localhost:9200"',
            ),
            click.Option(
                ["--fields"],
                type=DelimitedString(),
                default=[],
                help="If provided, will limit the fields returned by Elasticsearch "
                "to this comma-delimited list",
            ),
            click.Option(
                ["--username"], type=str, default=None, help="username when using basic auth"
            ),
            click.Option(
                ["--password"],
                type=str,
                default=None,
                help="password when using basic auth or connecting to a cloud instance",
            ),
            click.Option(
                ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud"
            ),
            click.Option(
                ["--es-api-key"], type=str, default=None, help="api key used for authentication"
            ),
            click.Option(
                ["--api-key-id"],
                type=str,
                default=None,
                help="id associated with api key used for authentication: "
                "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html",  # noqa: E501
            ),
            click.Option(
                ["--bearer-auth"],
                type=str,
                default=None,
                help="bearer token used for HTTP bearer authentication",
            ),
            click.Option(
                ["--ca-certs"],
                type=click.Path(),
                default=None,
                # This was the only option without help text, so it showed up
                # undocumented in the --help output.
                help="path to a CA certificate bundle used to verify the server certificate",
            ),
            click.Option(
                ["--ssl-assert-fingerprint"],
                type=str,
                default=None,
                help="SHA256 fingerprint value",
            ),
            click.Option(
                ["--batch-size"],
                default=100,
                type=click.IntRange(0),
                help="how many records to read at a time per process",
            ),
        ]
        return options
86
+
87
+
88
@dataclass
class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig):
    """Write-side configuration for Elasticsearch, exposed as CLI options."""

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that tune uploads.

        Both options are optional integers: ``--batch-size-bytes`` defaults to
        15,000,000 and ``--num-processes`` defaults to 1.
        """
        batch_size_bytes = click.Option(
            ["--batch-size-bytes"],
            required=False,
            default=15_000_000,
            type=int,
            help="Size limit (in bytes) for each batch of items to be uploaded. Check"
            " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html"
            "#_how_big_is_too_big for more information.",
        )
        num_processes = click.Option(
            ["--num-processes"],
            required=False,
            default=1,
            type=int,
            help="Number of processes to be used while uploading content",
        )
        return [batch_size_bytes, num_processes]
111
+
112
+
113
def get_base_src_cmd() -> BaseSrcCmd:
    """Build the source command object for the Elasticsearch connector.

    Returns:
        A ``BaseSrcCmd`` configured with ``ElasticsearchCliConfig``.
    """
    # Use the module-level CMD_NAME ("elasticsearch") rather than repeating the
    # literal, so the command name is defined in exactly one place.
    cmd_cls = BaseSrcCmd(
        cmd_name=CMD_NAME,
        cli_config=ElasticsearchCliConfig,
    )
    return cmd_cls
119
+
120
+
121
def get_base_dest_cmd():
    """Build the destination command object for the Elasticsearch connector.

    ``BaseDestCmd`` is imported inside the function body rather than at module
    level, as in the original code.

    Returns:
        A ``BaseDestCmd`` that combines the connection options with the write
        options and registers the connector and write config classes under
        ``addition_configs``.
    """
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    # Use the module-level CMD_NAME ("elasticsearch") rather than repeating the
    # literal, so the command name is defined in exactly one place.
    cmd_cls = BaseDestCmd(
        cmd_name=CMD_NAME,
        cli_config=ElasticsearchCliConfig,
        additional_cli_options=[ElasticsearchCliWriteConfig],
        addition_configs={
            "connector_config": SimpleElasticsearchConfig,
            "write_config": ElasticsearchCliWriteConfig,
        },
    )
    return cmd_cls
File without changes