unstructured-ingest 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (356)
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/__init__.py +14 -0
  4. unstructured_ingest/cli/base/__init__.py +0 -0
  5. unstructured_ingest/cli/base/cmd.py +19 -0
  6. unstructured_ingest/cli/base/dest.py +87 -0
  7. unstructured_ingest/cli/base/src.py +57 -0
  8. unstructured_ingest/cli/cli.py +32 -0
  9. unstructured_ingest/cli/cmd_factory.py +12 -0
  10. unstructured_ingest/cli/cmds/__init__.py +145 -0
  11. unstructured_ingest/cli/cmds/airtable.py +69 -0
  12. unstructured_ingest/cli/cmds/astra.py +99 -0
  13. unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
  14. unstructured_ingest/cli/cmds/biomed.py +52 -0
  15. unstructured_ingest/cli/cmds/chroma.py +104 -0
  16. unstructured_ingest/cli/cmds/clarifai.py +71 -0
  17. unstructured_ingest/cli/cmds/confluence.py +69 -0
  18. unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
  19. unstructured_ingest/cli/cmds/delta_table.py +94 -0
  20. unstructured_ingest/cli/cmds/discord.py +47 -0
  21. unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
  22. unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  23. unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
  24. unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
  25. unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
  26. unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
  27. unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
  28. unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
  29. unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
  30. unstructured_ingest/cli/cmds/github.py +54 -0
  31. unstructured_ingest/cli/cmds/gitlab.py +54 -0
  32. unstructured_ingest/cli/cmds/google_drive.py +49 -0
  33. unstructured_ingest/cli/cmds/hubspot.py +70 -0
  34. unstructured_ingest/cli/cmds/jira.py +71 -0
  35. unstructured_ingest/cli/cmds/kafka.py +102 -0
  36. unstructured_ingest/cli/cmds/local.py +43 -0
  37. unstructured_ingest/cli/cmds/mongodb.py +72 -0
  38. unstructured_ingest/cli/cmds/notion.py +48 -0
  39. unstructured_ingest/cli/cmds/onedrive.py +66 -0
  40. unstructured_ingest/cli/cmds/opensearch.py +117 -0
  41. unstructured_ingest/cli/cmds/outlook.py +67 -0
  42. unstructured_ingest/cli/cmds/pinecone.py +71 -0
  43. unstructured_ingest/cli/cmds/qdrant.py +124 -0
  44. unstructured_ingest/cli/cmds/reddit.py +67 -0
  45. unstructured_ingest/cli/cmds/salesforce.py +58 -0
  46. unstructured_ingest/cli/cmds/sharepoint.py +66 -0
  47. unstructured_ingest/cli/cmds/slack.py +56 -0
  48. unstructured_ingest/cli/cmds/sql.py +66 -0
  49. unstructured_ingest/cli/cmds/vectara.py +66 -0
  50. unstructured_ingest/cli/cmds/weaviate.py +98 -0
  51. unstructured_ingest/cli/cmds/wikipedia.py +40 -0
  52. unstructured_ingest/cli/common.py +7 -0
  53. unstructured_ingest/cli/interfaces.py +656 -0
  54. unstructured_ingest/cli/utils.py +205 -0
  55. unstructured_ingest/connector/__init__.py +0 -0
  56. unstructured_ingest/connector/airtable.py +309 -0
  57. unstructured_ingest/connector/astra.py +237 -0
  58. unstructured_ingest/connector/azure_cognitive_search.py +144 -0
  59. unstructured_ingest/connector/biomed.py +313 -0
  60. unstructured_ingest/connector/chroma.py +158 -0
  61. unstructured_ingest/connector/clarifai.py +122 -0
  62. unstructured_ingest/connector/confluence.py +285 -0
  63. unstructured_ingest/connector/databricks_volumes.py +137 -0
  64. unstructured_ingest/connector/delta_table.py +203 -0
  65. unstructured_ingest/connector/discord.py +180 -0
  66. unstructured_ingest/connector/elasticsearch.py +396 -0
  67. unstructured_ingest/connector/fsspec/__init__.py +0 -0
  68. unstructured_ingest/connector/fsspec/azure.py +78 -0
  69. unstructured_ingest/connector/fsspec/box.py +109 -0
  70. unstructured_ingest/connector/fsspec/dropbox.py +160 -0
  71. unstructured_ingest/connector/fsspec/fsspec.py +359 -0
  72. unstructured_ingest/connector/fsspec/gcs.py +82 -0
  73. unstructured_ingest/connector/fsspec/s3.py +62 -0
  74. unstructured_ingest/connector/fsspec/sftp.py +81 -0
  75. unstructured_ingest/connector/git.py +124 -0
  76. unstructured_ingest/connector/github.py +173 -0
  77. unstructured_ingest/connector/gitlab.py +142 -0
  78. unstructured_ingest/connector/google_drive.py +349 -0
  79. unstructured_ingest/connector/hubspot.py +278 -0
  80. unstructured_ingest/connector/jira.py +469 -0
  81. unstructured_ingest/connector/kafka.py +294 -0
  82. unstructured_ingest/connector/local.py +139 -0
  83. unstructured_ingest/connector/mongodb.py +285 -0
  84. unstructured_ingest/connector/notion/__init__.py +0 -0
  85. unstructured_ingest/connector/notion/client.py +233 -0
  86. unstructured_ingest/connector/notion/connector.py +468 -0
  87. unstructured_ingest/connector/notion/helpers.py +584 -0
  88. unstructured_ingest/connector/notion/interfaces.py +32 -0
  89. unstructured_ingest/connector/notion/types/__init__.py +0 -0
  90. unstructured_ingest/connector/notion/types/block.py +95 -0
  91. unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
  92. unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
  93. unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
  94. unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
  95. unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
  96. unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
  97. unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
  98. unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
  99. unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
  100. unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
  101. unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
  102. unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
  103. unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
  104. unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
  105. unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
  106. unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
  107. unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
  108. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
  109. unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
  110. unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
  111. unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
  112. unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
  113. unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
  114. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
  115. unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
  116. unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
  117. unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
  118. unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
  119. unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
  120. unstructured_ingest/connector/notion/types/database.py +72 -0
  121. unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
  122. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
  123. unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
  124. unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
  125. unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
  126. unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
  127. unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
  128. unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
  129. unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
  130. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
  131. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
  132. unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
  133. unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
  134. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
  135. unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
  136. unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
  137. unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
  138. unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
  139. unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
  140. unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
  141. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
  142. unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
  143. unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
  144. unstructured_ingest/connector/notion/types/date.py +26 -0
  145. unstructured_ingest/connector/notion/types/file.py +51 -0
  146. unstructured_ingest/connector/notion/types/page.py +44 -0
  147. unstructured_ingest/connector/notion/types/parent.py +66 -0
  148. unstructured_ingest/connector/notion/types/rich_text.py +189 -0
  149. unstructured_ingest/connector/notion/types/user.py +76 -0
  150. unstructured_ingest/connector/onedrive.py +232 -0
  151. unstructured_ingest/connector/opensearch.py +218 -0
  152. unstructured_ingest/connector/outlook.py +285 -0
  153. unstructured_ingest/connector/pinecone.py +140 -0
  154. unstructured_ingest/connector/qdrant.py +144 -0
  155. unstructured_ingest/connector/reddit.py +166 -0
  156. unstructured_ingest/connector/registry.py +109 -0
  157. unstructured_ingest/connector/salesforce.py +301 -0
  158. unstructured_ingest/connector/sharepoint.py +573 -0
  159. unstructured_ingest/connector/slack.py +224 -0
  160. unstructured_ingest/connector/sql.py +199 -0
  161. unstructured_ingest/connector/vectara.py +248 -0
  162. unstructured_ingest/connector/weaviate.py +190 -0
  163. unstructured_ingest/connector/wikipedia.py +208 -0
  164. unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
  165. unstructured_ingest/enhanced_dataclass/core.py +99 -0
  166. unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
  167. unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
  168. unstructured_ingest/error.py +49 -0
  169. unstructured_ingest/evaluate.py +338 -0
  170. unstructured_ingest/ingest_backoff/__init__.py +3 -0
  171. unstructured_ingest/ingest_backoff/_common.py +102 -0
  172. unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
  173. unstructured_ingest/interfaces.py +838 -0
  174. unstructured_ingest/logger.py +130 -0
  175. unstructured_ingest/main.py +11 -0
  176. unstructured_ingest/pipeline/__init__.py +22 -0
  177. unstructured_ingest/pipeline/copy.py +19 -0
  178. unstructured_ingest/pipeline/doc_factory.py +12 -0
  179. unstructured_ingest/pipeline/interfaces.py +265 -0
  180. unstructured_ingest/pipeline/partition.py +60 -0
  181. unstructured_ingest/pipeline/permissions.py +12 -0
  182. unstructured_ingest/pipeline/pipeline.py +117 -0
  183. unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  184. unstructured_ingest/pipeline/reformat/chunking.py +130 -0
  185. unstructured_ingest/pipeline/reformat/embedding.py +66 -0
  186. unstructured_ingest/pipeline/source.py +77 -0
  187. unstructured_ingest/pipeline/utils.py +6 -0
  188. unstructured_ingest/pipeline/write.py +18 -0
  189. unstructured_ingest/processor.py +93 -0
  190. unstructured_ingest/runner/__init__.py +104 -0
  191. unstructured_ingest/runner/airtable.py +35 -0
  192. unstructured_ingest/runner/astra.py +34 -0
  193. unstructured_ingest/runner/base_runner.py +89 -0
  194. unstructured_ingest/runner/biomed.py +45 -0
  195. unstructured_ingest/runner/confluence.py +35 -0
  196. unstructured_ingest/runner/delta_table.py +34 -0
  197. unstructured_ingest/runner/discord.py +35 -0
  198. unstructured_ingest/runner/elasticsearch.py +40 -0
  199. unstructured_ingest/runner/fsspec/__init__.py +0 -0
  200. unstructured_ingest/runner/fsspec/azure.py +30 -0
  201. unstructured_ingest/runner/fsspec/box.py +28 -0
  202. unstructured_ingest/runner/fsspec/dropbox.py +30 -0
  203. unstructured_ingest/runner/fsspec/fsspec.py +40 -0
  204. unstructured_ingest/runner/fsspec/gcs.py +28 -0
  205. unstructured_ingest/runner/fsspec/s3.py +28 -0
  206. unstructured_ingest/runner/fsspec/sftp.py +28 -0
  207. unstructured_ingest/runner/github.py +37 -0
  208. unstructured_ingest/runner/gitlab.py +37 -0
  209. unstructured_ingest/runner/google_drive.py +35 -0
  210. unstructured_ingest/runner/hubspot.py +35 -0
  211. unstructured_ingest/runner/jira.py +35 -0
  212. unstructured_ingest/runner/kafka.py +34 -0
  213. unstructured_ingest/runner/local.py +23 -0
  214. unstructured_ingest/runner/mongodb.py +34 -0
  215. unstructured_ingest/runner/notion.py +61 -0
  216. unstructured_ingest/runner/onedrive.py +35 -0
  217. unstructured_ingest/runner/opensearch.py +40 -0
  218. unstructured_ingest/runner/outlook.py +33 -0
  219. unstructured_ingest/runner/reddit.py +35 -0
  220. unstructured_ingest/runner/salesforce.py +33 -0
  221. unstructured_ingest/runner/sharepoint.py +35 -0
  222. unstructured_ingest/runner/slack.py +33 -0
  223. unstructured_ingest/runner/utils.py +47 -0
  224. unstructured_ingest/runner/wikipedia.py +35 -0
  225. unstructured_ingest/runner/writers/__init__.py +48 -0
  226. unstructured_ingest/runner/writers/astra.py +22 -0
  227. unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
  228. unstructured_ingest/runner/writers/base_writer.py +26 -0
  229. unstructured_ingest/runner/writers/chroma.py +22 -0
  230. unstructured_ingest/runner/writers/clarifai.py +19 -0
  231. unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
  232. unstructured_ingest/runner/writers/delta_table.py +24 -0
  233. unstructured_ingest/runner/writers/elasticsearch.py +24 -0
  234. unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  235. unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
  236. unstructured_ingest/runner/writers/fsspec/box.py +21 -0
  237. unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
  238. unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
  239. unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
  240. unstructured_ingest/runner/writers/kafka.py +21 -0
  241. unstructured_ingest/runner/writers/mongodb.py +21 -0
  242. unstructured_ingest/runner/writers/opensearch.py +26 -0
  243. unstructured_ingest/runner/writers/pinecone.py +21 -0
  244. unstructured_ingest/runner/writers/qdrant.py +19 -0
  245. unstructured_ingest/runner/writers/sql.py +22 -0
  246. unstructured_ingest/runner/writers/vectara.py +22 -0
  247. unstructured_ingest/runner/writers/weaviate.py +21 -0
  248. unstructured_ingest/utils/__init__.py +0 -0
  249. unstructured_ingest/utils/compression.py +117 -0
  250. unstructured_ingest/utils/data_prep.py +112 -0
  251. unstructured_ingest/utils/dep_check.py +66 -0
  252. unstructured_ingest/utils/string_and_date_utils.py +39 -0
  253. unstructured_ingest/utils/table.py +73 -0
  254. unstructured_ingest/v2/__init__.py +1 -0
  255. unstructured_ingest/v2/cli/__init__.py +0 -0
  256. unstructured_ingest/v2/cli/base/__init__.py +4 -0
  257. unstructured_ingest/v2/cli/base/cmd.py +215 -0
  258. unstructured_ingest/v2/cli/base/dest.py +76 -0
  259. unstructured_ingest/v2/cli/base/importer.py +34 -0
  260. unstructured_ingest/v2/cli/base/src.py +70 -0
  261. unstructured_ingest/v2/cli/cli.py +24 -0
  262. unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
  263. unstructured_ingest/v2/cli/cmds/astra.py +85 -0
  264. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
  265. unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
  266. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
  267. unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
  268. unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
  269. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
  270. unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
  271. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
  272. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
  273. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
  274. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
  275. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
  276. unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
  277. unstructured_ingest/v2/cli/cmds/local.py +60 -0
  278. unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
  279. unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
  280. unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
  281. unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
  282. unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
  283. unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
  284. unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
  285. unstructured_ingest/v2/cli/cmds/sql.py +84 -0
  286. unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
  287. unstructured_ingest/v2/cli/configs/__init__.py +6 -0
  288. unstructured_ingest/v2/cli/configs/chunk.py +89 -0
  289. unstructured_ingest/v2/cli/configs/embed.py +74 -0
  290. unstructured_ingest/v2/cli/configs/partition.py +99 -0
  291. unstructured_ingest/v2/cli/configs/processor.py +88 -0
  292. unstructured_ingest/v2/cli/interfaces.py +27 -0
  293. unstructured_ingest/v2/cli/utils.py +240 -0
  294. unstructured_ingest/v2/example.py +37 -0
  295. unstructured_ingest/v2/interfaces/__init__.py +29 -0
  296. unstructured_ingest/v2/interfaces/connector.py +32 -0
  297. unstructured_ingest/v2/interfaces/downloader.py +79 -0
  298. unstructured_ingest/v2/interfaces/file_data.py +49 -0
  299. unstructured_ingest/v2/interfaces/indexer.py +28 -0
  300. unstructured_ingest/v2/interfaces/process.py +20 -0
  301. unstructured_ingest/v2/interfaces/processor.py +48 -0
  302. unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
  303. unstructured_ingest/v2/interfaces/uploader.py +39 -0
  304. unstructured_ingest/v2/logger.py +126 -0
  305. unstructured_ingest/v2/main.py +11 -0
  306. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  307. unstructured_ingest/v2/pipeline/interfaces.py +167 -0
  308. unstructured_ingest/v2/pipeline/pipeline.py +284 -0
  309. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  310. unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
  311. unstructured_ingest/v2/pipeline/steps/download.py +124 -0
  312. unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
  313. unstructured_ingest/v2/pipeline/steps/index.py +61 -0
  314. unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
  315. unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
  316. unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
  317. unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
  318. unstructured_ingest/v2/pipeline/utils.py +15 -0
  319. unstructured_ingest/v2/processes/__init__.py +0 -0
  320. unstructured_ingest/v2/processes/chunker.py +97 -0
  321. unstructured_ingest/v2/processes/connector_registry.py +63 -0
  322. unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
  323. unstructured_ingest/v2/processes/connectors/astra.py +152 -0
  324. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
  325. unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
  326. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
  327. unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
  328. unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
  329. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
  330. unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
  331. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
  332. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
  333. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
  334. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
  335. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
  336. unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
  337. unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
  338. unstructured_ingest/v2/processes/connectors/local.py +204 -0
  339. unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
  340. unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
  341. unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
  342. unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
  343. unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
  344. unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
  345. unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
  346. unstructured_ingest/v2/processes/connectors/sql.py +269 -0
  347. unstructured_ingest/v2/processes/connectors/utils.py +19 -0
  348. unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
  349. unstructured_ingest/v2/processes/embedder.py +76 -0
  350. unstructured_ingest/v2/processes/partitioner.py +166 -0
  351. unstructured_ingest/v2/processes/uncompress.py +43 -0
  352. unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
  353. unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
  354. unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
  355. unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
  356. unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,78 @@
1
+ import asyncio
2
+ import hashlib
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Callable, Optional, TypedDict
7
+
8
+ from unstructured_ingest.v2.interfaces import FileData
9
+ from unstructured_ingest.v2.logger import logger
10
+ from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
11
+ from unstructured_ingest.v2.pipeline.utils import sterilize_dict
12
+ from unstructured_ingest.v2.processes.partitioner import Partitioner
13
+
14
# Default value of PartitionStep.identifier.
STEP_ID = "partition"


# Mapping returned by the partition step: the file-data JSON path that was
# processed and the path of the partitioned output.
PartitionStepResponse = TypedDict(
    "PartitionStepResponse",
    {
        "file_data_path": str,
        "path": str,
    },
)
20
+
21
+
22
@dataclass
class PartitionStep(PipelineStep):
    """Pipeline step that partitions a file and stores the result as JSON.

    The output file lives in ``self.cache_dir`` and is named after a hash of
    the partitioner config plus the file-data filename, so an existing output
    can be reused instead of partitioning again.
    """

    process: Partitioner
    identifier: str = STEP_ID

    def __str__(self):
        strategy = self.process.config.strategy
        return f"{self.identifier} ({strategy})"

    def __post_init__(self):
        # Sensitive values are redacted before the config is logged.
        redacted = self.process.config.to_dict(redact_sensitive=True)
        config = sterilize_dict(redacted)
        logger.info(f"Created {self.identifier} with configs: {config}")

    def should_partition(self, filepath: Path, file_data: FileData) -> bool:
        """Return True when partitioning must run for ``filepath``.

        Partitioning runs when reprocessing is requested on either the step
        context or the file data, or when the output file does not exist yet.
        """
        if not (self.context.reprocess or file_data.reprocess):
            return not filepath.exists()
        return True

    def get_output_filepath(self, filename: Path) -> Path:
        """Return the resolved output path for ``filename``, creating its parent directory."""
        digest = self.get_hash(extras=[filename.name])
        target = (self.cache_dir / f"{digest}.json").resolve()
        target.parent.mkdir(parents=True, exist_ok=True)
        return target

    def _save_output(self, output_filepath: str, partitioned_content: list[dict]):
        """Write ``partitioned_content`` to ``output_filepath`` as indented JSON."""
        with open(str(output_filepath), "w") as out_file:
            logger.debug(f"Writing partitioned output to: {output_filepath}")
            json.dump(partitioned_content, out_file, indent=2)

    async def _run_async(
        self, fn: Callable, path: str, file_data_path: str
    ) -> Optional[PartitionStepResponse]:
        """Partition the file at ``path`` with ``fn`` unless a cached output can be reused.

        ``fn`` may be a plain callable or a coroutine function; coroutine
        functions are awaited, under ``self.context.semaphore`` when one is set.
        """
        source_path = Path(path)
        file_data = FileData.from_file(path=file_data_path)
        output_filepath = self.get_output_filepath(filename=Path(file_data_path))
        response = PartitionStepResponse(
            file_data_path=file_data_path, path=str(output_filepath)
        )

        if not self.should_partition(filepath=output_filepath, file_data=file_data):
            logger.debug(f"Skipping partitioning, output already exists: {output_filepath}")
            return response

        fn_kwargs = {"filename": source_path, "metadata": file_data.metadata}
        if asyncio.iscoroutinefunction(fn):
            semaphore = self.context.semaphore
            if semaphore:
                async with semaphore:
                    elements = await fn(**fn_kwargs)
            else:
                elements = await fn(**fn_kwargs)
        else:
            elements = fn(**fn_kwargs)

        self._save_output(output_filepath=str(output_filepath), partitioned_content=elements)
        return response

    def get_hash(self, extras: Optional[list[str]]) -> str:
        """Return the first 12 hex characters of a SHA-256 over the config and ``extras``."""
        parts = [json.dumps(self.process.config.to_dict(), sort_keys=True, ensure_ascii=True)]
        if extras:
            parts.extend(extras)
        return hashlib.sha256("".join(parts).encode()).hexdigest()[:12]
@@ -0,0 +1,64 @@
1
+ import asyncio
2
+ import hashlib
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Callable, Optional, TypedDict
7
+
8
+ from unstructured_ingest.v2.interfaces.file_data import FileData
9
+ from unstructured_ingest.v2.interfaces.upload_stager import UploadStager
10
+ from unstructured_ingest.v2.logger import logger
11
+ from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
12
+ from unstructured_ingest.v2.pipeline.utils import sterilize_dict
13
+
14
# Default value of UploadStageStep.identifier.
STEP_ID = "upload_stage"


# Mapping returned by the upload-stage step: the file-data JSON path that was
# processed and the path of the staged output.
UploadStageStepResponse = TypedDict(
    "UploadStageStepResponse",
    {
        "file_data_path": str,
        "path": str,
    },
)
20
+
21
+
22
@dataclass
class UploadStageStep(PipelineStep):
    """Pipeline step that runs the upload stager on an elements file.

    The path returned by the stager is passed on, together with the unchanged
    file-data path, as an ``UploadStageStepResponse``.
    """

    process: UploadStager
    identifier: str = STEP_ID

    def __str__(self):
        return f"{self.identifier} ({self.process.__class__.__name__})"

    def __post_init__(self):
        # The stager config is optional; ``None`` is logged when it is not set.
        # Sensitive values are redacted before logging.
        config = (
            sterilize_dict(self.process.upload_stager_config.to_dict(redact_sensitive=True))
            if self.process.upload_stager_config
            else None
        )
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created {self.identifier} with configs: {config}")

    async def _run_async(
        self, fn: Callable, path: str, file_data_path: str
    ) -> UploadStageStepResponse:
        """Stage the elements file at ``path`` with ``fn``.

        ``fn`` may be a plain callable or a coroutine function; coroutine
        functions are awaited, under ``self.context.semaphore`` when one is set.
        """
        path = Path(path)
        fn_kwargs = {
            "elements_filepath": path,
            "file_data": FileData.from_file(path=file_data_path),
            "output_dir": self.cache_dir,
            "output_filename": self.get_hash(extras=[path.name]),
        }
        if not asyncio.iscoroutinefunction(fn):
            staged_output_path = fn(**fn_kwargs)
        elif semaphore := self.context.semaphore:
            async with semaphore:
                staged_output_path = await fn(**fn_kwargs)
        else:
            staged_output_path = await fn(**fn_kwargs)
        return UploadStageStepResponse(file_data_path=file_data_path, path=str(staged_output_path))

    def get_hash(self, extras: Optional[list[str]]) -> str:
        """Return the first 12 hex characters of a SHA-256 over the stager config and ``extras``.

        The stager config is optional (``__post_init__`` already allows for a
        missing one), so an absent config is hashed as an empty dict instead
        of raising ``AttributeError``.
        """
        stager_config = self.process.upload_stager_config
        config_dict = stager_config.to_dict() if stager_config is not None else {}
        hashable_string = json.dumps(config_dict, sort_keys=True, ensure_ascii=True)
        if extras:
            hashable_string += "".join(extras)
        return hashlib.sha256(hashable_string.encode()).hexdigest()[:12]
@@ -0,0 +1,68 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+ from typing import Callable, TypedDict
4
+
5
+ from unstructured_ingest.v2.interfaces.file_data import FileData
6
+ from unstructured_ingest.v2.logger import logger
7
+ from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
8
+ from unstructured_ingest.v2.pipeline.utils import sterilize_dict
9
+ from unstructured_ingest.v2.processes.uncompress import Uncompressor
10
+
11
# Default value of UncompressStep.identifier.
STEP_ID = "uncompress"


# Mapping produced for each file emitted by the uncompress step: the path of
# its file-data JSON and the full path of the file itself.
UncompressStepResponse = TypedDict(
    "UncompressStepResponse",
    {
        "file_data_path": str,
        "path": str,
    },
)
17
+
18
+
19
class UncompressStep(PipelineStep):
    """Pipeline step that runs the uncompressor on a file-data record.

    Each file-data object produced by the uncompressor is written as
    ``<identifier>.json`` next to the original file-data file and reported
    back as an ``UncompressStepResponse``.
    """

    # NOTE(review): unlike the other pipeline steps this class carries no
    # @dataclass decorator; presumably __post_init__ is invoked through the
    # inherited PipelineStep initializer -- confirm.
    process: Uncompressor
    identifier: str = STEP_ID

    def __post_init__(self):
        # The config is optional; ``None`` is logged when it is not set.
        # Sensitive values are redacted before logging.
        config = (
            sterilize_dict(self.process.config.to_dict(redact_sensitive=True))
            if self.process.config
            else None
        )
        logger.info(f"Created {self.identifier} with configs: {config}")

    def _persist_new_file_data(
        self, new_file_data, file_data_path: str
    ) -> list[UncompressStepResponse]:
        """Write every new file-data object to disk and build its step response.

        Shared by ``_run`` and ``_run_async`` so both paths persist and report
        results identically.
        """
        parent_dir = Path(file_data_path).parent
        responses = []
        for new_file in new_file_data:
            new_file_data_path = parent_dir / f"{new_file.identifier}.json"
            # The file is written to the resolved location; the response keeps
            # the path as derived from ``file_data_path``.
            new_file.to_file(path=str(new_file_data_path.resolve()))
            responses.append(
                UncompressStepResponse(
                    path=new_file.source_identifiers.fullpath,
                    file_data_path=str(new_file_data_path),
                )
            )
        return responses

    def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]:
        """Uncompress synchronously via ``self.process.run``; ``path`` is not used here."""
        file_data = FileData.from_file(path=file_data_path)
        new_file_data = self.process.run(file_data=file_data)
        return self._persist_new_file_data(new_file_data, file_data_path)

    async def _run_async(
        self, fn: Callable, path: str, file_data_path: str
    ) -> list[UncompressStepResponse]:
        """Uncompress with ``fn``.

        ``fn`` may be a plain callable or a coroutine function; coroutine
        functions are awaited, under ``self.context.semaphore`` when one is set.
        """
        file_data = FileData.from_file(path=file_data_path)
        fn_kwargs = {"file_data": file_data}
        if not asyncio.iscoroutinefunction(fn):
            new_file_data = fn(**fn_kwargs)
        elif semaphore := self.context.semaphore:
            async with semaphore:
                new_file_data = await fn(**fn_kwargs)
        else:
            new_file_data = await fn(**fn_kwargs)
        return self._persist_new_file_data(new_file_data, file_data_path)
@@ -0,0 +1,73 @@
1
+ import asyncio
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import Callable, Optional, TypedDict
5
+
6
+ from unstructured_ingest.v2.interfaces import FileData
7
+ from unstructured_ingest.v2.interfaces.uploader import UploadContent, Uploader
8
+ from unstructured_ingest.v2.logger import logger
9
+ from unstructured_ingest.v2.pipeline.interfaces import PipelineStep, iterable_input, timed
10
+ from unstructured_ingest.v2.pipeline.utils import sterilize_dict
11
+
12
# Default value of UploadStep.identifier.
STEP_ID = "upload"


# Input item consumed by the upload step: the path of the content to upload
# and the path of its file-data JSON.
UploadStepContent = TypedDict(
    "UploadStepContent",
    {
        "path": str,
        "file_data_path": str,
    },
)
18
+
19
+
20
@dataclass
class UploadStep(PipelineStep):
    """Pipeline step that hands processed files to the uploader.

    Depending on ``self.process.is_async()`` the inputs are either dispatched
    through ``process_async`` or uploaded together in one ``run`` call.
    """

    process: Uploader
    identifier: str = STEP_ID

    def __str__(self):
        uploader_name = self.process.__class__.__name__
        return f"{self.identifier} ({uploader_name})"

    def __post_init__(self):
        # Both configs are optional; ``None`` is logged for a missing one.
        # Sensitive values are redacted before logging.
        config = None
        if self.process.upload_config:
            config = sterilize_dict(self.process.upload_config.to_dict(redact_sensitive=True))

        connection_config = None
        if self.process.connection_config:
            connection_config = sterilize_dict(
                self.process.connection_config.to_dict(redact_sensitive=True)
            )

        logger.info(
            f"Created {self.identifier} with configs: {config}, "
            f"connection configs: {connection_config}"
        )

    def process_whole(self, iterable: iterable_input):
        """Upload all items of ``iterable`` through a single ``run`` call."""
        self.run(contents=iterable)

    @timed
    def __call__(self, iterable: iterable_input):
        """Log the number of inputs, then upload them asynchronously or as a whole."""
        doc_count = len(iterable)  # type: ignore
        logger.info(f"Calling {self.__class__.__name__} with {doc_count} docs")
        if not self.process.is_async():
            self.process_whole(iterable=iterable)
            return
        self.process_async(iterable=iterable)

    def _run(self, fn: Callable, contents: list[UploadStepContent]):
        """Turn ``contents`` into ``UploadContent`` objects and pass them all to ``fn``."""
        upload_contents = []
        for item in contents:
            file_data = FileData.from_file(item["file_data_path"])
            upload_contents.append(UploadContent(path=Path(item["path"]), file_data=file_data))
        fn(contents=upload_contents)

    async def _run_async(self, path: str, file_data_path: str, fn: Optional[Callable] = None):
        """Upload one file with ``fn``, defaulting to ``self.process.run_async``.

        ``fn`` may be a plain callable or a coroutine function; coroutine
        functions are awaited, under ``self.context.semaphore`` when one is set.
        """
        fn = fn or self.process.run_async
        fn_kwargs = {"path": Path(path), "file_data": FileData.from_file(path=file_data_path)}
        if asyncio.iscoroutinefunction(fn):
            semaphore = self.context.semaphore
            if semaphore:
                async with semaphore:
                    await fn(**fn_kwargs)
            else:
                await fn(**fn_kwargs)
        else:
            fn(**fn_kwargs)
@@ -0,0 +1,15 @@
1
+ import json
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+
5
+
6
def sterilize_dict(data: dict) -> dict:
    """Return a JSON-safe copy of ``data`` by round-tripping it through JSON.

    ``Path`` values are converted to POSIX-style strings and ``datetime``
    values to ISO 8601 strings. Everything else is handled by the ``json``
    module's normal rules (e.g. tuples come back as lists, non-string keys
    come back as strings).

    Raises:
        TypeError: If ``data`` contains a value that is neither natively
            JSON-encodable nor a ``Path``/``datetime``.
    """

    def _fallback(value):
        # json.dumps only consults this hook for objects it cannot encode.
        if isinstance(value, datetime):
            return value.isoformat()
        if isinstance(value, Path):
            return value.as_posix()
        raise TypeError(f"Type {type(value)} not serializable")

    return json.loads(json.dumps(data, default=_fallback))
File without changes
@@ -0,0 +1,97 @@
1
+ from abc import ABC
2
+ from dataclasses import dataclass, fields
3
+ from pathlib import Path
4
+ from typing import Any, Optional
5
+
6
+ from unstructured.chunking import dispatch
7
+ from unstructured.documents.elements import Element, assign_and_map_hash_ids
8
+ from unstructured.staging.base import dict_to_elements, elements_from_json
9
+
10
+ from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field
11
+ from unstructured_ingest.v2.interfaces.process import BaseProcess
12
+ from unstructured_ingest.v2.logger import logger
13
+
14
+
15
@dataclass
class ChunkerConfig(EnhancedDataClassJsonMixin):
    """Settings that control how partitioned elements are chunked."""

    # Name of the chunking strategy to apply.
    chunking_strategy: Optional[str] = None
    # Server URL handed to the API client when chunking via the API.
    chunking_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general"
    # When True the chunker reports itself as async (see Chunker.is_async).
    chunk_by_api: bool = False
    # API key for the client; flagged sensitive so it can be redacted.
    chunk_api_key: Optional[str] = enhanced_field(default=None, sensitive=True)

    # Tuning knobs forwarded through to_chunking_kwargs(); None leaves the
    # value unset.
    chunk_combine_text_under_n_chars: Optional[int] = None
    chunk_include_orig_elements: Optional[bool] = None
    chunk_max_characters: Optional[int] = None
    chunk_multipage_sections: Optional[bool] = None
    chunk_new_after_n_chars: Optional[int] = None
    chunk_overlap: Optional[int] = None
    chunk_overlap_all: Optional[bool] = None

    def to_chunking_kwargs(self) -> dict[str, Any]:
        """Map the ``chunk_*`` fields onto chunking keyword-argument names.

        Returns:
            A dict keyed by argument name; unset options are present with a
            value of ``None``.
        """
        return dict(
            chunking_strategy=self.chunking_strategy,
            combine_under_n_chars=self.chunk_combine_text_under_n_chars,
            max_characters=self.chunk_max_characters,
            include_orig_elements=self.chunk_include_orig_elements,
            multipage_sections=self.chunk_multipage_sections,
            new_after_n_chars=self.chunk_new_after_n_chars,
            overlap=self.chunk_overlap,
            overlap_all=self.chunk_overlap_all,
        )
41
+
42
+
43
@dataclass
class Chunker(BaseProcess, ABC):
    """Chunks partitioned elements, either locally or through the API.

    Attributes:
        config: Chunking settings; ``config.chunk_by_api`` selects the API path.
    """

    config: ChunkerConfig

    def is_async(self) -> bool:
        """Return True when chunking is delegated to the API."""
        return self.config.chunk_by_api

    def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]:
        """Chunk the elements stored in ``elements_filepath`` locally.

        Args:
            elements_filepath: JSON file holding the partitioned elements.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The chunked elements, or the elements unchanged when the file is
            empty or the configured strategy is not one of the locally
            supported ones.
        """
        elements = elements_from_json(filename=str(elements_filepath))
        if not elements:
            return elements
        local_chunking_strategies = ("basic", "by_title")
        if self.config.chunking_strategy not in local_chunking_strategies:
            logger.warning(
                "chunking strategy not supported for local chunking: {}, must be one of: {}".format(
                    self.config.chunking_strategy, ", ".join(local_chunking_strategies)
                )
            )
            return elements
        chunking_kwargs = self.config.to_chunking_kwargs()
        # to_chunking_kwargs() uses the API parameter name
        # ``combine_under_n_chars``. The local chunkers name this option
        # ``combine_text_under_n_chars``, and dispatch.chunk discards keywords
        # the chunker does not declare, so without this rename the configured
        # value would be silently ignored.
        if "combine_under_n_chars" in chunking_kwargs:
            chunking_kwargs["combine_text_under_n_chars"] = chunking_kwargs.pop(
                "combine_under_n_chars"
            )
        chunked_elements = dispatch.chunk(elements=elements, **chunking_kwargs)
        assign_and_map_hash_ids(chunked_elements)
        return chunked_elements

    async def run_async(self, elements_filepath: Path, **kwargs: Any) -> list[Element]:
        """Chunk the elements in ``elements_filepath`` via the API client.

        Args:
            elements_filepath: JSON file holding the partitioned elements.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The elements built from the API response (empty list when the
            response carries none).
        """
        # Imported lazily so the client is only required when the API path
        # is actually used.
        from unstructured_client import UnstructuredClient
        from unstructured_client.models.shared import Files, PartitionParameters

        client = UnstructuredClient(
            api_key_auth=self.config.chunk_api_key,
            server_url=self.config.chunking_endpoint,
        )
        partition_request = self.config.to_chunking_kwargs()
        # Keep only the options the installed client's PartitionParameters
        # declares; anything else is reported at debug level and dropped.
        possible_fields = [f.name for f in fields(PartitionParameters)]
        filtered_partition_request = {
            k: v for k, v in partition_request.items() if k in possible_fields
        }
        if len(filtered_partition_request) != len(partition_request):
            logger.debug(
                "Following fields were omitted due to not being "
                "supported by the currently used unstructured client: {}".format(
                    ", ".join([v for v in partition_request if v not in filtered_partition_request])
                )
            )
        with open(elements_filepath, "rb") as f:
            files = Files(
                content=f.read(),
                file_name=str(elements_filepath.resolve()),
            )
        filtered_partition_request["files"] = files
        partition_params = PartitionParameters(**filtered_partition_request)
        # NOTE(review): this call is not awaited, so it presumably blocks the
        # event loop for the duration of the request -- confirm against the
        # client library before relying on concurrency here.
        resp = client.general.partition(partition_params)
        elements_raw = resp.elements or []
        elements = dict_to_elements(elements_raw)
        assign_and_map_hash_ids(elements)
        return elements
@@ -0,0 +1,63 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional, Type, TypeVar
3
+
4
+ from unstructured_ingest.v2.interfaces import (
5
+ ConnectionConfig,
6
+ Downloader,
7
+ DownloaderConfig,
8
+ Indexer,
9
+ IndexerConfig,
10
+ Uploader,
11
+ UploaderConfig,
12
+ UploadStager,
13
+ UploadStagerConfig,
14
+ )
15
+
16
# Type variables used to annotate the registry entries below. Each one is
# bound to the corresponding interface imported from
# unstructured_ingest.v2.interfaces.

# Source-side components.
IndexerT = TypeVar("IndexerT", bound=Indexer)
IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig)
DownloaderT = TypeVar("DownloaderT", bound=Downloader)
DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig)

# Shared by sources and destinations.
ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig)

# Destination-side components.
UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig)
UploadStagerT = TypeVar("UploadStagerT", bound=UploadStager)
UploaderConfigT = TypeVar("UploaderConfigT", bound=UploaderConfig)
UploaderT = TypeVar("UploaderT", bound=Uploader)
25
+
26
+
27
@dataclass
class SourceRegistryEntry:
    """Classes that together make up one source connector.

    ``indexer`` and ``downloader`` are required; the config classes are
    optional and default to ``None``.
    """

    # Required component classes.
    indexer: Type[IndexerT]
    downloader: Type[DownloaderT]

    # Optional configuration classes for the components above.
    downloader_config: Optional[Type[DownloaderConfigT]] = None
    indexer_config: Optional[Type[IndexerConfigT]] = None
    connection_config: Optional[Type[ConnectionConfigT]] = None
35
+
36
+
37
# Registered source connectors, keyed by source type.
source_registry: dict[str, SourceRegistryEntry] = {}


def add_source_entry(source_type: str, entry: SourceRegistryEntry):
    """Register ``entry`` under ``source_type`` in ``source_registry``.

    Raises:
        ValueError: If ``source_type`` is already registered.
    """
    if source_type not in source_registry:
        source_registry[source_type] = entry
        return
    raise ValueError(f"source {source_type} has already been registered")
44
+
45
+
46
@dataclass
class DestinationRegistryEntry:
    """Classes that together make up one destination connector.

    Only ``uploader`` is required; the stager and all config classes are
    optional and default to ``None``.
    """

    # Required uploader class and optional staging class.
    uploader: Type[UploaderT]
    upload_stager: Optional[Type[UploadStagerT]] = None

    # Optional configuration classes for the stager and uploader.
    upload_stager_config: Optional[Type[UploadStagerConfigT]] = None
    uploader_config: Optional[Type[UploaderConfigT]] = None

    # Optional connection configuration class.
    connection_config: Optional[Type[ConnectionConfigT]] = None
55
+
56
+
57
# Registered destination connectors, keyed by destination type.
destination_registry: dict[str, DestinationRegistryEntry] = {}


def add_destination_entry(destination_type: str, entry: DestinationRegistryEntry):
    """Register ``entry`` under ``destination_type`` in ``destination_registry``.

    Raises:
        ValueError: If ``destination_type`` is already registered.
    """
    if destination_type not in destination_registry:
        destination_registry[destination_type] = entry
        return
    raise ValueError(f"destination {destination_type} has already been registered")
@@ -0,0 +1,77 @@
1
from __future__ import annotations

# Importing this package registers the connectors below in the source and
# destination registries as an import-time side effect.

# Imported only for its side effects. This must be this package's own fsspec
# subpackage: the previous import pointed at the legacy
# ``unstructured.ingest`` namespace, which belongs to a different package.
import unstructured_ingest.v2.processes.connectors.fsspec  # noqa: F401

from unstructured_ingest.v2.processes.connector_registry import (
    add_destination_entry,
    add_source_entry,
)

from .astra import CONNECTOR_TYPE as ASTRA_CONNECTOR_TYPE
from .astra import astra_destination_entry
from .chroma import CONNECTOR_TYPE as CHROMA_CONNECTOR_TYPE
from .chroma import chroma_destination_entry
from .databricks_volumes import CONNECTOR_TYPE as DATABRICKS_VOLUMES_CONNECTOR_TYPE
from .databricks_volumes import databricks_volumes_destination_entry
from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE
from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry
from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE
from .google_drive import google_drive_source_entry
from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE
from .local import local_destination_entry, local_source_entry
from .mongodb import CONNECTOR_TYPE as MONGODB_CONNECTOR_TYPE
from .mongodb import mongodb_destination_entry
from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE
from .onedrive import onedrive_source_entry
from .opensearch import CONNECTOR_TYPE as OPENSEARCH_CONNECTOR_TYPE
from .opensearch import opensearch_destination_entry, opensearch_source_entry
from .pinecone import CONNECTOR_TYPE as PINECONE_CONNECTOR_TYPE
from .pinecone import pinecone_destination_entry
from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE
from .salesforce import salesforce_source_entry
from .sharepoint import CONNECTOR_TYPE as SHAREPOINT_CONNECTOR_TYPE
from .sharepoint import sharepoint_source_entry
from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
from .singlestore import singlestore_destination_entry
from .sql import CONNECTOR_TYPE as SQL_CONNECTOR_TYPE
from .sql import sql_destination_entry
from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
from .weaviate import weaviate_destination_entry

# Registration order is kept as-is. Each add_*_entry call raises ValueError if
# the connector type is already present in its registry.
add_destination_entry(destination_type=ASTRA_CONNECTOR_TYPE, entry=astra_destination_entry)

add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_destination_entry)

add_source_entry(source_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_source_entry)
add_destination_entry(
    destination_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_destination_entry
)

add_source_entry(source_type=GOOGLE_DRIVE_CONNECTOR_TYPE, entry=google_drive_source_entry)

add_source_entry(source_type=LOCAL_CONNECTOR_TYPE, entry=local_source_entry)
add_destination_entry(destination_type=LOCAL_CONNECTOR_TYPE, entry=local_destination_entry)

add_source_entry(source_type=ONEDRIVE_CONNECTOR_TYPE, entry=onedrive_source_entry)

add_source_entry(source_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_source_entry)
add_destination_entry(
    destination_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_destination_entry
)

add_source_entry(source_type=SALESFORCE_CONNECTOR_TYPE, entry=salesforce_source_entry)

add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry)

add_destination_entry(
    destination_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_destination_entry
)

add_destination_entry(destination_type=SQL_CONNECTOR_TYPE, entry=sql_destination_entry)

add_destination_entry(destination_type=MONGODB_CONNECTOR_TYPE, entry=mongodb_destination_entry)
add_destination_entry(destination_type=PINECONE_CONNECTOR_TYPE, entry=pinecone_destination_entry)
add_source_entry(source_type=SHAREPOINT_CONNECTOR_TYPE, entry=sharepoint_source_entry)
add_destination_entry(
    destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
)