unstructured-ingest 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (356)
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/__init__.py +14 -0
  4. unstructured_ingest/cli/base/__init__.py +0 -0
  5. unstructured_ingest/cli/base/cmd.py +19 -0
  6. unstructured_ingest/cli/base/dest.py +87 -0
  7. unstructured_ingest/cli/base/src.py +57 -0
  8. unstructured_ingest/cli/cli.py +32 -0
  9. unstructured_ingest/cli/cmd_factory.py +12 -0
  10. unstructured_ingest/cli/cmds/__init__.py +145 -0
  11. unstructured_ingest/cli/cmds/airtable.py +69 -0
  12. unstructured_ingest/cli/cmds/astra.py +99 -0
  13. unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
  14. unstructured_ingest/cli/cmds/biomed.py +52 -0
  15. unstructured_ingest/cli/cmds/chroma.py +104 -0
  16. unstructured_ingest/cli/cmds/clarifai.py +71 -0
  17. unstructured_ingest/cli/cmds/confluence.py +69 -0
  18. unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
  19. unstructured_ingest/cli/cmds/delta_table.py +94 -0
  20. unstructured_ingest/cli/cmds/discord.py +47 -0
  21. unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
  22. unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  23. unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
  24. unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
  25. unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
  26. unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
  27. unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
  28. unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
  29. unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
  30. unstructured_ingest/cli/cmds/github.py +54 -0
  31. unstructured_ingest/cli/cmds/gitlab.py +54 -0
  32. unstructured_ingest/cli/cmds/google_drive.py +49 -0
  33. unstructured_ingest/cli/cmds/hubspot.py +70 -0
  34. unstructured_ingest/cli/cmds/jira.py +71 -0
  35. unstructured_ingest/cli/cmds/kafka.py +102 -0
  36. unstructured_ingest/cli/cmds/local.py +43 -0
  37. unstructured_ingest/cli/cmds/mongodb.py +72 -0
  38. unstructured_ingest/cli/cmds/notion.py +48 -0
  39. unstructured_ingest/cli/cmds/onedrive.py +66 -0
  40. unstructured_ingest/cli/cmds/opensearch.py +117 -0
  41. unstructured_ingest/cli/cmds/outlook.py +67 -0
  42. unstructured_ingest/cli/cmds/pinecone.py +71 -0
  43. unstructured_ingest/cli/cmds/qdrant.py +124 -0
  44. unstructured_ingest/cli/cmds/reddit.py +67 -0
  45. unstructured_ingest/cli/cmds/salesforce.py +58 -0
  46. unstructured_ingest/cli/cmds/sharepoint.py +66 -0
  47. unstructured_ingest/cli/cmds/slack.py +56 -0
  48. unstructured_ingest/cli/cmds/sql.py +66 -0
  49. unstructured_ingest/cli/cmds/vectara.py +66 -0
  50. unstructured_ingest/cli/cmds/weaviate.py +98 -0
  51. unstructured_ingest/cli/cmds/wikipedia.py +40 -0
  52. unstructured_ingest/cli/common.py +7 -0
  53. unstructured_ingest/cli/interfaces.py +656 -0
  54. unstructured_ingest/cli/utils.py +205 -0
  55. unstructured_ingest/connector/__init__.py +0 -0
  56. unstructured_ingest/connector/airtable.py +309 -0
  57. unstructured_ingest/connector/astra.py +237 -0
  58. unstructured_ingest/connector/azure_cognitive_search.py +144 -0
  59. unstructured_ingest/connector/biomed.py +313 -0
  60. unstructured_ingest/connector/chroma.py +158 -0
  61. unstructured_ingest/connector/clarifai.py +122 -0
  62. unstructured_ingest/connector/confluence.py +285 -0
  63. unstructured_ingest/connector/databricks_volumes.py +137 -0
  64. unstructured_ingest/connector/delta_table.py +203 -0
  65. unstructured_ingest/connector/discord.py +180 -0
  66. unstructured_ingest/connector/elasticsearch.py +396 -0
  67. unstructured_ingest/connector/fsspec/__init__.py +0 -0
  68. unstructured_ingest/connector/fsspec/azure.py +78 -0
  69. unstructured_ingest/connector/fsspec/box.py +109 -0
  70. unstructured_ingest/connector/fsspec/dropbox.py +160 -0
  71. unstructured_ingest/connector/fsspec/fsspec.py +359 -0
  72. unstructured_ingest/connector/fsspec/gcs.py +82 -0
  73. unstructured_ingest/connector/fsspec/s3.py +62 -0
  74. unstructured_ingest/connector/fsspec/sftp.py +81 -0
  75. unstructured_ingest/connector/git.py +124 -0
  76. unstructured_ingest/connector/github.py +173 -0
  77. unstructured_ingest/connector/gitlab.py +142 -0
  78. unstructured_ingest/connector/google_drive.py +349 -0
  79. unstructured_ingest/connector/hubspot.py +278 -0
  80. unstructured_ingest/connector/jira.py +469 -0
  81. unstructured_ingest/connector/kafka.py +294 -0
  82. unstructured_ingest/connector/local.py +139 -0
  83. unstructured_ingest/connector/mongodb.py +285 -0
  84. unstructured_ingest/connector/notion/__init__.py +0 -0
  85. unstructured_ingest/connector/notion/client.py +233 -0
  86. unstructured_ingest/connector/notion/connector.py +468 -0
  87. unstructured_ingest/connector/notion/helpers.py +584 -0
  88. unstructured_ingest/connector/notion/interfaces.py +32 -0
  89. unstructured_ingest/connector/notion/types/__init__.py +0 -0
  90. unstructured_ingest/connector/notion/types/block.py +95 -0
  91. unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
  92. unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
  93. unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
  94. unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
  95. unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
  96. unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
  97. unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
  98. unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
  99. unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
  100. unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
  101. unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
  102. unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
  103. unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
  104. unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
  105. unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
  106. unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
  107. unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
  108. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
  109. unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
  110. unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
  111. unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
  112. unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
  113. unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
  114. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
  115. unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
  116. unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
  117. unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
  118. unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
  119. unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
  120. unstructured_ingest/connector/notion/types/database.py +72 -0
  121. unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
  122. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
  123. unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
  124. unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
  125. unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
  126. unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
  127. unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
  128. unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
  129. unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
  130. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
  131. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
  132. unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
  133. unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
  134. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
  135. unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
  136. unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
  137. unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
  138. unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
  139. unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
  140. unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
  141. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
  142. unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
  143. unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
  144. unstructured_ingest/connector/notion/types/date.py +26 -0
  145. unstructured_ingest/connector/notion/types/file.py +51 -0
  146. unstructured_ingest/connector/notion/types/page.py +44 -0
  147. unstructured_ingest/connector/notion/types/parent.py +66 -0
  148. unstructured_ingest/connector/notion/types/rich_text.py +189 -0
  149. unstructured_ingest/connector/notion/types/user.py +76 -0
  150. unstructured_ingest/connector/onedrive.py +232 -0
  151. unstructured_ingest/connector/opensearch.py +218 -0
  152. unstructured_ingest/connector/outlook.py +285 -0
  153. unstructured_ingest/connector/pinecone.py +140 -0
  154. unstructured_ingest/connector/qdrant.py +144 -0
  155. unstructured_ingest/connector/reddit.py +166 -0
  156. unstructured_ingest/connector/registry.py +109 -0
  157. unstructured_ingest/connector/salesforce.py +301 -0
  158. unstructured_ingest/connector/sharepoint.py +573 -0
  159. unstructured_ingest/connector/slack.py +224 -0
  160. unstructured_ingest/connector/sql.py +199 -0
  161. unstructured_ingest/connector/vectara.py +248 -0
  162. unstructured_ingest/connector/weaviate.py +190 -0
  163. unstructured_ingest/connector/wikipedia.py +208 -0
  164. unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
  165. unstructured_ingest/enhanced_dataclass/core.py +99 -0
  166. unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
  167. unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
  168. unstructured_ingest/error.py +49 -0
  169. unstructured_ingest/evaluate.py +338 -0
  170. unstructured_ingest/ingest_backoff/__init__.py +3 -0
  171. unstructured_ingest/ingest_backoff/_common.py +102 -0
  172. unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
  173. unstructured_ingest/interfaces.py +838 -0
  174. unstructured_ingest/logger.py +130 -0
  175. unstructured_ingest/main.py +11 -0
  176. unstructured_ingest/pipeline/__init__.py +22 -0
  177. unstructured_ingest/pipeline/copy.py +19 -0
  178. unstructured_ingest/pipeline/doc_factory.py +12 -0
  179. unstructured_ingest/pipeline/interfaces.py +265 -0
  180. unstructured_ingest/pipeline/partition.py +60 -0
  181. unstructured_ingest/pipeline/permissions.py +12 -0
  182. unstructured_ingest/pipeline/pipeline.py +117 -0
  183. unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  184. unstructured_ingest/pipeline/reformat/chunking.py +130 -0
  185. unstructured_ingest/pipeline/reformat/embedding.py +66 -0
  186. unstructured_ingest/pipeline/source.py +77 -0
  187. unstructured_ingest/pipeline/utils.py +6 -0
  188. unstructured_ingest/pipeline/write.py +18 -0
  189. unstructured_ingest/processor.py +93 -0
  190. unstructured_ingest/runner/__init__.py +104 -0
  191. unstructured_ingest/runner/airtable.py +35 -0
  192. unstructured_ingest/runner/astra.py +34 -0
  193. unstructured_ingest/runner/base_runner.py +89 -0
  194. unstructured_ingest/runner/biomed.py +45 -0
  195. unstructured_ingest/runner/confluence.py +35 -0
  196. unstructured_ingest/runner/delta_table.py +34 -0
  197. unstructured_ingest/runner/discord.py +35 -0
  198. unstructured_ingest/runner/elasticsearch.py +40 -0
  199. unstructured_ingest/runner/fsspec/__init__.py +0 -0
  200. unstructured_ingest/runner/fsspec/azure.py +30 -0
  201. unstructured_ingest/runner/fsspec/box.py +28 -0
  202. unstructured_ingest/runner/fsspec/dropbox.py +30 -0
  203. unstructured_ingest/runner/fsspec/fsspec.py +40 -0
  204. unstructured_ingest/runner/fsspec/gcs.py +28 -0
  205. unstructured_ingest/runner/fsspec/s3.py +28 -0
  206. unstructured_ingest/runner/fsspec/sftp.py +28 -0
  207. unstructured_ingest/runner/github.py +37 -0
  208. unstructured_ingest/runner/gitlab.py +37 -0
  209. unstructured_ingest/runner/google_drive.py +35 -0
  210. unstructured_ingest/runner/hubspot.py +35 -0
  211. unstructured_ingest/runner/jira.py +35 -0
  212. unstructured_ingest/runner/kafka.py +34 -0
  213. unstructured_ingest/runner/local.py +23 -0
  214. unstructured_ingest/runner/mongodb.py +34 -0
  215. unstructured_ingest/runner/notion.py +61 -0
  216. unstructured_ingest/runner/onedrive.py +35 -0
  217. unstructured_ingest/runner/opensearch.py +40 -0
  218. unstructured_ingest/runner/outlook.py +33 -0
  219. unstructured_ingest/runner/reddit.py +35 -0
  220. unstructured_ingest/runner/salesforce.py +33 -0
  221. unstructured_ingest/runner/sharepoint.py +35 -0
  222. unstructured_ingest/runner/slack.py +33 -0
  223. unstructured_ingest/runner/utils.py +47 -0
  224. unstructured_ingest/runner/wikipedia.py +35 -0
  225. unstructured_ingest/runner/writers/__init__.py +48 -0
  226. unstructured_ingest/runner/writers/astra.py +22 -0
  227. unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
  228. unstructured_ingest/runner/writers/base_writer.py +26 -0
  229. unstructured_ingest/runner/writers/chroma.py +22 -0
  230. unstructured_ingest/runner/writers/clarifai.py +19 -0
  231. unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
  232. unstructured_ingest/runner/writers/delta_table.py +24 -0
  233. unstructured_ingest/runner/writers/elasticsearch.py +24 -0
  234. unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  235. unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
  236. unstructured_ingest/runner/writers/fsspec/box.py +21 -0
  237. unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
  238. unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
  239. unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
  240. unstructured_ingest/runner/writers/kafka.py +21 -0
  241. unstructured_ingest/runner/writers/mongodb.py +21 -0
  242. unstructured_ingest/runner/writers/opensearch.py +26 -0
  243. unstructured_ingest/runner/writers/pinecone.py +21 -0
  244. unstructured_ingest/runner/writers/qdrant.py +19 -0
  245. unstructured_ingest/runner/writers/sql.py +22 -0
  246. unstructured_ingest/runner/writers/vectara.py +22 -0
  247. unstructured_ingest/runner/writers/weaviate.py +21 -0
  248. unstructured_ingest/utils/__init__.py +0 -0
  249. unstructured_ingest/utils/compression.py +117 -0
  250. unstructured_ingest/utils/data_prep.py +112 -0
  251. unstructured_ingest/utils/dep_check.py +66 -0
  252. unstructured_ingest/utils/string_and_date_utils.py +39 -0
  253. unstructured_ingest/utils/table.py +73 -0
  254. unstructured_ingest/v2/__init__.py +1 -0
  255. unstructured_ingest/v2/cli/__init__.py +0 -0
  256. unstructured_ingest/v2/cli/base/__init__.py +4 -0
  257. unstructured_ingest/v2/cli/base/cmd.py +215 -0
  258. unstructured_ingest/v2/cli/base/dest.py +76 -0
  259. unstructured_ingest/v2/cli/base/importer.py +34 -0
  260. unstructured_ingest/v2/cli/base/src.py +70 -0
  261. unstructured_ingest/v2/cli/cli.py +24 -0
  262. unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
  263. unstructured_ingest/v2/cli/cmds/astra.py +85 -0
  264. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
  265. unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
  266. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
  267. unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
  268. unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
  269. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
  270. unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
  271. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
  272. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
  273. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
  274. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
  275. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
  276. unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
  277. unstructured_ingest/v2/cli/cmds/local.py +60 -0
  278. unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
  279. unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
  280. unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
  281. unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
  282. unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
  283. unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
  284. unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
  285. unstructured_ingest/v2/cli/cmds/sql.py +84 -0
  286. unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
  287. unstructured_ingest/v2/cli/configs/__init__.py +6 -0
  288. unstructured_ingest/v2/cli/configs/chunk.py +89 -0
  289. unstructured_ingest/v2/cli/configs/embed.py +74 -0
  290. unstructured_ingest/v2/cli/configs/partition.py +99 -0
  291. unstructured_ingest/v2/cli/configs/processor.py +88 -0
  292. unstructured_ingest/v2/cli/interfaces.py +27 -0
  293. unstructured_ingest/v2/cli/utils.py +240 -0
  294. unstructured_ingest/v2/example.py +37 -0
  295. unstructured_ingest/v2/interfaces/__init__.py +29 -0
  296. unstructured_ingest/v2/interfaces/connector.py +32 -0
  297. unstructured_ingest/v2/interfaces/downloader.py +79 -0
  298. unstructured_ingest/v2/interfaces/file_data.py +49 -0
  299. unstructured_ingest/v2/interfaces/indexer.py +28 -0
  300. unstructured_ingest/v2/interfaces/process.py +20 -0
  301. unstructured_ingest/v2/interfaces/processor.py +48 -0
  302. unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
  303. unstructured_ingest/v2/interfaces/uploader.py +39 -0
  304. unstructured_ingest/v2/logger.py +126 -0
  305. unstructured_ingest/v2/main.py +11 -0
  306. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  307. unstructured_ingest/v2/pipeline/interfaces.py +167 -0
  308. unstructured_ingest/v2/pipeline/pipeline.py +284 -0
  309. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  310. unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
  311. unstructured_ingest/v2/pipeline/steps/download.py +124 -0
  312. unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
  313. unstructured_ingest/v2/pipeline/steps/index.py +61 -0
  314. unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
  315. unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
  316. unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
  317. unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
  318. unstructured_ingest/v2/pipeline/utils.py +15 -0
  319. unstructured_ingest/v2/processes/__init__.py +0 -0
  320. unstructured_ingest/v2/processes/chunker.py +97 -0
  321. unstructured_ingest/v2/processes/connector_registry.py +63 -0
  322. unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
  323. unstructured_ingest/v2/processes/connectors/astra.py +152 -0
  324. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
  325. unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
  326. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
  327. unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
  328. unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
  329. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
  330. unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
  331. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
  332. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
  333. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
  334. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
  335. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
  336. unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
  337. unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
  338. unstructured_ingest/v2/processes/connectors/local.py +204 -0
  339. unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
  340. unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
  341. unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
  342. unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
  343. unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
  344. unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
  345. unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
  346. unstructured_ingest/v2/processes/connectors/sql.py +269 -0
  347. unstructured_ingest/v2/processes/connectors/utils.py +19 -0
  348. unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
  349. unstructured_ingest/v2/processes/embedder.py +76 -0
  350. unstructured_ingest/v2/processes/partitioner.py +166 -0
  351. unstructured_ingest/v2/processes/uncompress.py +43 -0
  352. unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
  353. unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
  354. unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
  355. unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
  356. unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,396 @@
1
+ import copy
2
+ import hashlib
3
+ import typing as t
4
+ import uuid
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ from dataclasses_json.core import Json
9
+
10
+ from unstructured_ingest.enhanced_dataclass import enhanced_field
11
+ from unstructured_ingest.enhanced_dataclass.core import _asdict
12
+ from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
13
+ from unstructured_ingest.interfaces import (
14
+ AccessConfig,
15
+ BaseConnectorConfig,
16
+ BaseDestinationConnector,
17
+ BaseIngestDocBatch,
18
+ BaseSingleIngestDoc,
19
+ BaseSourceConnector,
20
+ IngestDocCleanupMixin,
21
+ SourceConnectorCleanupMixin,
22
+ SourceMetadata,
23
+ WriteConfig,
24
+ )
25
+ from unstructured_ingest.logger import logger
26
+ from unstructured_ingest.utils.data_prep import flatten_dict, generator_batching_wbytes
27
+ from unstructured_ingest.utils.dep_check import requires_dependencies
28
+
29
+ if t.TYPE_CHECKING:
30
+ from elasticsearch import Elasticsearch
31
+
32
+
33
@dataclass
class ElasticsearchAccessConfig(AccessConfig):
    """Credentials and TLS options used to connect to Elasticsearch.

    The output of ``to_dict`` is unpacked as keyword arguments into the
    ``elasticsearch.Elasticsearch`` constructor by the connectors in this module,
    so ``to_dict`` reshapes the stored fields into the names that constructor uses.
    """

    hosts: t.Optional[t.List[str]] = None
    username: t.Optional[str] = None
    password: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    cloud_id: t.Optional[str] = None
    api_key: t.Optional[str] = enhanced_field(
        default=None, sensitive=True, overload_name="es_api_key"
    )
    api_key_id: t.Optional[str] = None
    bearer_auth: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    ca_certs: t.Optional[str] = None
    ssl_assert_fingerprint: t.Optional[str] = enhanced_field(default=None, sensitive=True)

    def to_dict(self, **kwargs) -> t.Dict[str, Json]:
        """Serialize the config, reshaping auth fields into Elasticsearch client kwargs.

        Supported auth methods are described at:
        https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html

        Args:
            **kwargs: forwarded unchanged to the parent ``to_dict``.

        Returns:
            The parent's dict with:
              * ``ca_certs`` removed when it is unset,
              * ``basic_auth`` set to a ``(user, password)`` tuple when a password is usable,
              * ``api_key`` replaced by an ``(api_key_id, api_key)`` tuple when both are set,
              * ``username``, ``password`` and ``api_key_id`` removed, since the reshaped
                ``basic_auth`` / ``api_key`` entries carry that information instead.
        """
        d = super().to_dict(**kwargs)

        if not self.ca_certs:
            # ES library already sets a default for this, so don't override it with None.
            # A default is passed to pop() so a missing key is not an error.
            d.pop("ca_certs", None)

        # NOTE(review): the tuples below are built from the raw attribute values rather than
        # from `d`. If the parent to_dict() can redact sensitive fields, confirm that the
        # secrets are not re-exposed through `basic_auth` / `api_key`.
        if self.password and (self.cloud_id or self.ca_certs or self.ssl_assert_fingerprint):
            # NOTE(review): any supplied `username` is ignored in this branch and "elastic"
            # is always used - confirm this is intended.
            d["basic_auth"] = ("elastic", self.password)
        elif not self.cloud_id and self.username and self.password:
            d["basic_auth"] = (self.username, self.password)
        elif self.api_key and self.api_key_id:
            d["api_key"] = (self.api_key_id, self.api_key)

        # None of these exist on the client init; the credentials are passed through
        # `basic_auth` / `api_key` instead, so always strip the raw fields.
        for client_unknown_key in ("username", "password", "api_key_id"):
            d.pop(client_unknown_key, None)
        return d
70
+
71
+
72
@dataclass
class SimpleElasticsearchConfig(BaseConnectorConfig):
    """Connector settings used by the Elasticsearch connectors in this module."""

    # Name of the index that documents are read from (or written to).
    index_name: str
    # How many document ids the source connector groups into one ingest batch.
    batch_size: int = 100
    # Document fields requested as `_source` when fetching; also hashed into file names.
    fields: t.List[str] = field(default_factory=list)
    # Connection and authentication settings. Defaults to None, but the connectors
    # call `access_config.to_dict(...)`, so it must be set before a client is built.
    access_config: ElasticsearchAccessConfig = None
83
+
84
+
85
@dataclass
class ElasticsearchDocumentMeta:
    """Identifies a single Elasticsearch document by its index and document id."""

    # Name of the index the document lives in.
    index_name: str
    # The document's `_id` within that index.
    document_id: str
94
+
95
+
96
@dataclass
class ElasticsearchIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
    """Represents one Elasticsearch document and where its artifacts live on disk
    (but does not do the processing!).

    This class does not fetch anything itself: ``get_file`` is a no-op. Documents are
    retrieved in bulk by ``ElasticsearchIngestDocBatch.get_files``, which builds these
    objects and writes each document's text to ``filename``.
    """

    connector_config: SimpleElasticsearchConfig
    document_meta: ElasticsearchDocumentMeta
    # Raw scan hit for this document; `update_source_metadata` reads its "_version" key.
    document: dict = field(default_factory=dict)
    registry_name: str = "elasticsearch"

    def _file_stem(self) -> str:
        """Return the document id, suffixed with a short hash of the requested fields.

        When ``connector_config.fields`` is non-empty, the first 8 hex characters of the
        SHA256 of the comma-joined field names are appended, so the same document fetched
        with a different field selection maps to a different file.
        """
        stem = self.document_meta.document_id
        requested_fields = self.connector_config.fields
        if requested_fields:
            digest = hashlib.sha256(",".join(requested_fields).encode()).hexdigest()[:8]
            stem = "{}-{}".format(stem, digest)
        return stem

    # TODO: remove one of filename or _tmp_download_file, using a wrapper
    @property
    def filename(self):
        """Absolute path of the downloaded text file:
        ``<download_dir>/<index_name>/<stem>.txt``."""
        return (
            Path(self.read_config.download_dir)
            / self.document_meta.index_name
            / f"{self._file_stem()}.txt"
        ).resolve()

    @property
    def _output_filename(self):
        """Path of the processed output: ``<output_dir>/<index_name>/<stem>.json``.

        The stem is the document id combined with a hash of the requested fields, which
        uniquely identifies the output file for a given field selection.
        """
        output_file = f"{self._file_stem()}.json"
        return (
            Path(self.processor_config.output_dir) / self.connector_config.index_name / output_file
        )

    def update_source_metadata(self, **kwargs):
        """Populate ``source_metadata`` from the fetched document.

        A missing document (``None`` or the empty default dict) is recorded as
        ``exists=False``; otherwise the document's ``_version`` is stored.
        """
        # `document` defaults to {} rather than None, so test for emptiness: an empty
        # dict has no "_version" key and would otherwise raise KeyError below.
        if not self.document:
            self.source_metadata = SourceMetadata(
                exists=False,
            )
            return
        self.source_metadata = SourceMetadata(
            version=self.document["_version"],
            exists=True,
        )

    @SourceConnectionError.wrap
    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    @BaseSingleIngestDoc.skip_if_file_exists
    def get_file(self):
        """No-op: the file is written by ``ElasticsearchIngestDocBatch.get_files``."""
        pass

    @property
    def date_created(self) -> t.Optional[str]:
        """Always None; no creation date is provided for this document."""
        return None

    @property
    def date_modified(self) -> t.Optional[str]:
        """Always None; no modification date is provided for this document."""
        return None

    @property
    def source_url(self) -> t.Optional[str]:
        """Always None; no source URL is provided for this document."""
        return None

    @property
    def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]:
        """Return the hosts, index name and document id that locate this document."""
        return {
            "hosts": self.connector_config.access_config.hosts,
            "index_name": self.connector_config.index_name,
            "document_id": self.document_meta.document_id,
        }
175
+
176
+
177
@dataclass
class ElasticsearchIngestDocBatch(BaseIngestDocBatch):
    """A batch of Elasticsearch documents, identified by ``list_of_ids``, that is fetched
    with a single scan and written to disk one file per document."""

    connector_config: SimpleElasticsearchConfig
    # Filled by `get_files` with one ingest doc per fetched document.
    ingest_docs: t.List[ElasticsearchIngestDoc] = field(default_factory=list)
    # Ids of the documents in this batch; effectively required (see __post_init__).
    list_of_ids: t.List[str] = field(default_factory=list)
    registry_name: str = "elasticsearch_batch"

    def __post_init__(self):
        """Reject a batch without ids.

        Raises:
            ValueError: if ``list_of_ids`` is empty.
        """
        # Until python3.8 is deprecated, this is a limitation of dataclass inheritance
        # to make it a required field
        if not self.list_of_ids:
            raise ValueError("list_of_ids is required")

    @property
    def unique_id(self) -> str:
        """Comma-joined, sorted document ids; independent of the order they were given in."""
        return ",".join(sorted(self.list_of_ids))

    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    def _get_docs(self):
        """Fetch every document in ``list_of_ids`` from the configured index.

        Returns:
            A list of scan hits, each requested with its version and restricted to
            ``connector_config.fields`` as ``_source``.
        """
        from elasticsearch import Elasticsearch
        from elasticsearch.helpers import scan

        es = Elasticsearch(**self.connector_config.access_config.to_dict(apply_name_overload=False))
        scan_query = {
            "_source": self.connector_config.fields,
            "version": True,
            "query": {"ids": {"values": self.list_of_ids}},
        }

        try:
            # Materialise the scan generator before closing the client it reads from.
            return list(
                scan(
                    es,
                    query=scan_query,
                    scroll="1m",
                    index=self.connector_config.index_name,
                )
            )
        finally:
            # This client is local to the call; close it so its connections are released.
            es.close()

    @SourceConnectionError.wrap
    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    def get_files(self):
        """Fetch the batch and write each document to its ``filename`` as plain text.

        For every fetched document an ``ElasticsearchIngestDoc`` is created, its source
        metadata is populated, and the flattened ``_source`` values are written one per
        line. The resulting ingest docs are appended to ``ingest_docs``.
        """
        for doc in self._get_docs():
            ingest_doc = ElasticsearchIngestDoc(
                processor_config=self.processor_config,
                read_config=self.read_config,
                connector_config=self.connector_config,
                document=doc,
                document_meta=ElasticsearchDocumentMeta(
                    self.connector_config.index_name, doc["_id"]
                ),
            )
            ingest_doc.update_source_metadata()

            flattened_dict = flatten_dict(dictionary=doc["_source"])
            concatenated_values = "\n".join(str(value) for value in flattened_dict.values())

            filename = ingest_doc.filename
            filename.parent.mkdir(parents=True, exist_ok=True)
            with open(filename, "w", encoding="utf8") as f:
                f.write(concatenated_values)
            self.ingest_docs.append(ingest_doc)
239
+
240
+
241
@dataclass
class ElasticsearchSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
    """Fetches particular fields from all documents in a given elasticsearch cluster and index"""

    connector_config: SimpleElasticsearchConfig
    # Lazily created client; see the `es` property.
    _es: t.Optional["Elasticsearch"] = field(init=False, default=None)

    @property
    def es(self):
        """Return the Elasticsearch client, creating it on first access."""
        if self._es is None:
            from elasticsearch import Elasticsearch

            self._es = Elasticsearch(
                **self.connector_config.access_config.to_dict(apply_name_overload=False)
            )
        return self._es

    def check_connection(self):
        """Validate connectivity by issuing a ``HEAD /`` request.

        Raises:
            SourceConnectionError: if the request fails for any reason; the original
                exception is attached as the cause.
        """
        try:
            self.es.perform_request("HEAD", "/", headers={"accept": "application/json"})
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise SourceConnectionError(f"failed to validate connection: {e}") from e

    def __post_init__(self):
        # Match-all query that requests no stored fields; `_get_doc_ids` only reads
        # the `_id` of each hit.
        self.scan_query: dict = {"stored_fields": [], "query": {"match_all": {}}}

    def initialize(self):
        """Nothing to set up; the client is created lazily by ``es``."""
        pass

    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    def _get_doc_ids(self):
        """Fetches all document ids in an index"""
        from elasticsearch.helpers import scan

        hits = scan(
            self.es,
            query=self.scan_query,
            scroll="1m",
            index=self.connector_config.index_name,
        )

        return [hit["_id"] for hit in hits]

    def get_ingest_docs(self):
        """Return one ``ElasticsearchIngestDocBatch`` per ``batch_size`` chunk of document ids.

        Ids are fetched with ``_get_doc_ids`` and split, in order, into consecutive chunks
        of at most ``connector_config.batch_size`` ids. An empty index yields an empty list.

        Raises:
            ValueError: if ``connector_config.batch_size`` is less than 1.
        """
        batch_size = self.connector_config.batch_size
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        ids = self._get_doc_ids()
        return [
            ElasticsearchIngestDocBatch(
                connector_config=self.connector_config,
                processor_config=self.processor_config,
                read_config=self.read_config,
                list_of_ids=ids[start : start + batch_size],  # noqa
            )
            for start in range(0, len(ids), batch_size)
        ]
308
+
309
+
310
@dataclass
class ElasticsearchWriteConfig(WriteConfig):
    """Write settings read by the elasticsearch destination connector."""

    # Passed as the byte limit when element dicts are grouped into upload batches.
    batch_size_bytes: int = 15_000_000
    # Passed as `thread_count` to the bulk upload helper.
    num_processes: int = 1
314
+
315
+
316
@dataclass
class ElasticsearchDestinationConnector(BaseDestinationConnector):
    """Destination connector that bulk-uploads element dicts into an elasticsearch index."""

    write_config: ElasticsearchWriteConfig
    connector_config: SimpleElasticsearchConfig
    # Client cache: not an __init__ argument, populated on first access of `client`.
    _client: t.Optional["Elasticsearch"] = field(init=False, default=None)

    def to_dict(self, **kwargs):
        """
        The _client variable in this dataclass breaks deepcopy due to:
        TypeError: cannot pickle '_thread.lock' object
        When serializing, remove it, meaning client data will need to be reinitialized
        when deserialized
        """
        self_cp = copy.copy(self)
        if hasattr(self_cp, "_client"):
            setattr(self_cp, "_client", None)
        return _asdict(self_cp, **kwargs)

    @DestinationConnectionError.wrap
    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    def generate_client(self) -> "Elasticsearch":
        """Build a new Elasticsearch client from the connector's access config."""
        from elasticsearch import Elasticsearch

        return Elasticsearch(
            **self.connector_config.access_config.to_dict(apply_name_overload=False)
        )

    @property
    def client(self):
        """Return the cached client, generating it on first access."""
        if self._client is None:
            self._client = self.generate_client()
        return self._client

    def initialize(self):
        """Force creation of the client so that it is ready before writing."""
        _ = self.client

    @DestinationConnectionError.wrap
    def check_connection(self):
        """Ping the cluster to validate the connection.

        Raises:
            DestinationConnectionError: if the ping raises or returns a falsy
                result; the underlying exception is attached as the cause.
        """
        try:
            # An explicit check instead of `assert`, which is stripped when
            # Python runs with -O and would let a failed ping pass silently.
            if not self.client.ping():
                raise ConnectionError("ping to the elasticsearch cluster was unsuccessful")
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e

    @requires_dependencies(["elasticsearch"], extras="elasticsearch")
    def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
        """Upload the given element dicts to the configured index in byte-limited batches.

        Args:
            elements_dict: bulk actions to upload (see ``normalize_dict`` for the
                shape produced by this connector).

        Failed uploads are logged rather than raised.
        """
        logger.info(
            f"writing document batches to destination"
            f" index named {self.connector_config.index_name}"
            f" at {self.connector_config.access_config.hosts}"
            f" with batch size (in bytes) {self.write_config.batch_size_bytes}"
            f" with {self.write_config.num_processes} (number of) processes"
        )
        from elasticsearch.helpers import parallel_bulk

        for batch in generator_batching_wbytes(
            elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes
        ):
            for success, info in parallel_bulk(
                self.client, batch, thread_count=self.write_config.num_processes
            ):
                if not success:
                    # The %s placeholder is required: passing `info` as an extra
                    # argument without one makes logging fail to format the
                    # record and the failure detail is lost.
                    logger.error(
                        "upload failed for a batch in elasticsearch destination connector: %s",
                        info,
                    )

    def normalize_dict(self, element_dict: dict) -> dict:
        """Convert an element dict into a bulk action targeting the configured index.

        Note that the known keys are popped from ``element_dict``, so the
        caller's dict is modified. Each action gets a freshly generated UUID
        as its ``_id``.
        """
        return {
            "_index": self.connector_config.index_name,
            "_id": str(uuid.uuid4()),
            "_source": {
                "element_id": element_dict.pop("element_id", None),
                "embeddings": element_dict.pop("embeddings", None),
                "text": element_dict.pop("text", None),
                "type": element_dict.pop("type", None),
                "metadata": flatten_dict(
                    # Fall back to an empty mapping so that absent (or None)
                    # metadata is not handed to flatten_dict as None.
                    element_dict.pop("metadata", None) or {},
                    separator="-",
                ),
            },
        }
File without changes
@@ -0,0 +1,78 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.connector.fsspec.fsspec import (
5
+ FsspecDestinationConnector,
6
+ FsspecIngestDoc,
7
+ FsspecSourceConnector,
8
+ FsspecWriteConfig,
9
+ SimpleFsspecConfig,
10
+ WriteTextConfig,
11
+ )
12
+ from unstructured_ingest.enhanced_dataclass import enhanced_field
13
+ from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
14
+ from unstructured_ingest.interfaces import AccessConfig
15
+ from unstructured_ingest.logger import logger
16
+ from unstructured_ingest.utils.dep_check import requires_dependencies
17
+
18
+
19
@dataclass
class AzureWriteTextConfig(WriteTextConfig):
    """Text-write options for the Azure destination; adds an ``overwrite`` flag."""

    # Defaults to not overwriting.
    overwrite: bool = False
22
+
23
+
24
@dataclass
class AzureWriteConfig(FsspecWriteConfig):
    """Fsspec write config whose text options are narrowed to the Azure variant."""

    # Optional; left as None when no text-write options are supplied.
    write_text_config: t.Optional[AzureWriteTextConfig] = None
27
+
28
+
29
@dataclass
class AzureAccessConfig(AccessConfig):
    """Credential fields for Azure blob storage.

    Every field is optional and flagged ``sensitive=True``.
    """

    account_name: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    account_key: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    connection_string: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    sas_token: t.Optional[str] = enhanced_field(default=None, sensitive=True)
35
+
36
+
37
@dataclass
class SimpleAzureBlobStorageConfig(SimpleFsspecConfig):
    """Fsspec connector config carrying Azure-specific access credentials."""

    # NOTE(review): annotated as non-optional although the default is None.
    access_config: AzureAccessConfig = None
40
+
41
+
42
@dataclass
class AzureBlobStorageIngestDoc(FsspecIngestDoc):
    """Fsspec ingest doc registered under the name ``azure``."""

    connector_config: SimpleAzureBlobStorageConfig
    registry_name: str = "azure"

    @SourceConnectionError.wrap
    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
    def get_file(self):
        """Delegate to the fsspec implementation after the azure dependency check."""
        super().get_file()
51
+
52
+
53
@dataclass
class AzureBlobStorageSourceConnector(FsspecSourceConnector):
    """Fsspec source connector that produces ``AzureBlobStorageIngestDoc`` instances."""

    connector_config: SimpleAzureBlobStorageConfig

    def __post_init__(self):
        # Record which ingest doc class belongs to this connector.
        self.ingest_doc_cls: t.Type[AzureBlobStorageIngestDoc] = AzureBlobStorageIngestDoc
59
+
60
+
61
@dataclass
class AzureBlobStorageDestinationConnector(FsspecDestinationConnector):
    """Fsspec destination connector configured for Azure blob storage."""

    connector_config: SimpleAzureBlobStorageConfig
    write_config: AzureWriteConfig

    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
    def initialize(self):
        """Delegate to the fsspec initialization after the azure dependency check."""
        super().initialize()

    @requires_dependencies(["adlfs"], extras="azure")
    def check_connection(self):
        """Validate the access config by constructing an ``AzureBlobFileSystem``.

        Raises:
            DestinationConnectionError: if construction raises ``ValueError``.
        """
        from adlfs import AzureBlobFileSystem

        try:
            AzureBlobFileSystem(**self.connector_config.get_access_config())
        except ValueError as err:
            message = f"failed to validate connection: {err}"
            logger.error(message, exc_info=True)
            raise DestinationConnectionError(message)
@@ -0,0 +1,109 @@
1
+ """
2
+ Box Connector
3
+ Box does not make it simple to download files with an App.
4
+ First of all, this does not work with a free Box account.
5
+ Make sure the App service email is a collaborator for your folder (co-owner or editor)
6
+ Make sure you have the 'write all files' application scope
7
+ Maybe check 'Make API calls using the as-user header'
8
+ REAUTHORIZE app after making any of the above changes
9
+ """
10
+
11
+ import typing as t
12
+ from dataclasses import dataclass
13
+
14
+ from unstructured_ingest.connector.fsspec.fsspec import (
15
+ FsspecDestinationConnector,
16
+ FsspecIngestDoc,
17
+ FsspecSourceConnector,
18
+ FsspecWriteConfig,
19
+ SimpleFsspecConfig,
20
+ )
21
+ from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
22
+ from unstructured_ingest.interfaces import AccessConfig
23
+ from unstructured_ingest.logger import logger
24
+ from unstructured_ingest.utils.dep_check import requires_dependencies
25
+
26
+
27
class AccessTokenError(Exception):
    """Signals that there is a problem with the access token."""
29
+
30
+
31
@dataclass
class BoxWriteConfig(FsspecWriteConfig):
    """Write config for the Box destination; adds nothing to the fsspec write config."""
34
+
35
+
36
@dataclass
class BoxAccessConfig(AccessConfig):
    """Access settings for Box."""

    # Handed to ``JWTAuth.from_settings_file`` by ``SimpleBoxConfig.get_access_config``.
    box_app_config: t.Optional[str] = None
39
+
40
+
41
@dataclass
class SimpleBoxConfig(SimpleFsspecConfig):
    """Fsspec connector config carrying Box-specific access settings."""

    access_config: BoxAccessConfig = None

    @requires_dependencies(["boxfs"], extras="box")
    def get_access_config(self) -> dict:
        """Return the filesystem kwargs, including a freshly built ``oauth`` entry.

        The oauth object is created on every call instead of being stored in
        the config because it is not serializable. ``box_app_config`` is
        removed from the returned kwargs; it is only used to build the oauth
        object.
        """
        from boxsdk import JWTAuth

        oauth = JWTAuth.from_settings_file(self.access_config.box_app_config)

        remaining: t.Dict[str, t.Any] = self.access_config.to_dict()
        remaining.pop("box_app_config", None)

        # "oauth" goes first; any same-named key from the config would override it.
        return {"oauth": oauth, **remaining}
61
+
62
+
63
@dataclass
class BoxIngestDoc(FsspecIngestDoc):
    """Fsspec ingest doc registered under the name ``box``."""

    connector_config: SimpleBoxConfig
    registry_name: str = "box"

    @SourceConnectionError.wrap
    @requires_dependencies(["boxfs", "fsspec"], extras="box")
    def get_file(self):
        """Delegate to the fsspec implementation after the box dependency check."""
        super().get_file()
72
+
73
+
74
@dataclass
class BoxSourceConnector(FsspecSourceConnector):
    """Fsspec source connector that produces ``BoxIngestDoc`` instances."""

    connector_config: SimpleBoxConfig

    def __post_init__(self):
        # Record which ingest doc class belongs to this connector.
        self.ingest_doc_cls: t.Type[BoxIngestDoc] = BoxIngestDoc

    @requires_dependencies(["boxfs"], extras="box")
    def check_connection(self):
        """Validate the access config by constructing a ``BoxFileSystem``.

        Raises:
            SourceConnectionError: if building the access kwargs or the
                filesystem raises any exception.
        """
        from boxfs import BoxFileSystem

        try:
            BoxFileSystem(**self.connector_config.get_access_config())
        except Exception as err:
            message = f"failed to validate connection: {err}"
            logger.error(message, exc_info=True)
            raise SourceConnectionError(message)
90
+
91
+
92
@dataclass
class BoxDestinationConnector(FsspecDestinationConnector):
    """Fsspec destination connector configured for Box."""

    connector_config: SimpleBoxConfig
    write_config: BoxWriteConfig

    @requires_dependencies(["boxfs", "fsspec"], extras="box")
    def initialize(self):
        """Delegate to the fsspec initialization after the box dependency check."""
        super().initialize()

    @requires_dependencies(["boxfs"], extras="box")
    def check_connection(self):
        """Validate the access config by constructing a ``BoxFileSystem``.

        Raises:
            DestinationConnectionError: if building the access kwargs or the
                filesystem raises any exception.
        """
        from boxfs import BoxFileSystem

        try:
            BoxFileSystem(**self.connector_config.get_access_config())
        except Exception as err:
            message = f"failed to validate connection: {err}"
            logger.error(message, exc_info=True)
            raise DestinationConnectionError(message)
+ raise DestinationConnectionError(f"failed to validate connection: {e}")