unstructured-ingest 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (356) hide show
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/__init__.py +14 -0
  4. unstructured_ingest/cli/base/__init__.py +0 -0
  5. unstructured_ingest/cli/base/cmd.py +19 -0
  6. unstructured_ingest/cli/base/dest.py +87 -0
  7. unstructured_ingest/cli/base/src.py +57 -0
  8. unstructured_ingest/cli/cli.py +32 -0
  9. unstructured_ingest/cli/cmd_factory.py +12 -0
  10. unstructured_ingest/cli/cmds/__init__.py +145 -0
  11. unstructured_ingest/cli/cmds/airtable.py +69 -0
  12. unstructured_ingest/cli/cmds/astra.py +99 -0
  13. unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
  14. unstructured_ingest/cli/cmds/biomed.py +52 -0
  15. unstructured_ingest/cli/cmds/chroma.py +104 -0
  16. unstructured_ingest/cli/cmds/clarifai.py +71 -0
  17. unstructured_ingest/cli/cmds/confluence.py +69 -0
  18. unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
  19. unstructured_ingest/cli/cmds/delta_table.py +94 -0
  20. unstructured_ingest/cli/cmds/discord.py +47 -0
  21. unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
  22. unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  23. unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
  24. unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
  25. unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
  26. unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
  27. unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
  28. unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
  29. unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
  30. unstructured_ingest/cli/cmds/github.py +54 -0
  31. unstructured_ingest/cli/cmds/gitlab.py +54 -0
  32. unstructured_ingest/cli/cmds/google_drive.py +49 -0
  33. unstructured_ingest/cli/cmds/hubspot.py +70 -0
  34. unstructured_ingest/cli/cmds/jira.py +71 -0
  35. unstructured_ingest/cli/cmds/kafka.py +102 -0
  36. unstructured_ingest/cli/cmds/local.py +43 -0
  37. unstructured_ingest/cli/cmds/mongodb.py +72 -0
  38. unstructured_ingest/cli/cmds/notion.py +48 -0
  39. unstructured_ingest/cli/cmds/onedrive.py +66 -0
  40. unstructured_ingest/cli/cmds/opensearch.py +117 -0
  41. unstructured_ingest/cli/cmds/outlook.py +67 -0
  42. unstructured_ingest/cli/cmds/pinecone.py +71 -0
  43. unstructured_ingest/cli/cmds/qdrant.py +124 -0
  44. unstructured_ingest/cli/cmds/reddit.py +67 -0
  45. unstructured_ingest/cli/cmds/salesforce.py +58 -0
  46. unstructured_ingest/cli/cmds/sharepoint.py +66 -0
  47. unstructured_ingest/cli/cmds/slack.py +56 -0
  48. unstructured_ingest/cli/cmds/sql.py +66 -0
  49. unstructured_ingest/cli/cmds/vectara.py +66 -0
  50. unstructured_ingest/cli/cmds/weaviate.py +98 -0
  51. unstructured_ingest/cli/cmds/wikipedia.py +40 -0
  52. unstructured_ingest/cli/common.py +7 -0
  53. unstructured_ingest/cli/interfaces.py +656 -0
  54. unstructured_ingest/cli/utils.py +205 -0
  55. unstructured_ingest/connector/__init__.py +0 -0
  56. unstructured_ingest/connector/airtable.py +309 -0
  57. unstructured_ingest/connector/astra.py +237 -0
  58. unstructured_ingest/connector/azure_cognitive_search.py +144 -0
  59. unstructured_ingest/connector/biomed.py +313 -0
  60. unstructured_ingest/connector/chroma.py +158 -0
  61. unstructured_ingest/connector/clarifai.py +122 -0
  62. unstructured_ingest/connector/confluence.py +285 -0
  63. unstructured_ingest/connector/databricks_volumes.py +137 -0
  64. unstructured_ingest/connector/delta_table.py +203 -0
  65. unstructured_ingest/connector/discord.py +180 -0
  66. unstructured_ingest/connector/elasticsearch.py +396 -0
  67. unstructured_ingest/connector/fsspec/__init__.py +0 -0
  68. unstructured_ingest/connector/fsspec/azure.py +78 -0
  69. unstructured_ingest/connector/fsspec/box.py +109 -0
  70. unstructured_ingest/connector/fsspec/dropbox.py +160 -0
  71. unstructured_ingest/connector/fsspec/fsspec.py +359 -0
  72. unstructured_ingest/connector/fsspec/gcs.py +82 -0
  73. unstructured_ingest/connector/fsspec/s3.py +62 -0
  74. unstructured_ingest/connector/fsspec/sftp.py +81 -0
  75. unstructured_ingest/connector/git.py +124 -0
  76. unstructured_ingest/connector/github.py +173 -0
  77. unstructured_ingest/connector/gitlab.py +142 -0
  78. unstructured_ingest/connector/google_drive.py +349 -0
  79. unstructured_ingest/connector/hubspot.py +278 -0
  80. unstructured_ingest/connector/jira.py +469 -0
  81. unstructured_ingest/connector/kafka.py +294 -0
  82. unstructured_ingest/connector/local.py +139 -0
  83. unstructured_ingest/connector/mongodb.py +285 -0
  84. unstructured_ingest/connector/notion/__init__.py +0 -0
  85. unstructured_ingest/connector/notion/client.py +233 -0
  86. unstructured_ingest/connector/notion/connector.py +468 -0
  87. unstructured_ingest/connector/notion/helpers.py +584 -0
  88. unstructured_ingest/connector/notion/interfaces.py +32 -0
  89. unstructured_ingest/connector/notion/types/__init__.py +0 -0
  90. unstructured_ingest/connector/notion/types/block.py +95 -0
  91. unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
  92. unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
  93. unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
  94. unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
  95. unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
  96. unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
  97. unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
  98. unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
  99. unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
  100. unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
  101. unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
  102. unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
  103. unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
  104. unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
  105. unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
  106. unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
  107. unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
  108. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
  109. unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
  110. unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
  111. unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
  112. unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
  113. unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
  114. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
  115. unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
  116. unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
  117. unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
  118. unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
  119. unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
  120. unstructured_ingest/connector/notion/types/database.py +72 -0
  121. unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
  122. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
  123. unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
  124. unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
  125. unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
  126. unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
  127. unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
  128. unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
  129. unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
  130. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
  131. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
  132. unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
  133. unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
  134. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
  135. unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
  136. unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
  137. unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
  138. unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
  139. unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
  140. unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
  141. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
  142. unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
  143. unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
  144. unstructured_ingest/connector/notion/types/date.py +26 -0
  145. unstructured_ingest/connector/notion/types/file.py +51 -0
  146. unstructured_ingest/connector/notion/types/page.py +44 -0
  147. unstructured_ingest/connector/notion/types/parent.py +66 -0
  148. unstructured_ingest/connector/notion/types/rich_text.py +189 -0
  149. unstructured_ingest/connector/notion/types/user.py +76 -0
  150. unstructured_ingest/connector/onedrive.py +232 -0
  151. unstructured_ingest/connector/opensearch.py +218 -0
  152. unstructured_ingest/connector/outlook.py +285 -0
  153. unstructured_ingest/connector/pinecone.py +140 -0
  154. unstructured_ingest/connector/qdrant.py +144 -0
  155. unstructured_ingest/connector/reddit.py +166 -0
  156. unstructured_ingest/connector/registry.py +109 -0
  157. unstructured_ingest/connector/salesforce.py +301 -0
  158. unstructured_ingest/connector/sharepoint.py +573 -0
  159. unstructured_ingest/connector/slack.py +224 -0
  160. unstructured_ingest/connector/sql.py +199 -0
  161. unstructured_ingest/connector/vectara.py +248 -0
  162. unstructured_ingest/connector/weaviate.py +190 -0
  163. unstructured_ingest/connector/wikipedia.py +208 -0
  164. unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
  165. unstructured_ingest/enhanced_dataclass/core.py +99 -0
  166. unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
  167. unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
  168. unstructured_ingest/error.py +49 -0
  169. unstructured_ingest/evaluate.py +338 -0
  170. unstructured_ingest/ingest_backoff/__init__.py +3 -0
  171. unstructured_ingest/ingest_backoff/_common.py +102 -0
  172. unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
  173. unstructured_ingest/interfaces.py +838 -0
  174. unstructured_ingest/logger.py +130 -0
  175. unstructured_ingest/main.py +11 -0
  176. unstructured_ingest/pipeline/__init__.py +22 -0
  177. unstructured_ingest/pipeline/copy.py +19 -0
  178. unstructured_ingest/pipeline/doc_factory.py +12 -0
  179. unstructured_ingest/pipeline/interfaces.py +265 -0
  180. unstructured_ingest/pipeline/partition.py +60 -0
  181. unstructured_ingest/pipeline/permissions.py +12 -0
  182. unstructured_ingest/pipeline/pipeline.py +117 -0
  183. unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  184. unstructured_ingest/pipeline/reformat/chunking.py +130 -0
  185. unstructured_ingest/pipeline/reformat/embedding.py +66 -0
  186. unstructured_ingest/pipeline/source.py +77 -0
  187. unstructured_ingest/pipeline/utils.py +6 -0
  188. unstructured_ingest/pipeline/write.py +18 -0
  189. unstructured_ingest/processor.py +93 -0
  190. unstructured_ingest/runner/__init__.py +104 -0
  191. unstructured_ingest/runner/airtable.py +35 -0
  192. unstructured_ingest/runner/astra.py +34 -0
  193. unstructured_ingest/runner/base_runner.py +89 -0
  194. unstructured_ingest/runner/biomed.py +45 -0
  195. unstructured_ingest/runner/confluence.py +35 -0
  196. unstructured_ingest/runner/delta_table.py +34 -0
  197. unstructured_ingest/runner/discord.py +35 -0
  198. unstructured_ingest/runner/elasticsearch.py +40 -0
  199. unstructured_ingest/runner/fsspec/__init__.py +0 -0
  200. unstructured_ingest/runner/fsspec/azure.py +30 -0
  201. unstructured_ingest/runner/fsspec/box.py +28 -0
  202. unstructured_ingest/runner/fsspec/dropbox.py +30 -0
  203. unstructured_ingest/runner/fsspec/fsspec.py +40 -0
  204. unstructured_ingest/runner/fsspec/gcs.py +28 -0
  205. unstructured_ingest/runner/fsspec/s3.py +28 -0
  206. unstructured_ingest/runner/fsspec/sftp.py +28 -0
  207. unstructured_ingest/runner/github.py +37 -0
  208. unstructured_ingest/runner/gitlab.py +37 -0
  209. unstructured_ingest/runner/google_drive.py +35 -0
  210. unstructured_ingest/runner/hubspot.py +35 -0
  211. unstructured_ingest/runner/jira.py +35 -0
  212. unstructured_ingest/runner/kafka.py +34 -0
  213. unstructured_ingest/runner/local.py +23 -0
  214. unstructured_ingest/runner/mongodb.py +34 -0
  215. unstructured_ingest/runner/notion.py +61 -0
  216. unstructured_ingest/runner/onedrive.py +35 -0
  217. unstructured_ingest/runner/opensearch.py +40 -0
  218. unstructured_ingest/runner/outlook.py +33 -0
  219. unstructured_ingest/runner/reddit.py +35 -0
  220. unstructured_ingest/runner/salesforce.py +33 -0
  221. unstructured_ingest/runner/sharepoint.py +35 -0
  222. unstructured_ingest/runner/slack.py +33 -0
  223. unstructured_ingest/runner/utils.py +47 -0
  224. unstructured_ingest/runner/wikipedia.py +35 -0
  225. unstructured_ingest/runner/writers/__init__.py +48 -0
  226. unstructured_ingest/runner/writers/astra.py +22 -0
  227. unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
  228. unstructured_ingest/runner/writers/base_writer.py +26 -0
  229. unstructured_ingest/runner/writers/chroma.py +22 -0
  230. unstructured_ingest/runner/writers/clarifai.py +19 -0
  231. unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
  232. unstructured_ingest/runner/writers/delta_table.py +24 -0
  233. unstructured_ingest/runner/writers/elasticsearch.py +24 -0
  234. unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  235. unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
  236. unstructured_ingest/runner/writers/fsspec/box.py +21 -0
  237. unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
  238. unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
  239. unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
  240. unstructured_ingest/runner/writers/kafka.py +21 -0
  241. unstructured_ingest/runner/writers/mongodb.py +21 -0
  242. unstructured_ingest/runner/writers/opensearch.py +26 -0
  243. unstructured_ingest/runner/writers/pinecone.py +21 -0
  244. unstructured_ingest/runner/writers/qdrant.py +19 -0
  245. unstructured_ingest/runner/writers/sql.py +22 -0
  246. unstructured_ingest/runner/writers/vectara.py +22 -0
  247. unstructured_ingest/runner/writers/weaviate.py +21 -0
  248. unstructured_ingest/utils/__init__.py +0 -0
  249. unstructured_ingest/utils/compression.py +117 -0
  250. unstructured_ingest/utils/data_prep.py +112 -0
  251. unstructured_ingest/utils/dep_check.py +66 -0
  252. unstructured_ingest/utils/string_and_date_utils.py +39 -0
  253. unstructured_ingest/utils/table.py +73 -0
  254. unstructured_ingest/v2/__init__.py +1 -0
  255. unstructured_ingest/v2/cli/__init__.py +0 -0
  256. unstructured_ingest/v2/cli/base/__init__.py +4 -0
  257. unstructured_ingest/v2/cli/base/cmd.py +215 -0
  258. unstructured_ingest/v2/cli/base/dest.py +76 -0
  259. unstructured_ingest/v2/cli/base/importer.py +34 -0
  260. unstructured_ingest/v2/cli/base/src.py +70 -0
  261. unstructured_ingest/v2/cli/cli.py +24 -0
  262. unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
  263. unstructured_ingest/v2/cli/cmds/astra.py +85 -0
  264. unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
  265. unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
  266. unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
  267. unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
  268. unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
  269. unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
  270. unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
  271. unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
  272. unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
  273. unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
  274. unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
  275. unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
  276. unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
  277. unstructured_ingest/v2/cli/cmds/local.py +60 -0
  278. unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
  279. unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
  280. unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
  281. unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
  282. unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
  283. unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
  284. unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
  285. unstructured_ingest/v2/cli/cmds/sql.py +84 -0
  286. unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
  287. unstructured_ingest/v2/cli/configs/__init__.py +6 -0
  288. unstructured_ingest/v2/cli/configs/chunk.py +89 -0
  289. unstructured_ingest/v2/cli/configs/embed.py +74 -0
  290. unstructured_ingest/v2/cli/configs/partition.py +99 -0
  291. unstructured_ingest/v2/cli/configs/processor.py +88 -0
  292. unstructured_ingest/v2/cli/interfaces.py +27 -0
  293. unstructured_ingest/v2/cli/utils.py +240 -0
  294. unstructured_ingest/v2/example.py +37 -0
  295. unstructured_ingest/v2/interfaces/__init__.py +29 -0
  296. unstructured_ingest/v2/interfaces/connector.py +32 -0
  297. unstructured_ingest/v2/interfaces/downloader.py +79 -0
  298. unstructured_ingest/v2/interfaces/file_data.py +49 -0
  299. unstructured_ingest/v2/interfaces/indexer.py +28 -0
  300. unstructured_ingest/v2/interfaces/process.py +20 -0
  301. unstructured_ingest/v2/interfaces/processor.py +48 -0
  302. unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
  303. unstructured_ingest/v2/interfaces/uploader.py +39 -0
  304. unstructured_ingest/v2/logger.py +126 -0
  305. unstructured_ingest/v2/main.py +11 -0
  306. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  307. unstructured_ingest/v2/pipeline/interfaces.py +167 -0
  308. unstructured_ingest/v2/pipeline/pipeline.py +284 -0
  309. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  310. unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
  311. unstructured_ingest/v2/pipeline/steps/download.py +124 -0
  312. unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
  313. unstructured_ingest/v2/pipeline/steps/index.py +61 -0
  314. unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
  315. unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
  316. unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
  317. unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
  318. unstructured_ingest/v2/pipeline/utils.py +15 -0
  319. unstructured_ingest/v2/processes/__init__.py +0 -0
  320. unstructured_ingest/v2/processes/chunker.py +97 -0
  321. unstructured_ingest/v2/processes/connector_registry.py +63 -0
  322. unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
  323. unstructured_ingest/v2/processes/connectors/astra.py +152 -0
  324. unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
  325. unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
  326. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
  327. unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
  328. unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
  329. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
  330. unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
  331. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
  332. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
  333. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
  334. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
  335. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
  336. unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
  337. unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
  338. unstructured_ingest/v2/processes/connectors/local.py +204 -0
  339. unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
  340. unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
  341. unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
  342. unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
  343. unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
  344. unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
  345. unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
  346. unstructured_ingest/v2/processes/connectors/sql.py +269 -0
  347. unstructured_ingest/v2/processes/connectors/utils.py +19 -0
  348. unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
  349. unstructured_ingest/v2/processes/embedder.py +76 -0
  350. unstructured_ingest/v2/processes/partitioner.py +166 -0
  351. unstructured_ingest/v2/processes/uncompress.py +43 -0
  352. unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
  353. unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
  354. unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
  355. unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
  356. unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,24 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.fsspec.azure import (
9
+ AzureWriteConfig,
10
+ SimpleAzureBlobStorageConfig,
11
+ )
12
+
13
+
14
@dataclass
class AzureWriter(Writer):
    """Writer that resolves to the Azure Blob Storage destination connector."""

    connector_config: "SimpleAzureBlobStorageConfig"
    write_config: "AzureWriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``AzureBlobStorageDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.fsspec.azure import (
            AzureBlobStorageDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig
9
+
10
+
11
@dataclass
class BoxWriter(Writer):
    """Writer that resolves to the Box destination connector."""

    connector_config: "SimpleBoxConfig"
    write_config: "BoxWriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``BoxDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.fsspec.box import BoxDestinationConnector as connector_cls

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.fsspec.dropbox import DropboxWriteConfig, SimpleDropboxConfig
9
+
10
+
11
@dataclass
class DropboxWriter(Writer):
    """Writer that resolves to the Dropbox destination connector."""

    connector_config: "SimpleDropboxConfig"
    write_config: "DropboxWriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``DropboxDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.fsspec.dropbox import (
            DropboxDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,19 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig
9
+
10
+
11
@dataclass
class GcsWriter(Writer):
    """Writer that resolves to the GCS destination connector."""

    connector_config: "SimpleGcsConfig"
    write_config: "GcsWriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``GcsDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.fsspec.gcs import (
            GcsDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config
9
+
10
+
11
@dataclass
class S3Writer(Writer):
    """Writer that resolves to the S3 destination connector."""

    connector_config: "SimpleS3Config"
    write_config: "S3WriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``S3DestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.fsspec.s3 import S3DestinationConnector as connector_cls

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig
9
+
10
+
11
@dataclass
class KafkaWriter(Writer):
    """Writer that resolves to the Kafka destination connector."""

    write_config: "KafkaWriteConfig"
    connector_config: "SimpleKafkaConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``KafkaDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.kafka import KafkaDestinationConnector as connector_cls

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.mongodb import MongoDBWriteConfig, SimpleMongoDBConfig
9
+
10
+
11
@dataclass
class MongodbWriter(Writer):
    """Writer that resolves to the MongoDB destination connector."""

    write_config: "MongoDBWriteConfig"
    connector_config: "SimpleMongoDBConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``MongoDBDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.mongodb import (
            MongoDBDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,26 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.elasticsearch import (
9
+ ElasticsearchWriteConfig,
10
+ )
11
+ from unstructured_ingest.connector.opensearch import (
12
+ SimpleOpenSearchConfig,
13
+ )
14
+
15
+
16
@dataclass
class OpenSearchWriter(Writer):
    """Writer that resolves to the OpenSearch destination connector.

    The write configuration is annotated with the Elasticsearch write-config
    type, while the connector configuration is the OpenSearch one.
    """

    connector_config: "SimpleOpenSearchConfig"
    write_config: "ElasticsearchWriteConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``OpenSearchDestinationConnector`` class (not an instance).

        The return annotation is ``t.Type[...]`` because the method hands back
        the class object itself, matching the other writer classes.
        """
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.opensearch import (
            OpenSearchDestinationConnector,
        )

        return OpenSearchDestinationConnector
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig
9
+
10
+
11
@dataclass
class PineconeWriter(Writer):
    """Writer that resolves to the Pinecone destination connector."""

    write_config: "PineconeWriteConfig"
    connector_config: "SimplePineconeConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``PineconeDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.pinecone import (
            PineconeDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,19 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig
9
+
10
+
11
@dataclass
class QdrantWriter(Writer):
    """Writer that resolves to the Qdrant destination connector."""

    write_config: "QdrantWriteConfig"
    connector_config: "SimpleQdrantConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``QdrantDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.qdrant import (
            QdrantDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,22 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.sql import SimpleSqlConfig
9
+ from unstructured_ingest.interfaces import WriteConfig
10
+
11
+
12
@dataclass
class SqlWriter(Writer):
    """Writer that resolves to the SQL destination connector."""

    write_config: "WriteConfig"
    connector_config: "SimpleSqlConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``SqlDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.sql import SqlDestinationConnector as connector_cls

        return connector_cls
@@ -0,0 +1,22 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
5
+ from unstructured_ingest.interfaces import BaseDestinationConnector
6
+ from unstructured_ingest.runner.writers.base_writer import Writer
7
+
8
+ if t.TYPE_CHECKING:
9
+ from unstructured_ingest.connector.vectara import SimpleVectaraConfig, VectaraWriteConfig
10
+
11
+
12
@dataclass
class VectaraWriter(Writer, EnhancedDataClassJsonMixin):
    """Writer that resolves to the Vectara destination connector.

    Unlike the sibling writers, this one also mixes in
    ``EnhancedDataClassJsonMixin``.
    """

    write_config: "VectaraWriteConfig"
    connector_config: "SimpleVectaraConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``VectaraDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.vectara import (
            VectaraDestinationConnector as connector_cls,
        )

        return connector_cls
@@ -0,0 +1,21 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.interfaces import BaseDestinationConnector
5
+ from unstructured_ingest.runner.writers.base_writer import Writer
6
+
7
+ if t.TYPE_CHECKING:
8
+ from unstructured_ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig
9
+
10
+
11
@dataclass
class WeaviateWriter(Writer):
    """Writer that resolves to the Weaviate destination connector."""

    write_config: "WeaviateWriteConfig"
    connector_config: "SimpleWeaviateConfig"

    def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
        """Return the ``WeaviateDestinationConnector`` class (not an instance)."""
        # The import is deferred to call time rather than done at module import.
        from unstructured_ingest.connector.weaviate import (
            WeaviateDestinationConnector as connector_cls,
        )

        return connector_cls
File without changes
@@ -0,0 +1,117 @@
1
+ import copy
2
+ import os
3
+ import sys
4
+ import tarfile
5
+ import zipfile
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import List, Optional
9
+
10
+ from unstructured_ingest.connector.local import LocalSourceConnector, SimpleLocalConfig
11
+ from unstructured_ingest.interfaces import (
12
+ BaseConnectorConfig,
13
+ BaseSingleIngestDoc,
14
+ ProcessorConfig,
15
+ ReadConfig,
16
+ )
17
+ from unstructured_ingest.logger import logger
18
+
19
+ ZIP_FILE_EXT = [".zip"]
20
+ TAR_FILE_EXT = [".tar", ".tar.gz", ".tgz"]
21
+
22
+
23
def uncompress_file(filename: str, path: Optional[str] = None) -> str:
    """Extract a zip or tar archive, dispatching on the file name's suffix.

    Args:
        filename: Archive path; its suffix must be listed in ZIP_FILE_EXT or
            TAR_FILE_EXT.
        path: Optional destination directory. It is created (with parents) when
            given; otherwise the format-specific helper picks the destination.

    Returns:
        Whatever the zip/tar helper returns (the extraction directory).

    Raises:
        ValueError: If the suffix matches neither extension list.
    """
    # Make sure an explicitly requested destination exists before extracting.
    if path:
        Path(path).mkdir(parents=True, exist_ok=True)

    if filename.endswith(tuple(ZIP_FILE_EXT)):
        return uncompress_zip_file(zip_filename=filename, path=path)
    if filename.endswith(tuple(TAR_FILE_EXT)):
        return uncompress_tar_file(tar_filename=filename, path=path)

    supported = ", ".join(ZIP_FILE_EXT + TAR_FILE_EXT)
    raise ValueError(
        "filename {} not a recognized compressed extension: {}".format(filename, supported),
    )
42
+
43
+
44
def uncompress_zip_file(zip_filename: str, path: Optional[str] = None) -> str:
    """Extract a zip archive and return the directory it was extracted into.

    Args:
        zip_filename: Path to the zip archive.
        path: Destination directory. When falsy, defaults to
            ``<archive dir>/<base name without zip suffix>-zip-uncompressed``.

    Returns:
        The directory the archive was extracted into.
    """
    parent_dir, base_name = os.path.split(zip_filename)

    # Drop the first matching archive suffix from the base name, if any.
    matched_ext = next((ext for ext in ZIP_FILE_EXT if base_name.endswith(ext)), None)
    if matched_ext is not None:
        base_name = base_name[: -len(matched_ext)]

    if not path:
        path = os.path.join(parent_dir, f"{base_name}-zip-uncompressed")

    logger.info(f"extracting zip {zip_filename} -> {path}")
    with zipfile.ZipFile(zip_filename) as archive:
        archive.extractall(path=path)
    return path
55
+
56
+
57
def uncompress_tar_file(tar_filename: str, path: Optional[str] = None) -> str:
    """Extract a tar archive and return the directory it was extracted into.

    Args:
        tar_filename: Path to the tar archive (any suffix in TAR_FILE_EXT).
        path: Destination directory. When falsy, defaults to
            ``<archive dir>/<base name without tar suffix>-tar-uncompressed``.

    Returns:
        The directory the archive was extracted into.
    """
    head, tail = os.path.split(tar_filename)
    for ext in TAR_FILE_EXT:
        if tail.endswith(ext):
            tail = tail[: -len(ext)]
            break

    path = path if path else os.path.join(head, f"{tail}-tar-uncompressed")
    logger.info(f"extracting tar {tar_filename} -> {path}")
    # "r:*" lets tarfile detect the compression itself. The previous "r:gz" mode
    # raised ReadError for uncompressed ".tar" archives even though that suffix is
    # accepted by TAR_FILE_EXT.
    with tarfile.open(tar_filename, "r:*") as tfile:
        # NOTE(robinson): Mitigate against malicious content being extracted from the tar file.
        # Extraction filters arrived in Python 3.12 and were backported to security
        # releases of older versions, so detect the feature instead of the version.
        # Ref: https://docs.python.org/3/library/tarfile.html#extraction-filters
        if hasattr(tarfile, "tar_filter"):
            tfile.extraction_filter = tarfile.tar_filter
        else:
            logger.warning(
                "Extraction filtering for tar files is available for Python 3.12 and above. "
                "Consider upgrading your Python version to improve security. "
                "See https://docs.python.org/3/library/tarfile.html#extraction-filters"
            )
        tfile.extractall(path=path)
    return path
80
+
81
+
82
@dataclass
class CompressionSourceConnectorMixin:
    """Mixin that lets a source connector expand compressed ingest docs."""

    processor_config: ProcessorConfig
    read_config: ReadConfig
    connector_config: BaseConnectorConfig

    def process_compressed_doc(self, doc: BaseSingleIngestDoc) -> List[BaseSingleIngestDoc]:
        """Extract a compressed doc and return ingest docs for its contents.

        The doc is fetched via ``doc.get_file()``, uncompressed next to the
        downloaded file, and the extracted directory is then handed to a
        recursive ``LocalSourceConnector`` whose ingest docs are returned.
        """
        # Fetch the raw archive, then extract it.
        doc.get_file()
        path = uncompress_file(filename=str(doc.filename))

        # Shallow copies, so the output_dir override below is set on the copy
        # rather than on this connector's own processor config.
        new_read_configs = copy.copy(self.read_config)
        new_process_configs = copy.copy(self.processor_config)

        # Extraction path with the download dir text removed, appended to the
        # output dir with exactly one os.sep inserted when it lacks a trailing one.
        relative_path = path.replace(self.read_config.download_dir, "")
        output_root = self.processor_config.output_dir
        joiner = "" if output_root.endswith(os.sep) else os.sep
        new_process_configs.output_dir = f"{output_root}{joiner}{relative_path}"

        local_config = SimpleLocalConfig(
            input_path=path,
            recursive=True,
        )
        local_connector = LocalSourceConnector(
            connector_config=local_config,
            read_config=new_read_configs,
            processor_config=new_process_configs,
        )
        logger.info(f"Created local source connector: {local_connector.to_json()}")
        local_connector.initialize()
        return local_connector.get_ingest_docs()
@@ -0,0 +1,112 @@
1
+ import itertools
2
+ import json
3
+ from datetime import datetime
4
+ from typing import Any, Optional, Sequence, cast
5
+
6
+ DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d+%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z")
7
+
8
+
9
def batch_generator(iterable, batch_size=100):
    """Yield successive tuples of at most ``batch_size`` items from ``iterable``.

    The final tuple may be shorter; nothing is yielded for an empty iterable.
    """
    source = iter(iterable)
    # islice drains up to batch_size items per pass; an empty tuple means the
    # underlying iterator is exhausted.
    while batch := tuple(itertools.islice(source, batch_size)):
        yield batch
16
+
17
+
18
def generator_batching_wbytes(iterable, batch_size_limit_bytes=15_000_000):
    """Yield lists of items whose combined JSON-encoded size stays within a byte limit.

    Each item's size is the length of its UTF-8 encoded ``json.dumps`` output.
    Items are accumulated in order until adding the next one would exceed
    ``batch_size_limit_bytes``; the accumulated batch is then yielded and a new
    one is started with that item. An item that is larger than the limit on its
    own is yielded as a single-item batch.

    Args:
        iterable: JSON-serializable items.
        batch_size_limit_bytes: Maximum combined encoded size of one batch.

    Yields:
        Non-empty lists of items.
    """
    current_batch, current_batch_size = [], 0

    for item in iterable:
        item_size_bytes = len(json.dumps(item).encode("utf-8"))

        if current_batch_size + item_size_bytes <= batch_size_limit_bytes:
            current_batch.append(item)
            current_batch_size += item_size_bytes
            continue

        # Flush only when there is something to flush: if the very first item is
        # already over the limit, the batch is still empty and must not be emitted.
        if current_batch:
            yield current_batch
        current_batch, current_batch_size = [item], item_size_bytes

    if current_batch:
        yield current_batch
34
+
35
+
36
def flatten_dict(
    dictionary: dict[str, Any],
    parent_key: str = "",
    separator: str = "_",
    flatten_lists: bool = False,
    remove_none: bool = False,
    keys_to_omit: Optional[Sequence[str]] = None,
) -> dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Nested keys are joined as ``{parent_key}{separator}{key}``.

    Args:
        dictionary: The mapping to flatten.
        parent_key: Prefix applied to every key (used by the recursion).
        separator: String placed between key segments.
        flatten_lists: When True, lists and tuples are expanded too, using the
            element index as the key segment.
        remove_none: When True, entries whose value is None are dropped.
        keys_to_omit: Fully-qualified keys (``{parent_key}{separator}{key}``)
            whose values are stored as-is instead of being flattened.

    Returns:
        The flattened dictionary.
    """
    omitted = keys_to_omit or []
    result: dict[str, Any] = {}

    for key, value in dictionary.items():
        full_key = f"{parent_key}{separator}{key}" if parent_key else key

        # Omitted keys win over every other rule, including remove_none.
        if full_key in omitted:
            result[full_key] = value
            continue

        if remove_none and value is None:
            continue

        if isinstance(value, dict):
            nested = flatten_dict(
                value, full_key, separator, flatten_lists, remove_none, keys_to_omit=omitted
            )
            result.update(nested)
            continue

        if flatten_lists and isinstance(value, (list, tuple)):
            # Wrap each element in a one-entry dict so the recursion applies the
            # same rules (omit, None removal, nesting) to list members.
            for index, item in enumerate(value):
                indexed = {f"{full_key}{separator}{index}": item}
                result.update(
                    flatten_dict(
                        indexed, "", separator, flatten_lists, remove_none, keys_to_omit=omitted
                    )
                )
            continue

        result[full_key] = value

    return result
83
+
84
+
85
def validate_date_args(date: Optional[str] = None) -> bool:
    """Check that ``date`` parses under at least one entry of DATE_FORMATS.

    Originally noted as being used by the biomed connector.

    Supported Date Formats:
        - 'YYYY-MM-DD'
        - 'YYYY-MM-DDTHH:MM:SS'
        - 'YYYY-MM-DD+HH:MM:SS'
        - 'YYYY-MM-DDTHH:MM:SS±HHMM'

    Returns:
        True when the string matches one of the supported formats.

    Raises:
        ValueError: If ``date`` is falsy (None or empty) or matches no format.
    """
    if not date:
        raise ValueError("The argument date is None.")

    for date_format in DATE_FORMATS:
        try:
            datetime.strptime(date, date_format)
        except ValueError:
            # Not this format; try the next one.
            continue
        return True

    raise ValueError(
        f"The argument {date} does not satisfy the format:"
        f" YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SS±HHMM",
    )
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import importlib
5
+ from functools import wraps
6
+ from typing import (
7
+ Callable,
8
+ List,
9
+ Optional,
10
+ TypeVar,
11
+ )
12
+
13
+ from typing_extensions import ParamSpec
14
+
15
+ _T = TypeVar("_T")
16
+ _P = ParamSpec("_P")
17
+
18
+
19
def requires_dependencies(
    dependencies: str | list[str],
    extras: Optional[str] = None,
) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
    """Build a decorator that verifies importable dependencies before each call.

    The check runs every time the decorated callable is invoked, not when it is
    decorated. Coroutine functions are wrapped with an async wrapper; everything
    else gets a regular one.

    Args:
        dependencies: One module name or a list of module names to check.
        extras: Optional extras name used in the install hint of the error.

    Raises:
        ImportError: (from the wrapped call) when any dependency is missing.
    """
    required = [dependencies] if isinstance(dependencies, str) else dependencies

    def decorator(func: Callable[_P, _T]) -> Callable[_P, _T]:
        def run_check():
            missing_deps: List[str] = [dep for dep in required if not dependency_exists(dep)]
            if not missing_deps:
                return
            if extras:
                hint = f"""Please install them using `pip install "unstructured[{extras}]"`."""
            else:
                hint = f"Please install them using `pip install {' '.join(missing_deps)}`."
            raise ImportError(
                f"Following dependencies are missing: {', '.join(missing_deps)}. " + hint,
            )

        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def wrapper_async(*args: _P.args, **kwargs: _P.kwargs):
                run_check()
                return await func(*args, **kwargs)

            return wrapper_async

        @wraps(func)
        def wrapper(*args: _P.args, **kwargs: _P.kwargs):
            run_check()
            return func(*args, **kwargs)

        return wrapper

    return decorator
57
+
58
+
59
def dependency_exists(dependency: str):
    """Report whether ``dependency`` can be imported.

    Returns False only when the import fails with an ImportError whose repr
    mentions ``dependency``. An ImportError that does not mention it is treated
    as unrelated and the dependency is reported as present.
    """
    try:
        importlib.import_module(dependency)
    except ImportError as err:
        # Only blame the dependency itself if the error names it.
        return dependency not in repr(err)
    return True
@@ -0,0 +1,39 @@
1
+ import json
2
+ import typing as t
3
+ from datetime import datetime
4
+
5
+ from dateutil import parser
6
+
7
+
8
def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
    """Try to deserialize a JSON string, returning the input unchanged on failure.

    Two attempts are made: the string as given, then the string with single
    quotes swapped for double quotes (a common quoting mistake). If neither
    decodes, the original string is returned, since it may simply be a path
    rather than JSON.
    """
    for swap_quotes in (False, True):
        # The quote-swapped variant is only built if the first attempt failed.
        candidate = json_string.replace("'", '"') if swap_quotes else json_string
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Not necessarily an error: the input may be a path or malformed JSON.
            continue
    return json_string
22
+
23
+
24
def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str:
    """Return ``timestamp`` as an ISO-format datetime string.

    Args:
        timestamp: A datetime object, or a string that ``dateutil`` can parse.

    Raises:
        ValueError: If a string input cannot be parsed as a datetime.
        TypeError: If the input is neither a datetime nor a string.
    """
    if isinstance(timestamp, datetime):
        return timestamp.isoformat()

    if not isinstance(timestamp, str):
        raise TypeError(f"Expected input type datetime or str, but got {type(timestamp)}.")

    try:
        # dateutil accepts a wide range of textual datetime formats.
        return parser.parse(timestamp).isoformat()
    except ValueError as err:
        raise ValueError(f"String '{timestamp}' could not be parsed as a datetime.") from err